Level: Real-World Project
Here is my function to download images from a URL:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

test_urls = [
    "https://books.toscrape.com/",
    "https://en.wikipedia.org/wiki/Tomato",
]

def download_images(url, output_folder="downloaded_images"):
    # One subfolder per domain, e.g. "downloaded_images/books_toscrape_com"
    domain = urlparse(url).netloc.replace(".", "_")
    folder_path = os.path.join(output_folder, domain)
    os.makedirs(folder_path, exist_ok=True)

    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    img_tags = soup.find_all("img")

    for img in img_tags:
        src = img.get("src")
        if not src:
            continue  # skip <img> tags without a src attribute
        img_url = urljoin(url, src)  # resolve relative paths against the page URL
        file_name = os.path.basename(urlparse(img_url).path)
        if not file_name:
            continue  # skip URLs that do not end in a file name
        img_data = requests.get(img_url).content
        img_path = os.path.join(folder_path, file_name)
        with open(img_path, "wb") as file:
            file.write(img_data)
        print(f"✅ Downloaded: {file_name}")

    print("✅ All images downloaded!")

for url in test_urls:
    download_images(url)
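One thing to watch out for: a single broken image link or a slow server will currently raise an exception and abort the whole page. Below is a minimal sketch of how the download loop could be hardened with timeouts, streamed writes, and per-image error handling. This is my own variation, not part of the original exercise; the name download_images_safe and the 8192-byte chunk size are arbitrary choices.

import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

def download_images_safe(url, output_folder="downloaded_images"):
    # Hypothetical hardened variant: same idea as above, but each image
    # download gets its own try/except, a timeout, and a streamed write.
    domain = urlparse(url).netloc.replace(".", "_")
    folder_path = os.path.join(output_folder, domain)
    os.makedirs(folder_path, exist_ok=True)

    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    for img in soup.find_all("img"):
        src = img.get("src")
        if not src:
            continue
        img_url = urljoin(url, src)
        file_name = os.path.basename(urlparse(img_url).path)
        if not file_name:
            continue
        try:
            # stream=True avoids loading large images fully into memory
            with requests.get(img_url, stream=True, timeout=10) as img_resp:
                img_resp.raise_for_status()
                with open(os.path.join(folder_path, file_name), "wb") as f:
                    for chunk in img_resp.iter_content(chunk_size=8192):
                        f.write(chunk)
            print(f"✅ Downloaded: {file_name}")
        except requests.RequestException as exc:
            # Log the failure and keep going instead of aborting the whole page
            print(f"⚠️ Skipped {img_url}: {exc}")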