Discussion about this post

User's avatar
VINOD VIJAYAN's avatar

Here is my function to download images from a URL:

import os

import requests

from bs4 import BeautifulSoup

from urllib.parse import urljoin, urlparse

# Sample pages used to exercise download_images().
test_urls = [
    "https://books.toscrape.com/",
    "https://en.wikipedia.org/wiki/Tomato",
]

def download_images(url, output_folder="downloaded_images"):
    """Download every image referenced by an ``<img>`` tag on a web page.

    Files are written to ``<output_folder>/<domain>``, where ``<domain>`` is
    the network location of ``url`` with dots replaced by underscores. Each
    file is named after the last path segment of its image URL, so two images
    sharing that name overwrite one another.

    Args:
        url: Address of the HTML page to scan for images.
        output_folder: Root directory for downloads; created when missing.

    Raises:
        requests.RequestException: If the page itself cannot be fetched or
            answers with an HTTP error status. Failures on individual images
            are reported and skipped instead.
    """
    domain = urlparse(url).netloc.replace(".", "_")
    folder_path = os.path.join(output_folder, domain)
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(folder_path, exist_ok=True)

    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    for index, img in enumerate(soup.find_all("img"), start=1):
        # Some <img> tags may carry no src attribute; img["src"] would raise.
        src = img.get("src")
        if not src:
            continue

        img_url = urljoin(url, src)
        parsed = urlparse(img_url)
        # Inline data: URIs and other non-HTTP schemes cannot be fetched.
        if parsed.scheme not in ("http", "https"):
            continue

        # A path ending in "/" has an empty basename; fall back to a
        # generated name rather than trying to open the folder itself.
        file_name = os.path.basename(parsed.path) or f"image_{index}"

        try:
            img_response = requests.get(img_url, timeout=30)
            img_response.raise_for_status()
        except requests.RequestException as error:
            # One broken image should not abort the remaining downloads.
            print(f"❌ Failed: {img_url} ({error})")
            continue

        img_path = os.path.join(folder_path, file_name)
        with open(img_path, "wb") as file:
            file.write(img_response.content)
        print(f"✅ Downloaded: {file_name}")

    print("✅ All images downloaded!")

# Download the images from each sample page in turn.
for url in test_urls:
    download_images(url)

Expand full comment

No posts