From db6c841e2f109bde231c84a6bf4ed82edeaed794 Mon Sep 17 00:00:00 2001 From: AaronBenDaniel <144371000+AaronBenDaniel@users.noreply.github.com> Date: Sun, 22 Jun 2025 15:50:30 -0400 Subject: [PATCH] fix(api): Validate image URLs before trying to download --- src/api/src/create_book/parser.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/api/src/create_book/parser.py b/src/api/src/create_book/parser.py index eafb1c5..3daa3ca 100644 --- a/src/api/src/create_book/parser.py +++ b/src/api/src/create_book/parser.py @@ -5,6 +5,7 @@ from typing import cast from aiohttp import ClientSession from bs4 import BeautifulSoup, Tag from eliot import start_action +from urllib.parse import urlparse from .vars import headers @@ -76,7 +77,12 @@ async def fetch_image(url: str) -> bytes | None: async def fetch_tree_images(tree: BeautifulSoup): """Return a Generator of bytes containing image data for all images referenced in the tree.""" - image_urls = [img["src"] for img in tree.find_all("img")] + + image_urls = [] + for img in tree.find_all("img"): + parsed = urlparse(img["src"]) + if parsed.scheme and parsed.netloc: # Test if valid URL + image_urls.append(img["src"]) images = [] for chunk in batched(image_urls, 3):