From 8c95632894d339f064920ce4cdef110a61858a31 Mon Sep 17 00:00:00 2001 From: TheOnlyWayUp Date: Mon, 9 Jun 2025 22:05:39 +0000 Subject: [PATCH] fix(api): Tree images passed to Generators as AsyncGenerator --- src/api/src/create_book/generators/epub.py | 5 ++--- src/api/src/create_book/generators/pdf.py | 10 ++++++---- src/api/src/create_book/generators/types.py | 8 ++++---- src/api/src/create_book/parser.py | 18 ++++++++++-------- src/api/src/main.py | 9 ++++++--- 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/api/src/create_book/generators/epub.py b/src/api/src/create_book/generators/epub.py index 7ffc50c..9261f5b 100644 --- a/src/api/src/create_book/generators/epub.py +++ b/src/api/src/create_book/generators/epub.py @@ -1,5 +1,4 @@ from io import BytesIO -from typing import Generator, List from bs4 import BeautifulSoup from ebooklib import epub @@ -12,9 +11,9 @@ class EPUBGenerator(AbstractGenerator): def __init__( self, metadata: Story, - part_trees: List[BeautifulSoup], + part_trees: list[BeautifulSoup], cover: bytes, - images: List[Generator[bytes]] | None, + images: list[list[bytes | None]], ): self.story = metadata self.parts = part_trees diff --git a/src/api/src/create_book/generators/pdf.py b/src/api/src/create_book/generators/pdf.py index 2e0802f..7def83c 100644 --- a/src/api/src/create_book/generators/pdf.py +++ b/src/api/src/create_book/generators/pdf.py @@ -2,9 +2,8 @@ from base64 import b64encode from io import BytesIO from pathlib import Path from tempfile import NamedTemporaryFile, _TemporaryFileWrapper -from typing import Generator, List, cast -from bs4 import BeautifulSoup, Tag +from bs4 import BeautifulSoup from exiftool import ExifTool from jinja2 import Template from weasyprint import CSS, HTML @@ -87,9 +86,9 @@ class PDFGenerator(AbstractGenerator): def __init__( self, metadata: Story, - part_trees: List[BeautifulSoup], + part_trees: list[BeautifulSoup], cover: bytes, - images: List[Generator[bytes]] | None, + images: list[list[bytes | None]], author_image: bytes, ): self.story = metadata @@ -109,6 +108,9 @@ class PDFGenerator(AbstractGenerator): for img_idx, (img_data, img_tag) in enumerate( zip(self.images[idx], tree.find_all("img")) ): + if not img_data: + continue + img_tag["src"] = ( f"data:image/jpg;base64,{b64encode(img_data).decode()}" ) diff --git a/src/api/src/create_book/generators/types.py b/src/api/src/create_book/generators/types.py index 1c9341e..e78b111 100644 --- a/src/api/src/create_book/generators/types.py +++ b/src/api/src/create_book/generators/types.py @@ -1,6 +1,6 @@ from io import BytesIO from tempfile import _TemporaryFileWrapper -from typing import Generator, List, Literal +from typing import Literal from bs4 import BeautifulSoup from ebooklib.epub import EpubBook @@ -15,15 +15,15 @@ class AbstractGenerator: metadata (Story): Story Metadata. part_trees (List[BeautifulSoup]): Parsed part trees. cover (bytes): Cover image. - images (List[List[bytes]] | None): An array of images for each chapter, if images have been downloaded. + images (List[List[bytes | None]]): An array of images for each chapter, if images have been downloaded. """ def __init__( self, metadata: Story, - part_trees: List[BeautifulSoup], + part_trees: list[BeautifulSoup], cover: bytes, - images: List[Generator[bytes]] | None, + images: list[list[bytes | None]], ): self.story = metadata self.parts = part_trees diff --git a/src/api/src/create_book/parser.py b/src/api/src/create_book/parser.py index e2a8c73..0f8be61 100644 --- a/src/api/src/create_book/parser.py +++ b/src/api/src/create_book/parser.py @@ -1,6 +1,6 @@ import asyncio -from itertools import batched, chain -from typing import Generator, List, Tuple, cast +from itertools import batched +from typing import cast from aiohttp import ClientSession from bs4 import BeautifulSoup, Tag @@ -74,11 +74,13 @@ async def fetch_image(url: str) -> bytes | None: return body -async def download_tree_images(tree: BeautifulSoup) -> Generator[bytes]: +async def fetch_tree_images(tree: BeautifulSoup): """Return a Generator of bytes containing image data for all images referenced in the tree.""" image_urls = [img["src"] for img in tree.find_all("img")] - downloaded_images: Generator[bytes] = chain( - await asyncio.gather(*[fetch_image(url) for url in chunk]) - for chunk in batched(image_urls, 3) - ) - return downloaded_images + + images = [] + for chunk in batched(image_urls, 3): + for image_data in await asyncio.gather(*[fetch_image(url) for url in chunk]): + images.append(image_data) + + return images diff --git a/src/api/src/main.py b/src/api/src/main.py index 6ee19ad..5129301 100644 --- a/src/api/src/main.py +++ b/src/api/src/main.py @@ -31,7 +31,7 @@ from create_book import ( logger, slugify, ) -from create_book.parser import clean_tree, download_tree_images +from create_book.parser import clean_tree, fetch_tree_images app = FastAPI() BUILD_PATH = Path(__file__).parent / "build" @@ -181,8 +181,11 @@ async def handle_download( for part in metadata["parts"] ] - # download_images: - images = [await download_tree_images(tree) for tree in part_trees] if download_images else None + images = ( + [await fetch_tree_images(tree) for tree in part_trees] + if download_images + else [] + ) match format: case DownloadFormat.epub: