From 82270dc770946ce9d91931d207919948290f142c Mon Sep 17 00:00:00 2001 From: AaronBenDaniel <144371000+AaronBenDaniel@users.noreply.github.com> Date: Sun, 15 Dec 2024 14:38:28 -0500 Subject: [PATCH] feat(api): Download parts as .zip --- src/api/src/create_book.py | 27 ++++++++++++++++++++++++++- src/api/src/main.py | 2 +- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/api/src/create_book.py b/src/api/src/create_book.py index 0934b62..d765641 100644 --- a/src/api/src/create_book.py +++ b/src/api/src/create_book.py @@ -17,6 +17,8 @@ from pydantic_settings import BaseSettings from aiohttp import ClientResponseError from aiohttp_client_cache.session import CachedSession from aiohttp_client_cache import FileBackend, RedisBackend +from io import BytesIO +from zipfile import ZipFile load_dotenv(override=True) @@ -251,6 +253,26 @@ async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> st return body +@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) +async def fetch_story_zip(story_id: int, cookies: Optional[dict] = None) -> BytesIO: + """Return a BytesIO stream of a .zip file containing each part's HTML content.""" + with start_action(action_type="api_fetch_storyZip", story_id=story_id): + async with CachedSession( + headers=headers, + cookies=cookies, + cache=None if cookies else cache, + ) as session: + async with session.get( + f"https://www.wattpad.com/apiv2/?m=storytext&group_id={story_id}&output=zip" + ) as response: + response.raise_for_status() + + bytes_object = await response.read() + bytes_stream = BytesIO(bytes_object) + + return bytes_stream + + @backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) async def fetch_cover(url: str) -> bytes: """Fetch cover image bytes.""" @@ -307,8 +329,11 @@ async def add_chapters( ): chapters = [] + story_zip = await fetch_story_zip(data["id"], cookies) + archive = ZipFile(story_zip, "r") + for cidx, part in enumerate(data["parts"]): - content = await fetch_part_content(part["id"], cookies=cookies) + content = archive.read(str(part["id"])).decode("utf-8") title = part["title"] # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1 diff --git a/src/api/src/main.py b/src/api/src/main.py index ac5be01..a9ab583 100644 --- a/src/api/src/main.py +++ b/src/api/src/main.py @@ -165,7 +165,7 @@ async def handle_download( temp_file.file.seek(0) book_data = temp_file.file.read() - + return StreamingResponse( BytesIO(book_data), media_type="application/epub+zip",