From e53ba34bac7d67257be0a0c2b03abe7010dceed1 Mon Sep 17 00:00:00 2001 From: Aron BenDaniel <144371000+AaronBenDaniel@users.noreply.github.com> Date: Fri, 14 Nov 2025 14:57:25 -0500 Subject: [PATCH] list-downloading v3 --- src/api/src/create_book/__init__.py | 2 + src/api/src/create_book/create_book.py | 24 +++- src/api/src/create_book/models.py | 8 +- src/api/src/main.py | 185 +++++++++++++++++-------- src/frontend/src/routes/+page.svelte | 8 +- 5 files changed, 168 insertions(+), 59 deletions(-) diff --git a/src/api/src/create_book/__init__.py b/src/api/src/create_book/__init__.py index 880237d..3deab1d 100644 --- a/src/api/src/create_book/__init__.py +++ b/src/api/src/create_book/__init__.py @@ -5,9 +5,11 @@ from .create_book import ( fetch_story, fetch_story_content_zip, fetch_story_from_partId, + fetch_list, ) from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError from .generators import EPUBGenerator, PDFGenerator from .logs import logger from .parser import fetch_image from .utils import slugify +from .models import Story, List diff --git a/src/api/src/create_book/create_book.py b/src/api/src/create_book/create_book.py index b69ae93..171df89 100644 --- a/src/api/src/create_book/create_book.py +++ b/src/api/src/create_book/create_book.py @@ -11,7 +11,7 @@ from pydantic import TypeAdapter from .exceptions import PartNotFoundError, StoryNotFoundError from .logs import logger -from .models import Story +from .models import Story, List from .vars import cache, headers story_ta = TypeAdapter(Story) @@ -70,7 +70,7 @@ async def fetch_story_from_partId( headers=headers, cache=None if cookies else cache ) as session: # Don't cache requests with Cookies. async with session.get( - f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)" + f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)" ) as response: body = await response.json() @@ -93,7 +93,7 @@ async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story: headers=headers, cookies=cookies, cache=None if cookies else cache ) as session: async with session.get( - f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright" + f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright" ) as response: body = await response.json() @@ -127,3 +127,21 @@ async def fetch_story_content_zip( bytes_stream = BytesIO(await response.read()) return bytes_stream + + +@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) +async def fetch_list(list_id: int, cookies: Optional[dict] = None) -> List: + """Fetch List metadata from a List ID.""" + with start_action(action_type="api_fetch_list", list_id=list_id): + async with CachedSession( + headers=headers, + cookies=cookies, + cache=None if cookies else cache, + ) as session: # Don't cache requests with Cookies. + async with session.get( + f"https://www.wattpad.com/api/v3/lists/{list_id}?fields=name,stories(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)" + ) as response: + response.raise_for_status() + body = await response.json() + + return body diff --git a/src/api/src/create_book/models.py b/src/api/src/create_book/models.py index 53ef697..d0231f6 100644 --- a/src/api/src/create_book/models.py +++ b/src/api/src/create_book/models.py @@ -1,4 +1,4 @@ -from typing import Optional, TypedDict +from typing import Optional, TypedDict, NotRequired class CopyrightData(TypedDict): @@ -22,6 +22,7 @@ class User(TypedDict): class Part(TypedDict): id: int title: str + deleted: NotRequired[bool] class Story(TypedDict): @@ -40,3 +41,8 @@ class Story(TypedDict): parts: list[Part] isPaywalled: bool copyright: int + + +class List(TypedDict): + name: str + stories: list[Story] diff --git a/src/api/src/main.py b/src/api/src/main.py index 854676a..9e84699 100644 --- a/src/api/src/main.py +++ b/src/api/src/main.py @@ -3,6 +3,7 @@ import asyncio from enum import Enum from os import getenv +from io import BytesIO from pathlib import Path from typing import Optional from zipfile import ZipFile @@ -29,8 +30,11 @@ from create_book import ( fetch_story, fetch_story_content_zip, fetch_story_from_partId, + fetch_list, logger, slugify, + Story, + List, ) from create_book.parser import clean_tree, fetch_tree_images @@ -85,6 +89,93 @@ class DownloadFormat(Enum): class DownloadMode(Enum): story = "story" part = "part" + list = "list" + + +async def download_story( + metadata: Story, + download_images: bool = False, + format: DownloadFormat = DownloadFormat.epub, + cookies: dict = None, +) -> BytesIO: + with start_action( + action_type="download_story", + story_id=metadata["id"], + download_images=download_images, + format=format, + ): + # Fetch cover image + cover_data = await fetch_image( + metadata["cover"].replace("-256-", "-512-") + ) # Increase resolution + if not cover_data: + raise HTTPException(status_code=422) + + # Fetch parts archive + story_zip = await fetch_story_content_zip(metadata["id"], cookies) + archive = ZipFile(story_zip, "r") + + # Parse part content + part_trees: list[BeautifulSoup] = [] + + for part in metadata["parts"]: + if "deleted" in part and part["deleted"]: + continue + part_trees.append( + clean_tree( + part["title"], + part["id"], + archive.read(str(part["id"])).decode("utf-8"), + ) + ) + + # Fetch images + images = ( + [await fetch_tree_images(tree) for tree in part_trees] + if download_images + else [] + ) + + # Build output file + match format: + case DownloadFormat.epub: + book = EPUBGenerator(metadata, part_trees, cover_data, images) + case DownloadFormat.pdf: + # Fetch author profile picture + author_image = await fetch_image( + metadata["user"]["avatar"].replace("-256-", "-512-") + ) + if not author_image: + raise HTTPException(status_code=422) + + book = PDFGenerator( + metadata, part_trees, cover_data, images, author_image + ) + + logger.info(f"Retrieved story metadata and cover ({metadata['id']=})") + + book.compile() + + return book.dump() + + +async def download_list( + metadata: List, + download_images: bool = False, + format: DownloadFormat = DownloadFormat.epub, + cookies: dict = None, +) -> BytesIO: + output_buffer = BytesIO() + + with ZipFile(output_buffer, "w") as archive: + for story in metadata["stories"]: + story_file = await download_story(story, download_images, format, cookies) + file_name = f"{slugify(story['title'])}_{story['id']}_{'images' if download_images else ''}.{'epub' if format==DownloadFormat.epub else 'pdf'}" + archive.writestr(file_name, story_file.read()) + + output_buffer.seek(0) + + return output_buffer @app.get("/") @@ -133,7 +224,7 @@ async def handle_download( password: Optional[str] = None, ): with start_action( - action_type="download", + action_type="handle_download", download_id=download_id, download_images=download_images, format=format, @@ -161,75 +252,61 @@ async def handle_download( else: cookies = None - if format == DownloadFormat.pdf and not PDFS_ENABLED: - logger.error("PDF Downloads not enabled.") - return HTMLResponse( - status_code=403, - content='PDF Downloads have been disabled by the server administrator. Support is available on the Discord', - ) + match format: + case DownloadFormat.epub: + media_type = "application/epub+zip" + extension = "epub" + case DownloadFormat.pdf: + if not PDFS_ENABLED: + logger.error("PDF Downloads not enabled.") + return HTMLResponse( + status_code=403, + content='PDF Downloads have been disabled by the server administrator. Support is available on the Discord', + ) + + media_type = "application/pdf" + extension = "pdf" match mode: case DownloadMode.story: - story_id = download_id - metadata = await fetch_story(story_id, cookies) + metadata = await fetch_story(download_id, cookies) + output_buffer = await download_story( + metadata, download_images, format, cookies + ) case DownloadMode.part: - story_id, metadata = await fetch_story_from_partId(download_id, cookies) - - cover_data = await fetch_image( - metadata["cover"].replace("-256-", "-512-") - ) # Increase resolution - if not cover_data: - raise HTTPException(status_code=422) - - story_zip = await fetch_story_content_zip(story_id, cookies) - archive = ZipFile(story_zip, "r") - - part_trees: list[BeautifulSoup] = [ - clean_tree( - part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8") - ) - for part in metadata["parts"] - ] - - images = ( - [await fetch_tree_images(tree) for tree in part_trees] - if download_images - else [] - ) - - match format: - case DownloadFormat.epub: - book = EPUBGenerator(metadata, part_trees, cover_data, images) - media_type = "application/epub+zip" - case DownloadFormat.pdf: - author_image = await fetch_image( - metadata["user"]["avatar"].replace("-256-", "-512-") + download_id, metadata = await fetch_story_from_partId( + download_id, cookies ) - if not author_image: - raise HTTPException(status_code=422) - - book = PDFGenerator( - metadata, part_trees, cover_data, images, author_image + output_buffer = await download_story( + metadata, download_images, format, cookies ) - media_type = "application/pdf" + case DownloadMode.list: + if not PDFS_ENABLED: + logger.error("List Downloads not enabled.") + return HTMLResponse( + status_code=403, + content='List Downloads have been disabled by the server administrator. Support is available on the Discord', + ) - logger.info(f"Retrieved story metadata and cover ({story_id=})") + metadata = await fetch_list(download_id, cookies) + output_buffer = await download_list( + metadata, download_images, format, cookies + ) - book.compile() - - book_buffer = book.dump() + media_type = "application/zip" + extension = "zip" async def iterfile(): - while chunk := book_buffer.read(512 * 4): # 4 kb/s + while chunk := output_buffer.read(512 * 4): # 4 kb/s await asyncio.sleep(0.1) # throttle download speed yield chunk return StreamingResponse( - book_buffer if PDFS_ENABLED else iterfile(), + output_buffer if PDFS_ENABLED else iterfile(), media_type=media_type, headers={ - "Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"', # Thanks https://stackoverflow.com/a/72729058 - "Content-Length": str(book_buffer.getbuffer().nbytes), + "Content-Disposition": f'attachment; filename="{slugify(metadata["name" if mode==DownloadMode.list else "title"])}_{download_id}{"_images" if download_images else ""}.{extension}"', # Thanks https://stackoverflow.com/a/72729058 + "Content-Length": str(output_buffer.getbuffer().nbytes), }, ) diff --git a/src/frontend/src/routes/+page.svelte b/src/frontend/src/routes/+page.svelte index d8459e2..3fcee9c 100644 --- a/src/frontend/src/routes/+page.svelte +++ b/src/frontend/src/routes/+page.svelte @@ -11,7 +11,7 @@ password: "" }); let downloadId = $state(""); - /** @type {"story" | "part" | ""} */ + /** @type {"story" | "part" | "list" |""} */ let mode = $state(""); let inputUrl = $state(""); @@ -83,6 +83,12 @@ setInputAsValid( input.split("?", 1)[0].split("/stories/")[1] // removes params ); + } else if (input.includes("/list/")) { + // https://www.wattpad.com/list/829974064 + mode = "list"; + setInputAsValid( + input.split("?", 1)[0].split("/list/")[1] // removes params + ); } else { // https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title