From e53ba34bac7d67257be0a0c2b03abe7010dceed1 Mon Sep 17 00:00:00 2001
From: Aron BenDaniel <144371000+AaronBenDaniel@users.noreply.github.com>
Date: Fri, 14 Nov 2025 14:57:25 -0500
Subject: [PATCH] list-downloading v3
---
src/api/src/create_book/__init__.py | 2 +
src/api/src/create_book/create_book.py | 24 +++-
src/api/src/create_book/models.py | 8 +-
src/api/src/main.py | 185 +++++++++++++++++--------
src/frontend/src/routes/+page.svelte | 8 +-
5 files changed, 168 insertions(+), 59 deletions(-)
diff --git a/src/api/src/create_book/__init__.py b/src/api/src/create_book/__init__.py
index 880237d..3deab1d 100644
--- a/src/api/src/create_book/__init__.py
+++ b/src/api/src/create_book/__init__.py
@@ -5,9 +5,11 @@ from .create_book import (
fetch_story,
fetch_story_content_zip,
fetch_story_from_partId,
+ fetch_list,
)
from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError
from .generators import EPUBGenerator, PDFGenerator
from .logs import logger
from .parser import fetch_image
from .utils import slugify
+from .models import Story, List
diff --git a/src/api/src/create_book/create_book.py b/src/api/src/create_book/create_book.py
index b69ae93..171df89 100644
--- a/src/api/src/create_book/create_book.py
+++ b/src/api/src/create_book/create_book.py
@@ -11,7 +11,7 @@ from pydantic import TypeAdapter
from .exceptions import PartNotFoundError, StoryNotFoundError
from .logs import logger
-from .models import Story
+from .models import Story, List
from .vars import cache, headers
story_ta = TypeAdapter(Story)
@@ -70,7 +70,7 @@ async def fetch_story_from_partId(
headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies.
async with session.get(
- f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)"
+ f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
) as response:
body = await response.json()
@@ -93,7 +93,7 @@ async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
headers=headers, cookies=cookies, cache=None if cookies else cache
) as session:
async with session.get(
- f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright"
+ f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright"
) as response:
body = await response.json()
@@ -127,3 +127,21 @@ async def fetch_story_content_zip(
bytes_stream = BytesIO(await response.read())
return bytes_stream
+
+
+@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
+async def fetch_list(list_id: int, cookies: Optional[dict] = None) -> List:
+    """Retrieve a list's name and its stories' metadata from the Wattpad v3 API.
+
+    Args:
+        list_id: ID interpolated into the ``/api/v3/lists/`` URL.
+        cookies: Optional cookies sent with the request. When provided, the
+            response cache is bypassed.
+
+    Returns:
+        The decoded JSON body, returned as-is (no validation is done here).
+
+    Raises:
+        ClientResponseError: Raised by ``raise_for_status()``; the decorator
+            retries with exponential backoff for up to 15 seconds first.
+    """
+    # NOTE(review): because raise_for_status() sits under the backoff
+    # decorator, a non-retryable status (e.g. a missing list) is presumably
+    # retried for the full 15 seconds too -- confirm whether that is intended.
+    session_cache = None if cookies else cache  # Don't cache requests with Cookies.
+    url = f"https://www.wattpad.com/api/v3/lists/{list_id}?fields=name,stories(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
+
+    with start_action(action_type="api_fetch_list", list_id=list_id):
+        async with CachedSession(
+            headers=headers, cookies=cookies, cache=session_cache
+        ) as session, session.get(url) as response:
+            response.raise_for_status()
+            return await response.json()
diff --git a/src/api/src/create_book/models.py b/src/api/src/create_book/models.py
index 53ef697..d0231f6 100644
--- a/src/api/src/create_book/models.py
+++ b/src/api/src/create_book/models.py
@@ -1,4 +1,4 @@
-from typing import Optional, TypedDict
+from typing import Optional, TypedDict, NotRequired
class CopyrightData(TypedDict):
@@ -22,6 +22,7 @@ class User(TypedDict):
class Part(TypedDict):
id: int
title: str
+ # Optional key, requested from the API via `parts(id,title,deleted)`;
+ # download_story skips any part where it is present and truthy.
+ deleted: NotRequired[bool]
class Story(TypedDict):
@@ -40,3 +41,8 @@ class Story(TypedDict):
parts: list[Part]
isPaywalled: bool
copyright: int
+
+
+class List(TypedDict):
+ """Payload shape for a list, matching the `name,stories(...)` fields requested by fetch_list."""
+
+ # Name of the list; handle_download slugifies it for the download filename.
+ name: str
+ # Metadata for each story in the list, in the same shape as a single Story.
+ stories: list[Story]
diff --git a/src/api/src/main.py b/src/api/src/main.py
index 854676a..9e84699 100644
--- a/src/api/src/main.py
+++ b/src/api/src/main.py
@@ -3,6 +3,7 @@
import asyncio
from enum import Enum
from os import getenv
+from io import BytesIO
from pathlib import Path
from typing import Optional
from zipfile import ZipFile
@@ -29,8 +30,11 @@ from create_book import (
fetch_story,
fetch_story_content_zip,
fetch_story_from_partId,
+ fetch_list,
logger,
slugify,
+ Story,
+ List,
)
from create_book.parser import clean_tree, fetch_tree_images
@@ -85,6 +89,93 @@ class DownloadFormat(Enum):
class DownloadMode(Enum):
story = "story"
part = "part"
+ # Chosen by the frontend for URLs containing "/list/"; the list branch of
+ # handle_download responds with media type application/zip.
+ list = "list"
+
+
+async def download_story(
+    metadata: Story,
+    download_images: bool = False,
+    format: DownloadFormat = DownloadFormat.epub,
+    cookies: Optional[dict] = None,
+) -> BytesIO:
+    """Build an EPUB or PDF for a single story from already-fetched metadata.
+
+    Args:
+        metadata: Story metadata; ``id``, ``cover``, ``parts`` and (for PDFs)
+            ``user.avatar`` are read here.
+        download_images: When true, images referenced by each part are
+            fetched and handed to the generator; otherwise an empty list is.
+        format: Output format; selects EPUBGenerator or PDFGenerator.
+        cookies: Optional cookies forwarded to the parts-archive request.
+
+    Returns:
+        The result of the generator's ``dump()`` (annotated as BytesIO).
+
+    Raises:
+        HTTPException: 422 when the cover, or the author avatar for PDFs,
+            could not be fetched (``fetch_image`` returned a falsy value).
+    """
+    with start_action(
+        action_type="download_story",
+        story_id=metadata["id"],
+        download_images=download_images,
+        format=format,
+    ):
+        # Fetch cover image
+        cover_data = await fetch_image(
+            metadata["cover"].replace("-256-", "-512-")
+        )  # Increase resolution
+        if not cover_data:
+            raise HTTPException(status_code=422)
+
+        # Fetch parts archive
+        story_zip = await fetch_story_content_zip(metadata["id"], cookies)
+
+        # Parse part content. The archive is only needed while reading the
+        # parts, so close it deterministically instead of leaving it to GC.
+        # Parts flagged as deleted are skipped.
+        with ZipFile(story_zip, "r") as archive:
+            part_trees: list[BeautifulSoup] = [
+                clean_tree(
+                    part["title"],
+                    part["id"],
+                    archive.read(str(part["id"])).decode("utf-8"),
+                )
+                for part in metadata["parts"]
+                if not part.get("deleted")
+            ]
+
+        # Fetch images
+        images = (
+            [await fetch_tree_images(tree) for tree in part_trees]
+            if download_images
+            else []
+        )
+
+        # Build output file
+        match format:
+            case DownloadFormat.epub:
+                book = EPUBGenerator(metadata, part_trees, cover_data, images)
+            case DownloadFormat.pdf:
+                # Fetch author profile picture
+                author_image = await fetch_image(
+                    metadata["user"]["avatar"].replace("-256-", "-512-")
+                )
+                if not author_image:
+                    raise HTTPException(status_code=422)
+
+                book = PDFGenerator(
+                    metadata, part_trees, cover_data, images, author_image
+                )
+
+        logger.info(f"Retrieved story metadata and cover ({metadata['id']=})")
+
+        book.compile()
+
+        return book.dump()
+
+
+async def download_list(
+    metadata: List,
+    download_images: bool = False,
+    format: DownloadFormat = DownloadFormat.epub,
+    cookies: Optional[dict] = None,
+) -> BytesIO:
+    """Build every story of a list and bundle the results into one zip archive.
+
+    Stories are built one after another with ``download_story``; an exception
+    from any of them propagates and aborts the whole archive.
+
+    Args:
+        metadata: List payload; only ``stories`` is read here.
+        download_images: Forwarded to ``download_story``; also adds an
+            ``_images`` suffix to each member's file name.
+        format: Forwarded to ``download_story``; determines each member's
+            file extension.
+        cookies: Optional cookies forwarded to ``download_story``.
+
+    Returns:
+        An in-memory zip archive, rewound to position 0.
+    """
+    # Identical for every member, so compute once. The suffix carries its own
+    # underscore so names do not end in a dangling "_" when images are off,
+    # matching the Content-Disposition naming used by handle_download.
+    images_suffix = "_images" if download_images else ""
+    extension = "epub" if format == DownloadFormat.epub else "pdf"
+
+    output_buffer = BytesIO()
+
+    with ZipFile(output_buffer, "w") as archive:
+        for story in metadata["stories"]:
+            story_file = await download_story(story, download_images, format, cookies)
+            file_name = (
+                f"{slugify(story['title'])}_{story['id']}{images_suffix}.{extension}"
+            )
+            # getvalue() returns the full contents regardless of where the
+            # buffer's cursor was left.
+            archive.writestr(file_name, story_file.getvalue())
+
+    output_buffer.seek(0)
+
+    return output_buffer
@app.get("/")
@@ -133,7 +224,7 @@ async def handle_download(
password: Optional[str] = None,
):
with start_action(
- action_type="download",
+ action_type="handle_download",
download_id=download_id,
download_images=download_images,
format=format,
@@ -161,75 +252,61 @@ async def handle_download(
else:
cookies = None
- if format == DownloadFormat.pdf and not PDFS_ENABLED:
- logger.error("PDF Downloads not enabled.")
- return HTMLResponse(
- status_code=403,
- content='PDF Downloads have been disabled by the server administrator. Support is available on the Discord',
- )
+ match format:
+ case DownloadFormat.epub:
+ media_type = "application/epub+zip"
+ extension = "epub"
+ case DownloadFormat.pdf:
+ if not PDFS_ENABLED:
+ logger.error("PDF Downloads not enabled.")
+ return HTMLResponse(
+ status_code=403,
+ content='PDF Downloads have been disabled by the server administrator. Support is available on the Discord',
+ )
+
+ media_type = "application/pdf"
+ extension = "pdf"
match mode:
case DownloadMode.story:
- story_id = download_id
- metadata = await fetch_story(story_id, cookies)
+ metadata = await fetch_story(download_id, cookies)
+ output_buffer = await download_story(
+ metadata, download_images, format, cookies
+ )
case DownloadMode.part:
- story_id, metadata = await fetch_story_from_partId(download_id, cookies)
-
- cover_data = await fetch_image(
- metadata["cover"].replace("-256-", "-512-")
- ) # Increase resolution
- if not cover_data:
- raise HTTPException(status_code=422)
-
- story_zip = await fetch_story_content_zip(story_id, cookies)
- archive = ZipFile(story_zip, "r")
-
- part_trees: list[BeautifulSoup] = [
- clean_tree(
- part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8")
- )
- for part in metadata["parts"]
- ]
-
- images = (
- [await fetch_tree_images(tree) for tree in part_trees]
- if download_images
- else []
- )
-
- match format:
- case DownloadFormat.epub:
- book = EPUBGenerator(metadata, part_trees, cover_data, images)
- media_type = "application/epub+zip"
- case DownloadFormat.pdf:
- author_image = await fetch_image(
- metadata["user"]["avatar"].replace("-256-", "-512-")
+ download_id, metadata = await fetch_story_from_partId(
+ download_id, cookies
)
- if not author_image:
- raise HTTPException(status_code=422)
-
- book = PDFGenerator(
- metadata, part_trees, cover_data, images, author_image
+ output_buffer = await download_story(
+ metadata, download_images, format, cookies
)
- media_type = "application/pdf"
+ case DownloadMode.list:
+ if not PDFS_ENABLED:
+ logger.error("List Downloads not enabled.")
+ return HTMLResponse(
+ status_code=403,
+ content='List Downloads have been disabled by the server administrator. Support is available on the Discord',
+ )
- logger.info(f"Retrieved story metadata and cover ({story_id=})")
+ metadata = await fetch_list(download_id, cookies)
+ output_buffer = await download_list(
+ metadata, download_images, format, cookies
+ )
- book.compile()
-
- book_buffer = book.dump()
+ media_type = "application/zip"
+ extension = "zip"
async def iterfile():
- while chunk := book_buffer.read(512 * 4): # 4 kb/s
+ while chunk := output_buffer.read(512 * 4): # 4 kb/s
await asyncio.sleep(0.1) # throttle download speed
yield chunk
return StreamingResponse(
- book_buffer if PDFS_ENABLED else iterfile(),
+ output_buffer if PDFS_ENABLED else iterfile(),
media_type=media_type,
headers={
- "Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"', # Thanks https://stackoverflow.com/a/72729058
- "Content-Length": str(book_buffer.getbuffer().nbytes),
+ "Content-Disposition": f'attachment; filename="{slugify(metadata["name" if mode==DownloadMode.list else "title"])}_{download_id}{"_images" if download_images else ""}.{extension}"', # Thanks https://stackoverflow.com/a/72729058
+ "Content-Length": str(output_buffer.getbuffer().nbytes),
},
)
diff --git a/src/frontend/src/routes/+page.svelte b/src/frontend/src/routes/+page.svelte
index d8459e2..3fcee9c 100644
--- a/src/frontend/src/routes/+page.svelte
+++ b/src/frontend/src/routes/+page.svelte
@@ -11,7 +11,7 @@
password: ""
});
let downloadId = $state("");
- /** @type {"story" | "part" | ""} */
+ /** @type {"story" | "part" | "list" |""} */
let mode = $state("");
let inputUrl = $state("");
@@ -83,6 +83,12 @@
setInputAsValid(
input.split("?", 1)[0].split("/stories/")[1] // removes params
);
+ } else if (input.includes("/list/")) {
+ // https://www.wattpad.com/list/829974064
+ mode = "list";
+ setInputAsValid(
+ input.split("?", 1)[0].split("/list/")[1] // removes params
+ );
} else {
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title