list-downloading v3

This commit is contained in:
Aron BenDaniel
2025-11-14 14:57:25 -05:00
parent 943846a88b
commit e53ba34bac
5 changed files with 168 additions and 59 deletions
+2
View File
@@ -5,9 +5,11 @@ from .create_book import (
fetch_story, fetch_story,
fetch_story_content_zip, fetch_story_content_zip,
fetch_story_from_partId, fetch_story_from_partId,
fetch_list,
) )
from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError
from .generators import EPUBGenerator, PDFGenerator from .generators import EPUBGenerator, PDFGenerator
from .logs import logger from .logs import logger
from .parser import fetch_image from .parser import fetch_image
from .utils import slugify from .utils import slugify
from .models import Story, List
+21 -3
View File
@@ -11,7 +11,7 @@ from pydantic import TypeAdapter
from .exceptions import PartNotFoundError, StoryNotFoundError from .exceptions import PartNotFoundError, StoryNotFoundError
from .logs import logger from .logs import logger
from .models import Story from .models import Story, List
from .vars import cache, headers from .vars import cache, headers
story_ta = TypeAdapter(Story) story_ta = TypeAdapter(Story)
@@ -70,7 +70,7 @@ async def fetch_story_from_partId(
headers=headers, cache=None if cookies else cache headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies. ) as session: # Don't cache requests with Cookies.
async with session.get( async with session.get(
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)" f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
) as response: ) as response:
body = await response.json() body = await response.json()
@@ -93,7 +93,7 @@ async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
headers=headers, cookies=cookies, cache=None if cookies else cache headers=headers, cookies=cookies, cache=None if cookies else cache
) as session: ) as session:
async with session.get( async with session.get(
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright" f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright"
) as response: ) as response:
body = await response.json() body = await response.json()
@@ -127,3 +127,21 @@ async def fetch_story_content_zip(
bytes_stream = BytesIO(await response.read()) bytes_stream = BytesIO(await response.read())
return bytes_stream return bytes_stream
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_list(list_id: int, cookies: Optional[dict] = None) -> List:
    """Retrieve a list's name and the metadata of its stories from the API.

    The call is retried with exponential backoff (bounded by ``max_time=15``)
    whenever a ``ClientResponseError`` is raised, which includes the error
    raised by ``raise_for_status()`` on an unsuccessful response.

    Args:
        list_id: ID of the list to look up.
        cookies: Optional cookies sent with the request. When provided, the
            shared cache is bypassed for this session.

    Returns:
        The decoded JSON body of the response, returned as-is.
    """
    with start_action(action_type="api_fetch_list", list_id=list_id):
        list_url = f"https://www.wattpad.com/api/v3/lists/{list_id}?fields=name,stories(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
        # Don't cache requests with Cookies.
        session_cache = None if cookies else cache

        async with CachedSession(
            headers=headers, cookies=cookies, cache=session_cache
        ) as session:
            async with session.get(list_url) as response:
                response.raise_for_status()
                return await response.json()
+7 -1
View File
@@ -1,4 +1,4 @@
from typing import Optional, TypedDict from typing import Optional, TypedDict, NotRequired
class CopyrightData(TypedDict): class CopyrightData(TypedDict):
@@ -22,6 +22,7 @@ class User(TypedDict):
class Part(TypedDict): class Part(TypedDict):
id: int id: int
title: str title: str
deleted: NotRequired[bool]
class Story(TypedDict): class Story(TypedDict):
@@ -40,3 +41,8 @@ class Story(TypedDict):
parts: list[Part] parts: list[Part]
isPaywalled: bool isPaywalled: bool
copyright: int copyright: int
class List(TypedDict):
    """Typed shape of a list: its name together with the stories it holds."""

    # Name of the list.
    name: str

    # One Story entry per story in the list.
    stories: list[Story]
+125 -48
View File
@@ -3,6 +3,7 @@
import asyncio import asyncio
from enum import Enum from enum import Enum
from os import getenv from os import getenv
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from zipfile import ZipFile from zipfile import ZipFile
@@ -29,8 +30,11 @@ from create_book import (
fetch_story, fetch_story,
fetch_story_content_zip, fetch_story_content_zip,
fetch_story_from_partId, fetch_story_from_partId,
fetch_list,
logger, logger,
slugify, slugify,
Story,
List,
) )
from create_book.parser import clean_tree, fetch_tree_images from create_book.parser import clean_tree, fetch_tree_images
@@ -85,6 +89,93 @@ class DownloadFormat(Enum):
class DownloadMode(Enum): class DownloadMode(Enum):
story = "story" story = "story"
part = "part" part = "part"
list = "list"
async def download_story(
metadata: Story,
download_images: bool = False,
format: DownloadFormat = DownloadFormat.epub,
cookies: dict = None,
) -> BytesIO:
with start_action(
action_type="download_story",
story_id=metadata["id"],
download_images=download_images,
format=format,
):
# Fetch cover image
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
# Fetch parts archive
story_zip = await fetch_story_content_zip(metadata["id"], cookies)
archive = ZipFile(story_zip, "r")
# Parse part content
part_trees: list[BeautifulSoup] = []
for part in metadata["parts"]:
if "deleted" in part and part["deleted"]:
continue
part_trees.append(
clean_tree(
part["title"],
part["id"],
archive.read(str(part["id"])).decode("utf-8"),
)
)
# Fetch images
images = (
[await fetch_tree_images(tree) for tree in part_trees]
if download_images
else []
)
# Build output file
match format:
case DownloadFormat.epub:
book = EPUBGenerator(metadata, part_trees, cover_data, images)
case DownloadFormat.pdf:
# Fetch author profile picture
author_image = await fetch_image(
metadata["user"]["avatar"].replace("-256-", "-512-")
)
if not author_image:
raise HTTPException(status_code=422)
book = PDFGenerator(
metadata, part_trees, cover_data, images, author_image
)
logger.info(f"Retrieved story metadata and cover ({metadata['id']=})")
book.compile()
return book.dump()
async def download_list(
    metadata: List,
    download_images: bool = False,
    format: DownloadFormat = DownloadFormat.epub,
    cookies: Optional[dict] = None,
) -> BytesIO:
    """Build every story of a list and bundle the files into one ZIP archive.

    Stories are generated one after another with ``download_story`` and each
    result is stored as its own archive entry.

    Args:
        metadata: List metadata; only ``stories`` is read here.
        download_images: Forwarded to ``download_story`` and reflected in
            each entry's file name.
        format: Output format used for every story in the list.
        cookies: Optional cookies forwarded to ``download_story``.

    Returns:
        A buffer holding the ZIP archive, rewound to the start.
    """
    extension = "epub" if format == DownloadFormat.epub else "pdf"
    # The suffix carries its own underscore so that entries built without
    # images do not end in a dangling "_" before the extension.
    image_suffix = "_images" if download_images else ""

    output_buffer = BytesIO()
    with ZipFile(output_buffer, "w") as archive:
        for story in metadata["stories"]:
            story_file = await download_story(
                story, download_images, format, cookies
            )
            file_name = (
                f"{slugify(story['title'])}_{story['id']}{image_suffix}.{extension}"
            )
            archive.writestr(file_name, story_file.read())

    # Rewind so callers can read the archive from the beginning.
    output_buffer.seek(0)
    return output_buffer
@app.get("/") @app.get("/")
@@ -133,7 +224,7 @@ async def handle_download(
password: Optional[str] = None, password: Optional[str] = None,
): ):
with start_action( with start_action(
action_type="download", action_type="handle_download",
download_id=download_id, download_id=download_id,
download_images=download_images, download_images=download_images,
format=format, format=format,
@@ -161,75 +252,61 @@ async def handle_download(
else: else:
cookies = None cookies = None
if format == DownloadFormat.pdf and not PDFS_ENABLED: match format:
case DownloadFormat.epub:
media_type = "application/epub+zip"
extension = "epub"
case DownloadFormat.pdf:
if not PDFS_ENABLED:
logger.error("PDF Downloads not enabled.") logger.error("PDF Downloads not enabled.")
return HTMLResponse( return HTMLResponse(
status_code=403, status_code=403,
content='PDF Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>', content='PDF Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
) )
media_type = "application/pdf"
extension = "pdf"
match mode: match mode:
case DownloadMode.story: case DownloadMode.story:
story_id = download_id metadata = await fetch_story(download_id, cookies)
metadata = await fetch_story(story_id, cookies) output_buffer = await download_story(
metadata, download_images, format, cookies
)
case DownloadMode.part: case DownloadMode.part:
story_id, metadata = await fetch_story_from_partId(download_id, cookies) download_id, metadata = await fetch_story_from_partId(
download_id, cookies
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
story_zip = await fetch_story_content_zip(story_id, cookies)
archive = ZipFile(story_zip, "r")
part_trees: list[BeautifulSoup] = [
clean_tree(
part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8")
) )
for part in metadata["parts"] output_buffer = await download_story(
] metadata, download_images, format, cookies
)
images = ( case DownloadMode.list:
[await fetch_tree_images(tree) for tree in part_trees] if not PDFS_ENABLED:
if download_images logger.error("List Downloads not enabled.")
else [] return HTMLResponse(
status_code=403,
content='List Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
) )
match format: metadata = await fetch_list(download_id, cookies)
case DownloadFormat.epub: output_buffer = await download_list(
book = EPUBGenerator(metadata, part_trees, cover_data, images) metadata, download_images, format, cookies
media_type = "application/epub+zip"
case DownloadFormat.pdf:
author_image = await fetch_image(
metadata["user"]["avatar"].replace("-256-", "-512-")
) )
if not author_image:
raise HTTPException(status_code=422)
book = PDFGenerator( media_type = "application/zip"
metadata, part_trees, cover_data, images, author_image extension = "zip"
)
media_type = "application/pdf"
logger.info(f"Retrieved story metadata and cover ({story_id=})")
book.compile()
book_buffer = book.dump()
async def iterfile(): async def iterfile():
while chunk := book_buffer.read(512 * 4): # 4 kb/s while chunk := output_buffer.read(512 * 4): # 4 kb/s
await asyncio.sleep(0.1) # throttle download speed await asyncio.sleep(0.1) # throttle download speed
yield chunk yield chunk
return StreamingResponse( return StreamingResponse(
book_buffer if PDFS_ENABLED else iterfile(), output_buffer if PDFS_ENABLED else iterfile(),
media_type=media_type, media_type=media_type,
headers={ headers={
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"', # Thanks https://stackoverflow.com/a/72729058 "Content-Disposition": f'attachment; filename="{slugify(metadata["name" if mode==DownloadMode.list else "title"])}_{download_id}{"_images" if download_images else ""}.{extension}"', # Thanks https://stackoverflow.com/a/72729058
"Content-Length": str(book_buffer.getbuffer().nbytes), "Content-Length": str(output_buffer.getbuffer().nbytes),
}, },
) )
+7 -1
View File
@@ -11,7 +11,7 @@
password: "" password: ""
}); });
let downloadId = $state(""); let downloadId = $state("");
/** @type {"story" | "part" | ""} */ /** @type {"story" | "part" | "list" |""} */
let mode = $state(""); let mode = $state("");
let inputUrl = $state(""); let inputUrl = $state("");
@@ -83,6 +83,12 @@
setInputAsValid( setInputAsValid(
input.split("?", 1)[0].split("/stories/")[1] // removes params input.split("?", 1)[0].split("/stories/")[1] // removes params
); );
} else if (input.includes("/list/")) {
// https://www.wattpad.com/list/829974064
mode = "list";
setInputAsValid(
input.split("?", 1)[0].split("/list/")[1] // removes params
);
} else { } else {
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me // https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title