8 Commits

Author SHA1 Message Date
Aron BenDaniel e53ba34bac list-downloading v3 2025-11-14 14:57:25 -05:00
Aron BenDaniel 943846a88b Merge remote-tracking branch 'TheOnlyWayUp/fix/#85-image-size' into list-downloading 2025-11-14 14:39:25 -05:00
TheOnlyWayUp a84d4edb6b fix(docker): Install libraries for weasyprint 2025-11-10 00:29:38 +05:30
TheOnlyWayUp 1379f416bc fix(docker): .dockerignore targets subdirectories 2025-11-10 00:29:09 +05:30
TheOnlyWayUp 83466ded4d fix(docker): Remove cache files after install 2025-11-10 00:05:09 +05:30
Aron BenDaniel 6c1a145577 docker: Fix .dockerignore uv.lock entry 2025-10-31 05:49:08 +05:30
Dhanush R cc9ac6093f feat(api): Remove dependency on exiftool (#82) 2025-10-30 16:11:16 +05:30
Aaron BenDaniel fa60de79fd Update README.md copyright year
The future is now.
2025-10-29 01:44:54 +05:30
8 changed files with 186 additions and 79 deletions
+11 -10
View File
@@ -1,10 +1,11 @@
__pycache__ **/__pycache__/
*ipynb **/*.ipynb
build **/build/
.idea .idea/
.vscode .vscode/
.venv **/.venv/
.env **/.env
*log **/.env_template
*.md **/*.log
uv.lock **/*.md
src/api/uv.lock
+6 -9
View File
@@ -1,4 +1,4 @@
FROM node:20 FROM node:20-alpine
WORKDIR /build WORKDIR /build
COPY src/frontend/package*.json . COPY src/frontend/package*.json .
@@ -17,15 +17,12 @@ FROM python:3.13-slim
WORKDIR /app WORKDIR /app
COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/ RUN apt update && \
apt install -y git build-essential python3.13-dev libglib2.0-0 libpango-1.0-0 libpangoft2-1.0-0 && \
RUN apt update apt clean && \
RUN apt install -y aria2 rm -rf /var/lib/apt/lists/*
RUN apt-fast install -y git build-essential python3.13-dev libgobject-2.0 libpango-1.0 libpangoft2-1.0
# aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13 # aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13
# weasyprint depends on libgobject, libpango, and libpangoft2 # weasyprint depends on libgobject, libpango, and libpangoft2
RUN rm -rf /var/lib/apt/lists/*
# https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950 # https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950
@@ -36,7 +33,7 @@ WORKDIR /app
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY src/api/pyproject.toml /app COPY src/api/pyproject.toml /app
RUN uv sync RUN uv sync && uv cache clean
COPY src/api/ /app COPY src/api/ /app
COPY --from=0 /build/build /app/src/build COPY --from=0 /build/build /app/src/build
+1 -1
View File
@@ -53,5 +53,5 @@ My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fas
--- ---
<div align="center"> <div align="center">
<p>TheOnlyWayUp © 2024</p> <p>TheOnlyWayUp © 2025</p>
</div> </div>
+2
View File
@@ -5,9 +5,11 @@ from .create_book import (
fetch_story, fetch_story,
fetch_story_content_zip, fetch_story_content_zip,
fetch_story_from_partId, fetch_story_from_partId,
fetch_list,
) )
from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError
from .generators import EPUBGenerator, PDFGenerator from .generators import EPUBGenerator, PDFGenerator
from .logs import logger from .logs import logger
from .parser import fetch_image from .parser import fetch_image
from .utils import slugify from .utils import slugify
from .models import Story, List
+21 -3
View File
@@ -11,7 +11,7 @@ from pydantic import TypeAdapter
from .exceptions import PartNotFoundError, StoryNotFoundError from .exceptions import PartNotFoundError, StoryNotFoundError
from .logs import logger from .logs import logger
from .models import Story from .models import Story, List
from .vars import cache, headers from .vars import cache, headers
story_ta = TypeAdapter(Story) story_ta = TypeAdapter(Story)
@@ -70,7 +70,7 @@ async def fetch_story_from_partId(
headers=headers, cache=None if cookies else cache headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies. ) as session: # Don't cache requests with Cookies.
async with session.get( async with session.get(
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)" f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
) as response: ) as response:
body = await response.json() body = await response.json()
@@ -93,7 +93,7 @@ async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
headers=headers, cookies=cookies, cache=None if cookies else cache headers=headers, cookies=cookies, cache=None if cookies else cache
) as session: ) as session:
async with session.get( async with session.get(
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright" f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright"
) as response: ) as response:
body = await response.json() body = await response.json()
@@ -127,3 +127,21 @@ async def fetch_story_content_zip(
bytes_stream = BytesIO(await response.read()) bytes_stream = BytesIO(await response.read())
return bytes_stream return bytes_stream
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_list(list_id: int, cookies: Optional[dict] = None) -> List:
    """Retrieve a list's name and the metadata of its stories by List ID.

    Args:
        list_id: ID of the Wattpad list to look up.
        cookies: Optional cookies sent with the request. When provided, the
            shared response cache is bypassed.

    Returns:
        The decoded JSON body of the lists API response.

    Raises:
        ClientResponseError: Propagated from ``raise_for_status()`` once the
            ``backoff`` decorator stops retrying (``max_time=15``).
    """
    url = f"https://www.wattpad.com/api/v3/lists/{list_id}?fields=name,stories(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
    # Don't cache requests with Cookies.
    session_cache = None if cookies else cache

    with start_action(action_type="api_fetch_list", list_id=list_id):
        async with CachedSession(
            headers=headers,
            cookies=cookies,
            cache=session_cache,
        ) as session, session.get(url) as response:
            # Fail on error statuses so the backoff decorator can retry.
            response.raise_for_status()
            return await response.json()
+7 -1
View File
@@ -1,4 +1,4 @@
from typing import Optional, TypedDict from typing import Optional, TypedDict, NotRequired
class CopyrightData(TypedDict): class CopyrightData(TypedDict):
@@ -22,6 +22,7 @@ class User(TypedDict):
class Part(TypedDict): class Part(TypedDict):
id: int id: int
title: str title: str
deleted: NotRequired[bool]
class Story(TypedDict): class Story(TypedDict):
@@ -40,3 +41,8 @@ class Story(TypedDict):
parts: list[Part] parts: list[Part]
isPaywalled: bool isPaywalled: bool
copyright: int copyright: int
# A Wattpad list: its name plus the Story entries it contains.
class List(TypedDict):
name: str  # Name of the list
stories: list[Story]  # One Story mapping per story in the list
+131 -54
View File
@@ -3,6 +3,7 @@
import asyncio import asyncio
from enum import Enum from enum import Enum
from os import getenv from os import getenv
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from zipfile import ZipFile from zipfile import ZipFile
@@ -29,8 +30,11 @@ from create_book import (
fetch_story, fetch_story,
fetch_story_content_zip, fetch_story_content_zip,
fetch_story_from_partId, fetch_story_from_partId,
fetch_list,
logger, logger,
slugify, slugify,
Story,
List,
) )
from create_book.parser import clean_tree, fetch_tree_images from create_book.parser import clean_tree, fetch_tree_images
@@ -85,6 +89,93 @@ class DownloadFormat(Enum):
class DownloadMode(Enum): class DownloadMode(Enum):
story = "story" story = "story"
part = "part" part = "part"
list = "list"
async def download_story(
metadata: Story,
download_images: bool = False,
format: DownloadFormat = DownloadFormat.epub,
cookies: dict = None,
) -> BytesIO:
with start_action(
action_type="download_story",
story_id=metadata["id"],
download_images=download_images,
format=format,
):
# Fetch cover image
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
# Fetch parts archive
story_zip = await fetch_story_content_zip(metadata["id"], cookies)
archive = ZipFile(story_zip, "r")
# Parse part content
part_trees: list[BeautifulSoup] = []
for part in metadata["parts"]:
if "deleted" in part and part["deleted"]:
continue
part_trees.append(
clean_tree(
part["title"],
part["id"],
archive.read(str(part["id"])).decode("utf-8"),
)
)
# Fetch images
images = (
[await fetch_tree_images(tree) for tree in part_trees]
if download_images
else []
)
# Build output file
match format:
case DownloadFormat.epub:
book = EPUBGenerator(metadata, part_trees, cover_data, images)
case DownloadFormat.pdf:
# Fetch author profile picture
author_image = await fetch_image(
metadata["user"]["avatar"].replace("-256-", "-512-")
)
if not author_image:
raise HTTPException(status_code=422)
book = PDFGenerator(
metadata, part_trees, cover_data, images, author_image
)
logger.info(f"Retrieved story metadata and cover ({metadata['id']=})")
book.compile()
return book.dump()
async def download_list(
    metadata: List,
    download_images: bool = False,
    format: DownloadFormat = DownloadFormat.epub,
    cookies: Optional[dict] = None,
) -> BytesIO:
    """Bundle every story of a list into a single ZIP archive.

    Each story is generated with ``download_story`` and stored under
    ``<slug>_<id>[_images].<epub|pdf>``.

    Args:
        metadata: List metadata; only ``stories`` is read here.
        download_images: Forwarded to ``download_story``; also adds the
            ``_images`` suffix to each entry name.
        format: Output format for every story in the archive.
        cookies: Optional cookies forwarded to ``download_story``.

    Returns:
        A buffer holding the ZIP archive, rewound to the start.
    """
    # Only add the suffix (with its underscore) when images are included, so
    # names don't end in a dangling "_" before the extension.
    image_suffix = "_images" if download_images else ""
    extension = "epub" if format == DownloadFormat.epub else "pdf"

    output_buffer = BytesIO()
    with ZipFile(output_buffer, "w") as archive:
        for story in metadata["stories"]:
            story_file = await download_story(story, download_images, format, cookies)
            file_name = (
                f"{slugify(story['title'])}_{story['id']}{image_suffix}.{extension}"
            )
            archive.writestr(file_name, story_file.read())

    # Rewind only after the archive is closed, so the ZIP central directory
    # has been written before callers start reading.
    output_buffer.seek(0)
    return output_buffer
@app.get("/") @app.get("/")
@@ -133,7 +224,7 @@ async def handle_download(
password: Optional[str] = None, password: Optional[str] = None,
): ):
with start_action( with start_action(
action_type="download", action_type="handle_download",
download_id=download_id, download_id=download_id,
download_images=download_images, download_images=download_images,
format=format, format=format,
@@ -161,75 +252,61 @@ async def handle_download(
else: else:
cookies = None cookies = None
if format == DownloadFormat.pdf and not PDFS_ENABLED: match format:
logger.error("PDF Downloads not enabled.") case DownloadFormat.epub:
return HTMLResponse( media_type = "application/epub+zip"
status_code=403, extension = "epub"
content='PDF Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>', case DownloadFormat.pdf:
) if not PDFS_ENABLED:
logger.error("PDF Downloads not enabled.")
return HTMLResponse(
status_code=403,
content='PDF Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
media_type = "application/pdf"
extension = "pdf"
match mode: match mode:
case DownloadMode.story: case DownloadMode.story:
story_id = download_id metadata = await fetch_story(download_id, cookies)
metadata = await fetch_story(story_id, cookies) output_buffer = await download_story(
metadata, download_images, format, cookies
)
case DownloadMode.part: case DownloadMode.part:
story_id, metadata = await fetch_story_from_partId(download_id, cookies) download_id, metadata = await fetch_story_from_partId(
download_id, cookies
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
story_zip = await fetch_story_content_zip(story_id, cookies)
archive = ZipFile(story_zip, "r")
part_trees: list[BeautifulSoup] = [
clean_tree(
part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8")
)
for part in metadata["parts"]
]
images = (
[await fetch_tree_images(tree) for tree in part_trees]
if download_images
else []
)
match format:
case DownloadFormat.epub:
book = EPUBGenerator(metadata, part_trees, cover_data, images)
media_type = "application/epub+zip"
case DownloadFormat.pdf:
author_image = await fetch_image(
metadata["user"]["avatar"].replace("-256-", "-512-")
) )
if not author_image: output_buffer = await download_story(
raise HTTPException(status_code=422) metadata, download_images, format, cookies
book = PDFGenerator(
metadata, part_trees, cover_data, images, author_image
) )
media_type = "application/pdf" case DownloadMode.list:
if not PDFS_ENABLED:
logger.error("List Downloads not enabled.")
return HTMLResponse(
status_code=403,
content='List Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
logger.info(f"Retrieved story metadata and cover ({story_id=})") metadata = await fetch_list(download_id, cookies)
output_buffer = await download_list(
metadata, download_images, format, cookies
)
book.compile() media_type = "application/zip"
extension = "zip"
book_buffer = book.dump()
async def iterfile(): async def iterfile():
while chunk := book_buffer.read(512 * 4): # 4 kb/s while chunk := output_buffer.read(512 * 4): # 4 kb/s
await asyncio.sleep(0.1) # throttle download speed await asyncio.sleep(0.1) # throttle download speed
yield chunk yield chunk
return StreamingResponse( return StreamingResponse(
book_buffer if PDFS_ENABLED else iterfile(), output_buffer if PDFS_ENABLED else iterfile(),
media_type=media_type, media_type=media_type,
headers={ headers={
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"', # Thanks https://stackoverflow.com/a/72729058 "Content-Disposition": f'attachment; filename="{slugify(metadata["name" if mode==DownloadMode.list else "title"])}_{download_id}{"_images" if download_images else ""}.{extension}"', # Thanks https://stackoverflow.com/a/72729058
"Content-Length": str(book_buffer.getbuffer().nbytes), "Content-Length": str(output_buffer.getbuffer().nbytes),
}, },
) )
+7 -1
View File
@@ -11,7 +11,7 @@
password: "" password: ""
}); });
let downloadId = $state(""); let downloadId = $state("");
/** @type {"story" | "part" | ""} */ /** @type {"story" | "part" | "list" |""} */
let mode = $state(""); let mode = $state("");
let inputUrl = $state(""); let inputUrl = $state("");
@@ -83,6 +83,12 @@
setInputAsValid( setInputAsValid(
input.split("?", 1)[0].split("/stories/")[1] // removes params input.split("?", 1)[0].split("/stories/")[1] // removes params
); );
} else if (input.includes("/list/")) {
// https://www.wattpad.com/list/829974064
mode = "list";
setInputAsValid(
input.split("?", 1)[0].split("/list/")[1] // removes params
);
} else { } else {
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me // https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title