2 Commits

Author SHA1 Message Date
Aron BenDaniel 0e106e6da6 Update copyright year to 2026 2026-01-08 01:12:06 +05:30
Aron BenDaniel 68eb4a22b7 fix(api): Restructure archive parsing to iterate through archive items 2026-01-07 00:48:15 +05:30
10 changed files with 127 additions and 249 deletions
+9 -10
View File
@@ -1,11 +1,10 @@
**/__pycache__/
**/*.ipynb
**/build/
.idea/
.vscode/
**/.venv/
**/.env
**/.env_template
**/*.log
**/*.md
__pycache__
*ipynb
build
.idea
.vscode
.venv
.env
*log
*.md
src/api/uv.lock
+9 -13
View File
@@ -1,4 +1,4 @@
FROM node:20-alpine
FROM node:20
WORKDIR /build
COPY src/frontend/package*.json .
@@ -6,10 +6,6 @@ RUN rm -rf node_modules
RUN rm -rf build
RUN npm install
COPY src/frontend/. .
ARG pdfs=false
ENV VITE_ENABLE_PDFS=$pdfs
RUN npm run build
# Thanks https://stackoverflow.com/q/76988450
@@ -17,12 +13,15 @@ FROM python:3.13-slim
WORKDIR /app
RUN apt update && \
apt install -y git build-essential python3.13-dev libglib2.0-0 libpango-1.0-0 libpangoft2-1.0-0 && \
apt clean && \
rm -rf /var/lib/apt/lists/*
COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/
RUN apt update
RUN apt install -y aria2
RUN apt-fast install -y git build-essential python3.13-dev libgobject-2.0 libpango-1.0 libpangoft2-1.0
# aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13
# weasyprint depends on libgobject, libpango, and libpangoft2
RUN rm -rf /var/lib/apt/lists/*
# https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950
@@ -33,7 +32,7 @@ WORKDIR /app
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY src/api/pyproject.toml /app
RUN uv sync && uv cache clean
RUN uv sync
COPY src/api/ /app
COPY --from=0 /build/build /app/src/build
@@ -41,9 +40,6 @@ RUN ln -s /app/src/pdf/fonts /tmp/fonts
WORKDIR /app/src
ARG pdfs=false
ENV VITE_ENABLE_PDFS=$pdfs
EXPOSE 80
CMD [ "uv", "run", "main.py"]
+1 -1
View File
@@ -53,5 +53,5 @@ My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fas
---
<div align="center">
<p>TheOnlyWayUp © 2025</p>
<p>TheOnlyWayUp © 2026</p>
</div>
-2
View File
@@ -5,11 +5,9 @@ from .create_book import (
fetch_story,
fetch_story_content_zip,
fetch_story_from_partId,
fetch_list,
)
from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError
from .generators import EPUBGenerator, PDFGenerator
from .logs import logger
from .parser import fetch_image
from .utils import slugify
from .models import Story, List
+3 -21
View File
@@ -11,7 +11,7 @@ from pydantic import TypeAdapter
from .exceptions import PartNotFoundError, StoryNotFoundError
from .logs import logger
from .models import Story, List
from .models import Story
from .vars import cache, headers
story_ta = TypeAdapter(Story)
@@ -70,7 +70,7 @@ async def fetch_story_from_partId(
headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies.
async with session.get(
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)"
) as response:
body = await response.json()
@@ -93,7 +93,7 @@ async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
headers=headers, cookies=cookies, cache=None if cookies else cache
) as session:
async with session.get(
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright"
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright"
) as response:
body = await response.json()
@@ -127,21 +127,3 @@ async def fetch_story_content_zip(
bytes_stream = BytesIO(await response.read())
return bytes_stream
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_list(list_id: int, cookies: Optional[dict] = None) -> List:
    """Retrieve the metadata of a Wattpad reading list.

    Args:
        list_id: Identifier of the list to look up.
        cookies: Optional session cookies forwarded with the request.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        ClientResponseError: The response status signalled an error; the
            decorator retries with exponential backoff for up to 15 seconds
            before letting it propagate.
    """
    # Requests carrying cookies must bypass the shared cache.
    session_cache = cache if not cookies else None

    with start_action(action_type="api_fetch_list", list_id=list_id):
        async with CachedSession(
            headers=headers, cookies=cookies, cache=session_cache
        ) as client:
            async with client.get(
                f"https://www.wattpad.com/api/v3/lists/{list_id}?fields=name,stories(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
            ) as reply:
                reply.raise_for_status()
                payload = await reply.json()

    return payload
+1 -7
View File
@@ -1,4 +1,4 @@
from typing import Optional, TypedDict, NotRequired
from typing import Optional, TypedDict
class CopyrightData(TypedDict):
@@ -22,7 +22,6 @@ class User(TypedDict):
# Metadata for a single story part, mirroring the `parts(id,title,deleted)`
# field selection used in the API requests.
class Part(TypedDict):
# Part identifier; download_story uses its string form to look up the part in the content archive.
id: int
# Title of the part, passed to clean_tree when the part is parsed.
title: str
# Optional flag; download_story skips parts where it is present and truthy.
deleted: NotRequired[bool]
class Story(TypedDict):
@@ -41,8 +40,3 @@ class Story(TypedDict):
parts: list[Part]
isPaywalled: bool
copyright: int
# Metadata for a reading list, mirroring the `name,stories(...)` field
# selection requested by fetch_list.
# NOTE(review): the name shadows typing.List; confirm no module imports both.
class List(TypedDict):
# Name of the list; used to build the download filename in list mode.
name: str
# Stories contained in the list; download_list iterates these.
stories: list[Story]
+61 -134
View File
@@ -2,8 +2,6 @@
import asyncio
from enum import Enum
from os import getenv
from io import BytesIO
from pathlib import Path
from typing import Optional
from zipfile import ZipFile
@@ -30,19 +28,14 @@ from create_book import (
fetch_story,
fetch_story_content_zip,
fetch_story_from_partId,
fetch_list,
logger,
slugify,
Story,
List,
)
from create_book.parser import clean_tree, fetch_tree_images
app = FastAPI()
BUILD_PATH = Path(__file__).parent / "build"
PDFS_ENABLED = True if getenv("VITE_ENABLE_PDFS") == "true" else False
class RequestCancelledMiddleware:
# Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
@@ -89,93 +82,6 @@ class DownloadFormat(Enum):
# Selects how handle_download interprets the supplied download_id.
class DownloadMode(Enum):
# download_id is a story ID, resolved with fetch_story.
story = "story"
# download_id is a part ID, resolved to its story with fetch_story_from_partId.
part = "part"
# download_id is a list ID, resolved with fetch_list.
list = "list"
async def download_story(
metadata: Story,
download_images: bool = False,
format: DownloadFormat = DownloadFormat.epub,
cookies: dict = None,
) -> BytesIO:
with start_action(
action_type="download_story",
story_id=metadata["id"],
download_images=download_images,
format=format,
):
# Fetch cover image
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
# Fetch parts archive
story_zip = await fetch_story_content_zip(metadata["id"], cookies)
archive = ZipFile(story_zip, "r")
# Parse part content
part_trees: list[BeautifulSoup] = []
for part in metadata["parts"]:
if "deleted" in part and part["deleted"]:
continue
part_trees.append(
clean_tree(
part["title"],
part["id"],
archive.read(str(part["id"])).decode("utf-8"),
)
)
# Fetch images
images = (
[await fetch_tree_images(tree) for tree in part_trees]
if download_images
else []
)
# Build output file
match format:
case DownloadFormat.epub:
book = EPUBGenerator(metadata, part_trees, cover_data, images)
case DownloadFormat.pdf:
# Fetch author profile picture
author_image = await fetch_image(
metadata["user"]["avatar"].replace("-256-", "-512-")
)
if not author_image:
raise HTTPException(status_code=422)
book = PDFGenerator(
metadata, part_trees, cover_data, images, author_image
)
logger.info(f"Retrieved story metadata and cover ({metadata['id']=})")
book.compile()
return book.dump()
async def download_list(
    metadata: List,
    download_images: bool = False,
    format: DownloadFormat = DownloadFormat.epub,
    cookies: Optional[dict] = None,
) -> BytesIO:
    """Build every story of a list and bundle the results into a ZIP archive.

    Args:
        metadata: List metadata whose ``stories`` entries are downloaded.
        download_images: Forwarded to ``download_story`` for each story.
        format: Output format used for every story in the archive.
        cookies: Optional session cookies forwarded to ``download_story``.

    Returns:
        A buffer containing the ZIP archive, positioned at its start.
    """
    extension = "epub" if format is DownloadFormat.epub else "pdf"
    # Same suffix convention as single-story downloads: nothing is appended
    # (not even a dangling underscore) when images are not included.
    image_suffix = "_images" if download_images else ""

    output_buffer = BytesIO()
    with ZipFile(output_buffer, "w") as archive:
        for story in metadata["stories"]:
            story_file = await download_story(story, download_images, format, cookies)
            file_name = (
                f"{slugify(story['title'])}_{story['id']}{image_suffix}.{extension}"
            )
            # getvalue() returns the whole buffer regardless of its current
            # read position.
            archive.writestr(file_name, story_file.getvalue())

    output_buffer.seek(0)
    return output_buffer
@app.get("/")
@@ -224,7 +130,7 @@ async def handle_download(
password: Optional[str] = None,
):
with start_action(
action_type="handle_download",
action_type="download",
download_id=download_id,
download_images=download_images,
format=format,
@@ -252,61 +158,82 @@ async def handle_download(
else:
cookies = None
match format:
case DownloadFormat.epub:
media_type = "application/epub+zip"
extension = "epub"
case DownloadFormat.pdf:
if not PDFS_ENABLED:
logger.error("PDF Downloads not enabled.")
return HTMLResponse(
status_code=403,
content='PDF Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
media_type = "application/pdf"
extension = "pdf"
match mode:
case DownloadMode.story:
metadata = await fetch_story(download_id, cookies)
output_buffer = await download_story(
metadata, download_images, format, cookies
)
story_id = download_id
metadata = await fetch_story(story_id, cookies)
case DownloadMode.part:
download_id, metadata = await fetch_story_from_partId(
download_id, cookies
story_id, metadata = await fetch_story_from_partId(download_id, cookies)
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
story_zip = await fetch_story_content_zip(story_id, cookies)
archive = ZipFile(story_zip, "r")
# Transform part metadata into an easily-indexable dictionary
part_id_title_dictionary = {
str(part["id"]): part["title"] for part in metadata["parts"]
}
part_trees: list[BeautifulSoup] = []
for id in archive.namelist():
if (
id not in part_id_title_dictionary
): # If a part is deleted and the old story_zip is cached, this is needed to avoid a KeyError exception
continue
part_trees.append(
clean_tree(
part_id_title_dictionary[id],
id,
archive.read(id).decode("utf-8"),
)
output_buffer = await download_story(
metadata, download_images, format, cookies
)
case DownloadMode.list:
if not PDFS_ENABLED:
logger.error("List Downloads not enabled.")
return HTMLResponse(
status_code=403,
content='List Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
metadata = await fetch_list(download_id, cookies)
output_buffer = await download_list(
metadata, download_images, format, cookies
images = (
[await fetch_tree_images(tree) for tree in part_trees]
if download_images
else []
)
media_type = "application/zip"
extension = "zip"
match format:
case DownloadFormat.epub:
book = EPUBGenerator(metadata, part_trees, cover_data, images)
media_type = "application/epub+zip"
case DownloadFormat.pdf:
author_image = await fetch_image(
metadata["user"]["avatar"].replace("-256-", "-512-")
)
if not author_image:
raise HTTPException(status_code=422)
book = PDFGenerator(
metadata, part_trees, cover_data, images, author_image
)
media_type = "application/pdf"
logger.info(f"Retrieved story metadata and cover ({story_id=})")
book.compile()
book_buffer = book.dump()
async def iterfile():
while chunk := output_buffer.read(512 * 4): # 4 kb/s
while chunk := book_buffer.read(512 * 4): # 4 kb/s
await asyncio.sleep(0.1) # throttle download speed
yield chunk
return StreamingResponse(
output_buffer if PDFS_ENABLED else iterfile(),
iterfile(),
media_type=media_type,
headers={
"Content-Disposition": f'attachment; filename="{slugify(metadata["name" if mode==DownloadMode.list else "title"])}_{download_id}{"_images" if download_images else ""}.{extension}"', # Thanks https://stackoverflow.com/a/72729058
"Content-Length": str(output_buffer.getbuffer().nbytes),
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"', # Thanks https://stackoverflow.com/a/72729058
"Content-Length": str(book_buffer.getbuffer().nbytes),
},
)
+1 -1
View File
@@ -19,7 +19,7 @@
href="https://github.com/TheOnlyWayUp"
class="underline"
target="_blank">TheOnlyWayUp</a
> © 2025
> © 2026
</p>
</aside>
</footer>
+1 -1
View File
@@ -44,7 +44,7 @@
>
</div>
<p>
Copyright © 2025 - All rights reserved by <a
Copyright © 2026 - All rights reserved by <a
href="https://rambhat.la"
class="link"
target="_blank"
+9 -27
View File
@@ -1,6 +1,4 @@
<script>
const PDFS_ENABLED = import.meta.env.VITE_ENABLE_PDFS === "true";
let downloadImages = $state(false);
let downloadAsPdf = $state(false); // 0 = epub, 1 = pdf
let isPaidStory = $state(false);
@@ -11,7 +9,7 @@
password: ""
});
let downloadId = $state("");
/** @type {"story" | "part" | "list" |""} */
/** @type {"story" | "part" | ""} */
let mode = $state("");
let inputUrl = $state("");
@@ -83,12 +81,6 @@
setInputAsValid(
input.split("?", 1)[0].split("/stories/")[1] // removes params
);
} else if (input.includes("/list/")) {
// https://www.wattpad.com/list/829974064
mode = "list";
setInputAsValid(
input.split("?", 1)[0].split("/list/")[1] // removes params
);
} else {
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title
@@ -118,7 +110,6 @@
>
WP Downloader
</h1>
{#if !PDFS_ENABLED}
<div role="alert" class="alert mt-10 max-w-md break-words bg-green-200">
<svg
xmlns="http://www.w3.org/2000/svg"
@@ -133,7 +124,6 @@
d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
></path>
</svg>
<div>
<p>
Donators get access to <span class="font-semibold">high-speed PDF Downloads</span>
@@ -143,7 +133,6 @@
>
</div>
</div>
{/if}
<!-- <div role="alert" class="alert bg-cyan-300 mt-5">
<svg
xmlns="http://www.w3.org/2000/svg"
@@ -165,13 +154,10 @@
</p>
<ul class="list list-inside pt-4 text-xl">
<!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. -->
<!-- <li>12/24 - ⚡ Super-fast Downloads!</li>
<li>12/24 - 📑 PDF Downloads!</li> -->
<li>05/25 - ⚖️ Legal Compliance</li>
{#if PDFS_ENABLED}
<li>12/24 - ⚡ Super-fast Downloads!</li>
<li>12/24 - 📑 PDF Downloads!</li>
{:else}
<li>12/24 - 📂 Less Errors, Throttled Downloads</li>
{/if}
<li>11/24 - 🔗 Paste Links!</li>
<li>11/24 - 📨 Send to Kindle Support!</li>
@@ -263,30 +249,26 @@
href={url}
onclick={() => (afterDownloadPage = true)}>Download</a
>
</div>
{#if PDFS_ENABLED}
<label class="swap w-fit label mt-2 pb-2">
<!-- <label class="swap w-fit label mt-2">
<input type="checkbox" bind:checked={downloadAsPdf} />
<div class="swap-on absolute left-0 text-gray-800">
<div class="swap-on">
Downloading as <span class=" underline text-bold">PDF</span> (Click)
</div>
<div class="swap-off absolute left-0 text-gray-800">
<div class="swap-off">
Downloading as <span class=" underline text-bold">EPUB</span> (Click)
</div>
</label>
{/if}
</label> -->
<label class="label cursor-pointer">
<span class="label-text text-gray-800"
>Include Images (<strong>Slower Download</strong>)</span
>
<span class="label-text text-gray-800">Include Images (<strong>Slower Download</strong>)</span>
<input
type="checkbox"
class="checkbox-warning checkbox shadow-md"
bind:checked={downloadImages}
/>
</label>
</div>
</form>
</div>
{:else}