13 Commits

Author SHA1 Message Date
Aron BenDaniel e53ba34bac list-downloading v3 2025-11-14 14:57:25 -05:00
Aron BenDaniel 943846a88b Merge remote-tracking branch 'TheOnlyWayUp/fix/#85-image-size' into list-downloading 2025-11-14 14:39:25 -05:00
TheOnlyWayUp a84d4edb6b fix(docker): Install libraries for weasyprint 2025-11-10 00:29:38 +05:30
TheOnlyWayUp 1379f416bc fix(docker): .dockerignore targets subdirectories 2025-11-10 00:29:09 +05:30
TheOnlyWayUp 83466ded4d fix(docker): Remove cache files after install 2025-11-10 00:05:09 +05:30
Aron BenDaniel 6c1a145577 docker: Fix .dockerignore uv.lock entry 2025-10-31 05:49:08 +05:30
Aron BenDaniel f20dfa2017 feat(api): Made PDFs-enabled check earlier 2025-10-30 13:43:12 -04:00
Aron BenDaniel 265799907d docker: Fix merge conflict with #82 2025-10-30 13:38:13 -04:00
Dhanush R cc9ac6093f feat(api): Remove dependency on exiftool (#82) 2025-10-30 16:11:16 +05:30
AaronBenDaniel 2956399b4b fix(api): Change error message capitalization 2025-10-29 18:02:42 +05:30
AaronBenDaniel d372020bac fix: Change name of flag to enable PDF downloads 2025-10-29 18:02:41 +05:30
AaronBenDaniel c2104ee514 feat: Control features with build arg 2025-10-29 18:02:37 +05:30
Aaron BenDaniel fa60de79fd Update README.md copyright year
The future is now.
2025-10-29 01:44:54 +05:30
8 changed files with 243 additions and 107 deletions
+10 -9
View File
@@ -1,10 +1,11 @@
__pycache__ **/__pycache__/
*ipynb **/*.ipynb
build **/build/
.idea .idea/
.vscode .vscode/
.venv **/.venv/
.env **/.env
*log **/.env_template
*.md **/*.log
**/*.md
src/api/uv.lock src/api/uv.lock
+13 -9
View File
@@ -1,4 +1,4 @@
FROM node:20 FROM node:20-alpine
WORKDIR /build WORKDIR /build
COPY src/frontend/package*.json . COPY src/frontend/package*.json .
@@ -6,6 +6,10 @@ RUN rm -rf node_modules
RUN rm -rf build RUN rm -rf build
RUN npm install RUN npm install
COPY src/frontend/. . COPY src/frontend/. .
ARG pdfs=false
ENV VITE_ENABLE_PDFS=$pdfs
RUN npm run build RUN npm run build
# Thanks https://stackoverflow.com/q/76988450 # Thanks https://stackoverflow.com/q/76988450
@@ -13,15 +17,12 @@ FROM python:3.13-slim
WORKDIR /app WORKDIR /app
COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/ RUN apt update && \
apt install -y git build-essential python3.13-dev libglib2.0-0 libpango-1.0-0 libpangoft2-1.0-0 && \
RUN apt update apt clean && \
RUN apt install -y aria2 rm -rf /var/lib/apt/lists/*
RUN apt-fast install -y git build-essential python3.13-dev libgobject-2.0 libpango-1.0 libpangoft2-1.0
# aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13 # aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13
# weasyprint depends on libgobject, libpango, and libpangoft2 # weasyprint depends on libgobject, libpango, and libpangoft2
RUN rm -rf /var/lib/apt/lists/*
# https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950 # https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950
@@ -32,7 +33,7 @@ WORKDIR /app
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY src/api/pyproject.toml /app COPY src/api/pyproject.toml /app
RUN uv sync RUN uv sync && uv cache clean
COPY src/api/ /app COPY src/api/ /app
COPY --from=0 /build/build /app/src/build COPY --from=0 /build/build /app/src/build
@@ -40,6 +41,9 @@ RUN ln -s /app/src/pdf/fonts /tmp/fonts
WORKDIR /app/src WORKDIR /app/src
ARG pdfs=false
ENV VITE_ENABLE_PDFS=$pdfs
EXPOSE 80 EXPOSE 80
CMD [ "uv", "run", "main.py"] CMD [ "uv", "run", "main.py"]
+1 -1
View File
@@ -53,5 +53,5 @@ My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fas
--- ---
<div align="center"> <div align="center">
<p>TheOnlyWayUp © 2024</p> <p>TheOnlyWayUp © 2025</p>
</div> </div>
+2
View File
@@ -5,9 +5,11 @@ from .create_book import (
fetch_story, fetch_story,
fetch_story_content_zip, fetch_story_content_zip,
fetch_story_from_partId, fetch_story_from_partId,
fetch_list,
) )
from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError
from .generators import EPUBGenerator, PDFGenerator from .generators import EPUBGenerator, PDFGenerator
from .logs import logger from .logs import logger
from .parser import fetch_image from .parser import fetch_image
from .utils import slugify from .utils import slugify
from .models import Story, List
+21 -3
View File
@@ -11,7 +11,7 @@ from pydantic import TypeAdapter
from .exceptions import PartNotFoundError, StoryNotFoundError from .exceptions import PartNotFoundError, StoryNotFoundError
from .logs import logger from .logs import logger
from .models import Story from .models import Story, List
from .vars import cache, headers from .vars import cache, headers
story_ta = TypeAdapter(Story) story_ta = TypeAdapter(Story)
@@ -70,7 +70,7 @@ async def fetch_story_from_partId(
headers=headers, cache=None if cookies else cache headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies. ) as session: # Don't cache requests with Cookies.
async with session.get( async with session.get(
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)" f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
) as response: ) as response:
body = await response.json() body = await response.json()
@@ -93,7 +93,7 @@ async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
headers=headers, cookies=cookies, cache=None if cookies else cache headers=headers, cookies=cookies, cache=None if cookies else cache
) as session: ) as session:
async with session.get( async with session.get(
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright" f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright"
) as response: ) as response:
body = await response.json() body = await response.json()
@@ -127,3 +127,21 @@ async def fetch_story_content_zip(
bytes_stream = BytesIO(await response.read()) bytes_stream = BytesIO(await response.read())
return bytes_stream return bytes_stream
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_list(list_id: int, cookies: Optional[dict] = None) -> List:
    """Retrieve a reading list's name and per-story metadata from its List ID.

    Args:
        list_id: ID of the Wattpad list to look up.
        cookies: Optional cookies to send with the request. When provided,
            the response cache is bypassed.

    Returns:
        The decoded JSON body of the lists endpoint.

    Raises:
        ClientResponseError: On a non-success HTTP status (via
            ``raise_for_status``); the decorator retries these with
            exponential backoff for up to 15 seconds.
    """
    list_url = f"https://www.wattpad.com/api/v3/lists/{list_id}?fields=name,stories(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
    # Don't cache requests with Cookies.
    session_cache = None if cookies else cache

    with start_action(action_type="api_fetch_list", list_id=list_id):
        async with CachedSession(
            headers=headers,
            cookies=cookies,
            cache=session_cache,
        ) as session:
            async with session.get(list_url) as response:
                response.raise_for_status()
                return await response.json()
+7 -1
View File
@@ -1,4 +1,4 @@
from typing import Optional, TypedDict from typing import Optional, TypedDict, NotRequired
class CopyrightData(TypedDict): class CopyrightData(TypedDict):
@@ -22,6 +22,7 @@ class User(TypedDict):
class Part(TypedDict): class Part(TypedDict):
id: int id: int
title: str title: str
deleted: NotRequired[bool]
class Story(TypedDict): class Story(TypedDict):
@@ -40,3 +41,8 @@ class Story(TypedDict):
parts: list[Part] parts: list[Part]
isPaywalled: bool isPaywalled: bool
copyright: int copyright: int
class List(TypedDict):
    """Shape of a Wattpad reading list as requested by the lists API call.

    Mirrors the ``name,stories(...)`` field selection used when fetching a
    list: a display name plus one ``Story`` record per entry.
    """

    # Name of the list.
    name: str
    # Metadata for each story contained in the list.
    stories: list[Story]
+133 -46
View File
@@ -2,6 +2,8 @@
import asyncio import asyncio
from enum import Enum from enum import Enum
from os import getenv
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from zipfile import ZipFile from zipfile import ZipFile
@@ -28,14 +30,19 @@ from create_book import (
fetch_story, fetch_story,
fetch_story_content_zip, fetch_story_content_zip,
fetch_story_from_partId, fetch_story_from_partId,
fetch_list,
logger, logger,
slugify, slugify,
Story,
List,
) )
from create_book.parser import clean_tree, fetch_tree_images from create_book.parser import clean_tree, fetch_tree_images
app = FastAPI() app = FastAPI()
BUILD_PATH = Path(__file__).parent / "build" BUILD_PATH = Path(__file__).parent / "build"
# Feature flag read once at import time: enabled only when the
# VITE_ENABLE_PDFS environment variable is exactly the string "true".
# The comparison already yields a bool, so no ternary is needed.
PDFS_ENABLED = getenv("VITE_ENABLE_PDFS") == "true"
class RequestCancelledMiddleware: class RequestCancelledMiddleware:
# Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734 # Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
@@ -82,6 +89,93 @@ class DownloadFormat(Enum):
class DownloadMode(Enum): class DownloadMode(Enum):
story = "story" story = "story"
part = "part" part = "part"
list = "list"
async def download_story(
metadata: Story,
download_images: bool = False,
format: DownloadFormat = DownloadFormat.epub,
cookies: dict = None,
) -> BytesIO:
with start_action(
action_type="download_story",
story_id=metadata["id"],
download_images=download_images,
format=format,
):
# Fetch cover image
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
# Fetch parts archive
story_zip = await fetch_story_content_zip(metadata["id"], cookies)
archive = ZipFile(story_zip, "r")
# Parse part content
part_trees: list[BeautifulSoup] = []
for part in metadata["parts"]:
if "deleted" in part and part["deleted"]:
continue
part_trees.append(
clean_tree(
part["title"],
part["id"],
archive.read(str(part["id"])).decode("utf-8"),
)
)
# Fetch images
images = (
[await fetch_tree_images(tree) for tree in part_trees]
if download_images
else []
)
# Build output file
match format:
case DownloadFormat.epub:
book = EPUBGenerator(metadata, part_trees, cover_data, images)
case DownloadFormat.pdf:
# Fetch author profile picture
author_image = await fetch_image(
metadata["user"]["avatar"].replace("-256-", "-512-")
)
if not author_image:
raise HTTPException(status_code=422)
book = PDFGenerator(
metadata, part_trees, cover_data, images, author_image
)
logger.info(f"Retrieved story metadata and cover ({metadata['id']=})")
book.compile()
return book.dump()
async def download_list(
    metadata: List,
    download_images: bool = False,
    format: DownloadFormat = DownloadFormat.epub,
    cookies: Optional[dict] = None,
) -> BytesIO:
    """Download every story in a list and bundle the files into one zip.

    Stories are generated one after another with ``download_story``; any
    exception raised for a story propagates and aborts the whole list.

    Args:
        metadata: List metadata; each item of ``metadata["stories"]`` is
            passed to ``download_story``.
        download_images: Forwarded to ``download_story``; also adds an
            ``_images`` suffix to each entry name.
        format: Output format for every story in the archive.
        cookies: Optional cookies forwarded to ``download_story``.

    Returns:
        A buffer containing the zip archive, rewound to the start.
    """
    extension = "epub" if format == DownloadFormat.epub else "pdf"
    # Same suffix convention as the single-story download filename: nothing
    # is appended when images are off (previously left a trailing "_").
    image_suffix = "_images" if download_images else ""

    output_buffer = BytesIO()
    with ZipFile(output_buffer, "w") as archive:
        for story in metadata["stories"]:
            story_file = await download_story(story, download_images, format, cookies)
            file_name = (
                f"{slugify(story['title'])}_{story['id']}{image_suffix}.{extension}"
            )
            archive.writestr(file_name, story_file.read())

    # Rewind so callers can read the archive from the beginning.
    output_buffer.seek(0)
    return output_buffer
@app.get("/") @app.get("/")
@@ -130,7 +224,7 @@ async def handle_download(
password: Optional[str] = None, password: Optional[str] = None,
): ):
with start_action( with start_action(
action_type="download", action_type="handle_download",
download_id=download_id, download_id=download_id,
download_images=download_images, download_images=download_images,
format=format, format=format,
@@ -158,68 +252,61 @@ async def handle_download(
else: else:
cookies = None cookies = None
match mode:
case DownloadMode.story:
story_id = download_id
metadata = await fetch_story(story_id, cookies)
case DownloadMode.part:
story_id, metadata = await fetch_story_from_partId(download_id, cookies)
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
story_zip = await fetch_story_content_zip(story_id, cookies)
archive = ZipFile(story_zip, "r")
part_trees: list[BeautifulSoup] = [
clean_tree(
part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8")
)
for part in metadata["parts"]
]
images = (
[await fetch_tree_images(tree) for tree in part_trees]
if download_images
else []
)
match format: match format:
case DownloadFormat.epub: case DownloadFormat.epub:
book = EPUBGenerator(metadata, part_trees, cover_data, images)
media_type = "application/epub+zip" media_type = "application/epub+zip"
extension = "epub"
case DownloadFormat.pdf: case DownloadFormat.pdf:
author_image = await fetch_image( if not PDFS_ENABLED:
metadata["user"]["avatar"].replace("-256-", "-512-") logger.error("PDF Downloads not enabled.")
) return HTMLResponse(
if not author_image: status_code=403,
raise HTTPException(status_code=422) content='PDF Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
book = PDFGenerator(
metadata, part_trees, cover_data, images, author_image
)
media_type = "application/pdf" media_type = "application/pdf"
extension = "pdf"
logger.info(f"Retrieved story metadata and cover ({story_id=})") match mode:
case DownloadMode.story:
metadata = await fetch_story(download_id, cookies)
output_buffer = await download_story(
metadata, download_images, format, cookies
)
case DownloadMode.part:
download_id, metadata = await fetch_story_from_partId(
download_id, cookies
)
output_buffer = await download_story(
metadata, download_images, format, cookies
)
case DownloadMode.list:
if not PDFS_ENABLED:
logger.error("List Downloads not enabled.")
return HTMLResponse(
status_code=403,
content='List Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
book.compile() metadata = await fetch_list(download_id, cookies)
output_buffer = await download_list(
metadata, download_images, format, cookies
)
book_buffer = book.dump() media_type = "application/zip"
extension = "zip"
async def iterfile(): async def iterfile():
while chunk := book_buffer.read(512 * 4): # 4 kb/s while chunk := output_buffer.read(512 * 4): # 4 kb/s
await asyncio.sleep(0.1) # throttle download speed await asyncio.sleep(0.1) # throttle download speed
yield chunk yield chunk
return StreamingResponse( return StreamingResponse(
iterfile(), output_buffer if PDFS_ENABLED else iterfile(),
media_type=media_type, media_type=media_type,
headers={ headers={
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"', # Thanks https://stackoverflow.com/a/72729058 "Content-Disposition": f'attachment; filename="{slugify(metadata["name" if mode==DownloadMode.list else "title"])}_{download_id}{"_images" if download_images else ""}.{extension}"', # Thanks https://stackoverflow.com/a/72729058
"Content-Length": str(book_buffer.getbuffer().nbytes), "Content-Length": str(output_buffer.getbuffer().nbytes),
}, },
) )
+56 -38
View File
@@ -1,4 +1,6 @@
<script> <script>
const PDFS_ENABLED = import.meta.env.VITE_ENABLE_PDFS === "true";
let downloadImages = $state(false); let downloadImages = $state(false);
let downloadAsPdf = $state(false); // 0 = epub, 1 = pdf let downloadAsPdf = $state(false); // 0 = epub, 1 = pdf
let isPaidStory = $state(false); let isPaidStory = $state(false);
@@ -9,7 +11,7 @@
password: "" password: ""
}); });
let downloadId = $state(""); let downloadId = $state("");
/** @type {"story" | "part" | ""} */ /** @type {"story" | "part" | "list" |""} */
let mode = $state(""); let mode = $state("");
let inputUrl = $state(""); let inputUrl = $state("");
@@ -81,6 +83,12 @@
setInputAsValid( setInputAsValid(
input.split("?", 1)[0].split("/stories/")[1] // removes params input.split("?", 1)[0].split("/stories/")[1] // removes params
); );
} else if (input.includes("/list/")) {
// https://www.wattpad.com/list/829974064
mode = "list";
setInputAsValid(
input.split("?", 1)[0].split("/list/")[1] // removes params
);
} else { } else {
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me // https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title
@@ -110,29 +118,32 @@
> >
WP Downloader WP Downloader
</h1> </h1>
<div role="alert" class="alert mt-10 max-w-md break-words bg-green-200"> {#if !PDFS_ENABLED}
<svg <div role="alert" class="alert mt-10 max-w-md break-words bg-green-200">
xmlns="http://www.w3.org/2000/svg" <svg
fill="none" xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24" fill="none"
class="h-6 w-6 shrink-0 stroke-current" viewBox="0 0 24 24"
> class="h-6 w-6 shrink-0 stroke-current"
<path
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
></path>
</svg>
<div>
<p>
Donators get access to <span class="font-semibold">high-speed PDF Downloads</span>
</p>
<a href="https://buymeacoffee.com/theonlywayup" class="link" target="_blank"
>Donate now</a
> >
<path
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
></path>
</svg>
<div>
<p>
Donators get access to <span class="font-semibold">high-speed PDF Downloads</span>
</p>
<a href="https://buymeacoffee.com/theonlywayup" class="link" target="_blank"
>Donate now</a
>
</div>
</div> </div>
</div> {/if}
<!-- <div role="alert" class="alert bg-cyan-300 mt-5"> <!-- <div role="alert" class="alert bg-cyan-300 mt-5">
<svg <svg
xmlns="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg"
@@ -154,10 +165,13 @@
</p> </p>
<ul class="list list-inside pt-4 text-xl"> <ul class="list list-inside pt-4 text-xl">
<!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. --> <!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. -->
<!-- <li>12/24 - ⚡ Super-fast Downloads!</li>
<li>12/24 - 📑 PDF Downloads!</li> -->
<li>05/25 - ⚖️ Legal Compliance</li> <li>05/25 - ⚖️ Legal Compliance</li>
<li>12/24 - 📂 Less Errors, Throttled Downloads</li> {#if PDFS_ENABLED}
<li>12/24 - ⚡ Super-fast Downloads!</li>
<li>12/24 - 📑 PDF Downloads!</li>
{:else}
<li>12/24 - 📂 Less Errors, Throttled Downloads</li>
{/if}
<li>11/24 - 🔗 Paste Links!</li> <li>11/24 - 🔗 Paste Links!</li>
<li>11/24 - 📨 Send to Kindle Support!</li> <li>11/24 - 📨 Send to Kindle Support!</li>
@@ -249,26 +263,30 @@
href={url} href={url}
onclick={() => (afterDownloadPage = true)}>Download</a onclick={() => (afterDownloadPage = true)}>Download</a
> >
</div>
<!-- <label class="swap w-fit label mt-2"> {#if PDFS_ENABLED}
<label class="swap w-fit label mt-2 pb-2">
<input type="checkbox" bind:checked={downloadAsPdf} /> <input type="checkbox" bind:checked={downloadAsPdf} />
<div class="swap-on"> <div class="swap-on absolute left-0 text-gray-800">
Downloading as <span class=" underline text-bold">PDF</span> (Click) Downloading as <span class=" underline text-bold">PDF</span> (Click)
</div> </div>
<div class="swap-off"> <div class="swap-off absolute left-0 text-gray-800">
Downloading as <span class=" underline text-bold">EPUB</span> (Click) Downloading as <span class=" underline text-bold">EPUB</span> (Click)
</div> </div>
</label> -->
<label class="label cursor-pointer">
<span class="label-text text-gray-800">Include Images (<strong>Slower Download</strong>)</span>
<input
type="checkbox"
class="checkbox-warning checkbox shadow-md"
bind:checked={downloadImages}
/>
</label> </label>
</div> {/if}
<label class="label cursor-pointer">
<span class="label-text text-gray-800"
>Include Images (<strong>Slower Download</strong>)</span
>
<input
type="checkbox"
class="checkbox-warning checkbox shadow-md"
bind:checked={downloadImages}
/>
</label>
</form> </form>
</div> </div>
{:else} {:else}