1 Commits

Author SHA1 Message Date
Aron BenDaniel 68022ca547 docker: Fix .dockerignore uv.lock entry 2025-10-30 17:07:30 -04:00
8 changed files with 111 additions and 247 deletions
+9 -10
View File
@@ -1,11 +1,10 @@
**/__pycache__/ __pycache__
**/*.ipynb *ipynb
**/build/ build
.idea/ .idea
.vscode/ .vscode
**/.venv/ .venv
**/.env .env
**/.env_template *log
**/*.log *.md
**/*.md
src/api/uv.lock src/api/uv.lock
+9 -13
View File
@@ -1,4 +1,4 @@
FROM node:20-alpine FROM node:20
WORKDIR /build WORKDIR /build
COPY src/frontend/package*.json . COPY src/frontend/package*.json .
@@ -6,10 +6,6 @@ RUN rm -rf node_modules
RUN rm -rf build RUN rm -rf build
RUN npm install RUN npm install
COPY src/frontend/. . COPY src/frontend/. .
ARG pdfs=false
ENV VITE_ENABLE_PDFS=$pdfs
RUN npm run build RUN npm run build
# Thanks https://stackoverflow.com/q/76988450 # Thanks https://stackoverflow.com/q/76988450
@@ -17,12 +13,15 @@ FROM python:3.13-slim
WORKDIR /app WORKDIR /app
RUN apt update && \ COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/
apt install -y git build-essential python3.13-dev libglib2.0-0 libpango-1.0-0 libpangoft2-1.0-0 && \
apt clean && \ RUN apt update
rm -rf /var/lib/apt/lists/* RUN apt install -y aria2
RUN apt-fast install -y git build-essential python3.13-dev libgobject-2.0 libpango-1.0 libpangoft2-1.0
# aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13 # aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13
# weasyprint depends on libgobject, libpango, and libpangoft2 # weasyprint depends on libgobject, libpango, and libpangoft2
RUN rm -rf /var/lib/apt/lists/*
# https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950 # https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950
@@ -33,7 +32,7 @@ WORKDIR /app
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY src/api/pyproject.toml /app COPY src/api/pyproject.toml /app
RUN uv sync && uv cache clean RUN uv sync
COPY src/api/ /app COPY src/api/ /app
COPY --from=0 /build/build /app/src/build COPY --from=0 /build/build /app/src/build
@@ -41,9 +40,6 @@ RUN ln -s /app/src/pdf/fonts /tmp/fonts
WORKDIR /app/src WORKDIR /app/src
ARG pdfs=false
ENV VITE_ENABLE_PDFS=$pdfs
EXPOSE 80 EXPOSE 80
CMD [ "uv", "run", "main.py"] CMD [ "uv", "run", "main.py"]
+1 -1
View File
@@ -53,5 +53,5 @@ My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fas
--- ---
<div align="center"> <div align="center">
<p>TheOnlyWayUp © 2025</p> <p>TheOnlyWayUp © 2024</p>
</div> </div>
-2
View File
@@ -5,11 +5,9 @@ from .create_book import (
fetch_story, fetch_story,
fetch_story_content_zip, fetch_story_content_zip,
fetch_story_from_partId, fetch_story_from_partId,
fetch_list,
) )
from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError
from .generators import EPUBGenerator, PDFGenerator from .generators import EPUBGenerator, PDFGenerator
from .logs import logger from .logs import logger
from .parser import fetch_image from .parser import fetch_image
from .utils import slugify from .utils import slugify
from .models import Story, List
+3 -21
View File
@@ -11,7 +11,7 @@ from pydantic import TypeAdapter
from .exceptions import PartNotFoundError, StoryNotFoundError from .exceptions import PartNotFoundError, StoryNotFoundError
from .logs import logger from .logs import logger
from .models import Story, List from .models import Story
from .vars import cache, headers from .vars import cache, headers
story_ta = TypeAdapter(Story) story_ta = TypeAdapter(Story)
@@ -70,7 +70,7 @@ async def fetch_story_from_partId(
headers=headers, cache=None if cookies else cache headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies. ) as session: # Don't cache requests with Cookies.
async with session.get( async with session.get(
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)" f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)"
) as response: ) as response:
body = await response.json() body = await response.json()
@@ -93,7 +93,7 @@ async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
headers=headers, cookies=cookies, cache=None if cookies else cache headers=headers, cookies=cookies, cache=None if cookies else cache
) as session: ) as session:
async with session.get( async with session.get(
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright" f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright"
) as response: ) as response:
body = await response.json() body = await response.json()
@@ -127,21 +127,3 @@ async def fetch_story_content_zip(
bytes_stream = BytesIO(await response.read()) bytes_stream = BytesIO(await response.read())
return bytes_stream return bytes_stream
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_list(list_id: int, cookies: Optional[dict] = None) -> List:
    """Retrieve a reading list's name and the metadata of its stories.

    Args:
        list_id: Numeric ID of the list to look up.
        cookies: Optional session cookies forwarded with the request. When
            given, the shared response cache is bypassed.

    Returns:
        The decoded JSON body of the API response.

    The response status is checked with ``raise_for_status()``; a
    ``ClientResponseError`` is retried by the ``backoff`` decorator with
    exponential delays (``max_time=15``).
    """
    endpoint = f"https://www.wattpad.com/api/v3/lists/{list_id}?fields=name,stories(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"

    # Don't cache requests with Cookies.
    session_cache = None if cookies else cache

    with start_action(action_type="api_fetch_list", list_id=list_id):
        async with CachedSession(
            headers=headers, cookies=cookies, cache=session_cache
        ) as session:
            async with session.get(endpoint) as response:
                response.raise_for_status()
                return await response.json()
+1 -7
View File
@@ -1,4 +1,4 @@
from typing import Optional, TypedDict, NotRequired from typing import Optional, TypedDict
class CopyrightData(TypedDict): class CopyrightData(TypedDict):
@@ -22,7 +22,6 @@ class User(TypedDict):
class Part(TypedDict): class Part(TypedDict):
id: int id: int
title: str title: str
deleted: NotRequired[bool]
class Story(TypedDict): class Story(TypedDict):
@@ -41,8 +40,3 @@ class Story(TypedDict):
parts: list[Part] parts: list[Part]
isPaywalled: bool isPaywalled: bool
copyright: int copyright: int
# Typed shape of a list payload: a name plus the stories it contains.
class List(TypedDict):
# Name of the list.
name: str
# Stories belonging to the list, each in the same shape as `Story`.
stories: list[Story]
+47 -134
View File
@@ -2,8 +2,6 @@
import asyncio import asyncio
from enum import Enum from enum import Enum
from os import getenv
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from zipfile import ZipFile from zipfile import ZipFile
@@ -30,19 +28,14 @@ from create_book import (
fetch_story, fetch_story,
fetch_story_content_zip, fetch_story_content_zip,
fetch_story_from_partId, fetch_story_from_partId,
fetch_list,
logger, logger,
slugify, slugify,
Story,
List,
) )
from create_book.parser import clean_tree, fetch_tree_images from create_book.parser import clean_tree, fetch_tree_images
app = FastAPI() app = FastAPI()
BUILD_PATH = Path(__file__).parent / "build" BUILD_PATH = Path(__file__).parent / "build"
PDFS_ENABLED = True if getenv("VITE_ENABLE_PDFS") == "true" else False
class RequestCancelledMiddleware: class RequestCancelledMiddleware:
# Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734 # Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
@@ -89,93 +82,6 @@ class DownloadFormat(Enum):
class DownloadMode(Enum): class DownloadMode(Enum):
story = "story" story = "story"
part = "part" part = "part"
list = "list"
async def download_story(
metadata: Story,
download_images: bool = False,
format: DownloadFormat = DownloadFormat.epub,
cookies: dict = None,
) -> BytesIO:
with start_action(
action_type="download_story",
story_id=metadata["id"],
download_images=download_images,
format=format,
):
# Fetch cover image
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
# Fetch parts archive
story_zip = await fetch_story_content_zip(metadata["id"], cookies)
archive = ZipFile(story_zip, "r")
# Parse part content
part_trees: list[BeautifulSoup] = []
for part in metadata["parts"]:
if "deleted" in part and part["deleted"]:
continue
part_trees.append(
clean_tree(
part["title"],
part["id"],
archive.read(str(part["id"])).decode("utf-8"),
)
)
# Fetch images
images = (
[await fetch_tree_images(tree) for tree in part_trees]
if download_images
else []
)
# Build output file
match format:
case DownloadFormat.epub:
book = EPUBGenerator(metadata, part_trees, cover_data, images)
case DownloadFormat.pdf:
# Fetch author profile picture
author_image = await fetch_image(
metadata["user"]["avatar"].replace("-256-", "-512-")
)
if not author_image:
raise HTTPException(status_code=422)
book = PDFGenerator(
metadata, part_trees, cover_data, images, author_image
)
logger.info(f"Retrieved story metadata and cover ({metadata['id']=})")
book.compile()
return book.dump()
async def download_list(
    metadata: List,
    download_images: bool = False,
    format: DownloadFormat = DownloadFormat.epub,
    cookies: Optional[dict] = None,
) -> BytesIO:
    """Generate every story of a list and bundle the results into one ZIP.

    Args:
        metadata: List metadata; its ``stories`` entries are passed one by
            one to ``download_story``.
        download_images: Forwarded to ``download_story``; also adds an
            ``_images`` suffix to each member's file name.
        format: Output format for every story in the archive.
        cookies: Optional session cookies forwarded to ``download_story``.

    Returns:
        An in-memory ZIP archive, rewound to position 0.
    """
    extension = "epub" if format == DownloadFormat.epub else "pdf"
    # Suffix carries its own underscore so names never end in a dangling "_"
    # when images are not included (previously "title_123_.epub").
    image_suffix = "_images" if download_images else ""

    output_buffer = BytesIO()
    with ZipFile(output_buffer, "w") as archive:
        # Stories are generated one after another, in list order.
        for story in metadata["stories"]:
            story_file = await download_story(story, download_images, format, cookies)
            file_name = (
                f"{slugify(story['title'])}_{story['id']}{image_suffix}.{extension}"
            )
            archive.writestr(file_name, story_file.read())

    # Rewind so callers can read the finished archive from the start.
    output_buffer.seek(0)
    return output_buffer
@app.get("/") @app.get("/")
@@ -224,7 +130,7 @@ async def handle_download(
password: Optional[str] = None, password: Optional[str] = None,
): ):
with start_action( with start_action(
action_type="handle_download", action_type="download",
download_id=download_id, download_id=download_id,
download_images=download_images, download_images=download_images,
format=format, format=format,
@@ -252,61 +158,68 @@ async def handle_download(
else: else:
cookies = None cookies = None
match format:
case DownloadFormat.epub:
media_type = "application/epub+zip"
extension = "epub"
case DownloadFormat.pdf:
if not PDFS_ENABLED:
logger.error("PDF Downloads not enabled.")
return HTMLResponse(
status_code=403,
content='PDF Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
media_type = "application/pdf"
extension = "pdf"
match mode: match mode:
case DownloadMode.story: case DownloadMode.story:
metadata = await fetch_story(download_id, cookies) story_id = download_id
output_buffer = await download_story( metadata = await fetch_story(story_id, cookies)
metadata, download_images, format, cookies
)
case DownloadMode.part: case DownloadMode.part:
download_id, metadata = await fetch_story_from_partId( story_id, metadata = await fetch_story_from_partId(download_id, cookies)
download_id, cookies
cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
story_zip = await fetch_story_content_zip(story_id, cookies)
archive = ZipFile(story_zip, "r")
part_trees: list[BeautifulSoup] = [
clean_tree(
part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8")
) )
output_buffer = await download_story( for part in metadata["parts"]
metadata, download_images, format, cookies ]
)
case DownloadMode.list: images = (
if not PDFS_ENABLED: [await fetch_tree_images(tree) for tree in part_trees]
logger.error("List Downloads not enabled.") if download_images
return HTMLResponse( else []
status_code=403,
content='List Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
) )
metadata = await fetch_list(download_id, cookies) match format:
output_buffer = await download_list( case DownloadFormat.epub:
metadata, download_images, format, cookies book = EPUBGenerator(metadata, part_trees, cover_data, images)
media_type = "application/epub+zip"
case DownloadFormat.pdf:
author_image = await fetch_image(
metadata["user"]["avatar"].replace("-256-", "-512-")
) )
if not author_image:
raise HTTPException(status_code=422)
media_type = "application/zip" book = PDFGenerator(
extension = "zip" metadata, part_trees, cover_data, images, author_image
)
media_type = "application/pdf"
logger.info(f"Retrieved story metadata and cover ({story_id=})")
book.compile()
book_buffer = book.dump()
async def iterfile(): async def iterfile():
while chunk := output_buffer.read(512 * 4): # 4 kb/s while chunk := book_buffer.read(512 * 4): # 4 kb/s
await asyncio.sleep(0.1) # throttle download speed await asyncio.sleep(0.1) # throttle download speed
yield chunk yield chunk
return StreamingResponse( return StreamingResponse(
output_buffer if PDFS_ENABLED else iterfile(), iterfile(),
media_type=media_type, media_type=media_type,
headers={ headers={
"Content-Disposition": f'attachment; filename="{slugify(metadata["name" if mode==DownloadMode.list else "title"])}_{download_id}{"_images" if download_images else ""}.{extension}"', # Thanks https://stackoverflow.com/a/72729058 "Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"', # Thanks https://stackoverflow.com/a/72729058
"Content-Length": str(output_buffer.getbuffer().nbytes), "Content-Length": str(book_buffer.getbuffer().nbytes),
}, },
) )
+9 -27
View File
@@ -1,6 +1,4 @@
<script> <script>
const PDFS_ENABLED = import.meta.env.VITE_ENABLE_PDFS === "true";
let downloadImages = $state(false); let downloadImages = $state(false);
let downloadAsPdf = $state(false); // 0 = epub, 1 = pdf let downloadAsPdf = $state(false); // 0 = epub, 1 = pdf
let isPaidStory = $state(false); let isPaidStory = $state(false);
@@ -11,7 +9,7 @@
password: "" password: ""
}); });
let downloadId = $state(""); let downloadId = $state("");
/** @type {"story" | "part" | "list" |""} */ /** @type {"story" | "part" | ""} */
let mode = $state(""); let mode = $state("");
let inputUrl = $state(""); let inputUrl = $state("");
@@ -83,12 +81,6 @@
setInputAsValid( setInputAsValid(
input.split("?", 1)[0].split("/stories/")[1] // removes params input.split("?", 1)[0].split("/stories/")[1] // removes params
); );
} else if (input.includes("/list/")) {
// https://www.wattpad.com/list/829974064
mode = "list";
setInputAsValid(
input.split("?", 1)[0].split("/list/")[1] // removes params
);
} else { } else {
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me // https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title
@@ -118,7 +110,6 @@
> >
WP Downloader WP Downloader
</h1> </h1>
{#if !PDFS_ENABLED}
<div role="alert" class="alert mt-10 max-w-md break-words bg-green-200"> <div role="alert" class="alert mt-10 max-w-md break-words bg-green-200">
<svg <svg
xmlns="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg"
@@ -133,7 +124,6 @@
d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
></path> ></path>
</svg> </svg>
<div> <div>
<p> <p>
Donators get access to <span class="font-semibold">high-speed PDF Downloads</span> Donators get access to <span class="font-semibold">high-speed PDF Downloads</span>
@@ -143,7 +133,6 @@
> >
</div> </div>
</div> </div>
{/if}
<!-- <div role="alert" class="alert bg-cyan-300 mt-5"> <!-- <div role="alert" class="alert bg-cyan-300 mt-5">
<svg <svg
xmlns="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg"
@@ -165,13 +154,10 @@
</p> </p>
<ul class="list list-inside pt-4 text-xl"> <ul class="list list-inside pt-4 text-xl">
<!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. --> <!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. -->
<!-- <li>12/24 - ⚡ Super-fast Downloads!</li>
<li>12/24 - 📑 PDF Downloads!</li> -->
<li>05/25 - ⚖️ Legal Compliance</li> <li>05/25 - ⚖️ Legal Compliance</li>
{#if PDFS_ENABLED}
<li>12/24 - ⚡ Super-fast Downloads!</li>
<li>12/24 - 📑 PDF Downloads!</li>
{:else}
<li>12/24 - 📂 Less Errors, Throttled Downloads</li> <li>12/24 - 📂 Less Errors, Throttled Downloads</li>
{/if}
<li>11/24 - 🔗 Paste Links!</li> <li>11/24 - 🔗 Paste Links!</li>
<li>11/24 - 📨 Send to Kindle Support!</li> <li>11/24 - 📨 Send to Kindle Support!</li>
@@ -263,30 +249,26 @@
href={url} href={url}
onclick={() => (afterDownloadPage = true)}>Download</a onclick={() => (afterDownloadPage = true)}>Download</a
> >
</div>
{#if PDFS_ENABLED} <!-- <label class="swap w-fit label mt-2">
<label class="swap w-fit label mt-2 pb-2">
<input type="checkbox" bind:checked={downloadAsPdf} /> <input type="checkbox" bind:checked={downloadAsPdf} />
<div class="swap-on absolute left-0 text-gray-800"> <div class="swap-on">
Downloading as <span class=" underline text-bold">PDF</span> (Click) Downloading as <span class=" underline text-bold">PDF</span> (Click)
</div> </div>
<div class="swap-off absolute left-0 text-gray-800"> <div class="swap-off">
Downloading as <span class=" underline text-bold">EPUB</span> (Click) Downloading as <span class=" underline text-bold">EPUB</span> (Click)
</div> </div>
</label> </label> -->
{/if}
<label class="label cursor-pointer"> <label class="label cursor-pointer">
<span class="label-text text-gray-800" <span class="label-text text-gray-800">Include Images (<strong>Slower Download</strong>)</span>
>Include Images (<strong>Slower Download</strong>)</span
>
<input <input
type="checkbox" type="checkbox"
class="checkbox-warning checkbox shadow-md" class="checkbox-warning checkbox shadow-md"
bind:checked={downloadImages} bind:checked={downloadImages}
/> />
</label> </label>
</div>
</form> </form>
</div> </div>
{:else} {:else}