Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e53ba34bac | |||
| 943846a88b | |||
| a84d4edb6b | |||
| 1379f416bc | |||
| 83466ded4d | |||
| f20dfa2017 | |||
| 265799907d | |||
| 2956399b4b | |||
| d372020bac | |||
| c2104ee514 |
+10
-9
@@ -1,10 +1,11 @@
|
||||
__pycache__
|
||||
*ipynb
|
||||
build
|
||||
.idea
|
||||
.vscode
|
||||
.venv
|
||||
.env
|
||||
*log
|
||||
*.md
|
||||
**/__pycache__/
|
||||
**/*.ipynb
|
||||
**/build/
|
||||
.idea/
|
||||
.vscode/
|
||||
**/.venv/
|
||||
**/.env
|
||||
**/.env_template
|
||||
**/*.log
|
||||
**/*.md
|
||||
src/api/uv.lock
|
||||
|
||||
+13
-9
@@ -1,4 +1,4 @@
|
||||
FROM node:20
|
||||
FROM node:20-alpine
|
||||
|
||||
WORKDIR /build
|
||||
COPY src/frontend/package*.json .
|
||||
@@ -6,6 +6,10 @@ RUN rm -rf node_modules
|
||||
RUN rm -rf build
|
||||
RUN npm install
|
||||
COPY src/frontend/. .
|
||||
|
||||
ARG pdfs=false
|
||||
ENV VITE_ENABLE_PDFS=$pdfs
|
||||
|
||||
RUN npm run build
|
||||
# Thanks https://stackoverflow.com/q/76988450
|
||||
|
||||
@@ -13,15 +17,12 @@ FROM python:3.13-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y aria2
|
||||
RUN apt-fast install -y git build-essential python3.13-dev libgobject-2.0 libpango-1.0 libpangoft2-1.0
|
||||
RUN apt update && \
|
||||
apt install -y git build-essential python3.13-dev libglib2.0-0 libpango-1.0-0 libpangoft2-1.0-0 && \
|
||||
apt clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
# aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13
|
||||
# weasyprint depends on libgobject, libpango, and libpangoft2
|
||||
|
||||
RUN rm -rf /var/lib/apt/lists/*
|
||||
# https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950
|
||||
|
||||
|
||||
@@ -32,7 +33,7 @@ WORKDIR /app
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
||||
|
||||
COPY src/api/pyproject.toml /app
|
||||
RUN uv sync
|
||||
RUN uv sync && uv cache clean
|
||||
COPY src/api/ /app
|
||||
COPY --from=0 /build/build /app/src/build
|
||||
|
||||
@@ -40,6 +41,9 @@ RUN ln -s /app/src/pdf/fonts /tmp/fonts
|
||||
|
||||
WORKDIR /app/src
|
||||
|
||||
ARG pdfs=false
|
||||
ENV VITE_ENABLE_PDFS=$pdfs
|
||||
|
||||
EXPOSE 80
|
||||
|
||||
CMD [ "uv", "run", "main.py"]
|
||||
|
||||
@@ -5,9 +5,11 @@ from .create_book import (
|
||||
fetch_story,
|
||||
fetch_story_content_zip,
|
||||
fetch_story_from_partId,
|
||||
fetch_list,
|
||||
)
|
||||
from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError
|
||||
from .generators import EPUBGenerator, PDFGenerator
|
||||
from .logs import logger
|
||||
from .parser import fetch_image
|
||||
from .utils import slugify
|
||||
from .models import Story, List
|
||||
|
||||
@@ -11,7 +11,7 @@ from pydantic import TypeAdapter
|
||||
|
||||
from .exceptions import PartNotFoundError, StoryNotFoundError
|
||||
from .logs import logger
|
||||
from .models import Story
|
||||
from .models import Story, List
|
||||
from .vars import cache, headers
|
||||
|
||||
story_ta = TypeAdapter(Story)
|
||||
@@ -70,7 +70,7 @@ async def fetch_story_from_partId(
|
||||
headers=headers, cache=None if cookies else cache
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)"
|
||||
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
|
||||
) as response:
|
||||
body = await response.json()
|
||||
|
||||
@@ -93,7 +93,7 @@ async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
|
||||
headers=headers, cookies=cookies, cache=None if cookies else cache
|
||||
) as session:
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright"
|
||||
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright"
|
||||
) as response:
|
||||
body = await response.json()
|
||||
|
||||
@@ -127,3 +127,21 @@ async def fetch_story_content_zip(
|
||||
bytes_stream = BytesIO(await response.read())
|
||||
|
||||
return bytes_stream
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_list(list_id: int, cookies: Optional[dict] = None) -> List:
    """Fetch List metadata from a List ID.

    Requests the list's name plus, for every story in it, the story
    fields named in the ``fields`` query parameter below.

    Args:
        list_id: ID of the Wattpad list to look up.
        cookies: Optional cookies to send with the request. When given,
            the response cache is bypassed.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ClientResponseError: Raised by ``raise_for_status()`` on an error
            status; the ``backoff`` decorator retries on it for up to 15
            seconds before letting it propagate.
    """
    # Don't cache requests with Cookies.
    session_cache = None if cookies else cache
    endpoint = f"https://www.wattpad.com/api/v3/lists/{list_id}?fields=name,stories(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"

    with start_action(action_type="api_fetch_list", list_id=list_id):
        async with (
            CachedSession(
                headers=headers, cookies=cookies, cache=session_cache
            ) as session,
            session.get(endpoint) as response,
        ):
            response.raise_for_status()
            body = await response.json()

        return body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from typing import Optional, TypedDict
|
||||
from typing import Optional, TypedDict, NotRequired
|
||||
|
||||
|
||||
class CopyrightData(TypedDict):
|
||||
@@ -22,6 +22,7 @@ class User(TypedDict):
|
||||
class Part(TypedDict):
|
||||
id: int
|
||||
title: str
|
||||
deleted: NotRequired[bool]
|
||||
|
||||
|
||||
class Story(TypedDict):
|
||||
@@ -40,3 +41,8 @@ class Story(TypedDict):
|
||||
parts: list[Part]
|
||||
isPaywalled: bool
|
||||
copyright: int
|
||||
|
||||
|
||||
class List(TypedDict):
    """A named collection of stories."""

    # Name of the list.
    name: str
    # Metadata for each story contained in the list.
    stories: list[Story]
|
||||
|
||||
+132
-45
@@ -2,6 +2,8 @@
|
||||
|
||||
import asyncio
|
||||
from enum import Enum
|
||||
from os import getenv
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from zipfile import ZipFile
|
||||
@@ -28,14 +30,19 @@ from create_book import (
|
||||
fetch_story,
|
||||
fetch_story_content_zip,
|
||||
fetch_story_from_partId,
|
||||
fetch_list,
|
||||
logger,
|
||||
slugify,
|
||||
Story,
|
||||
List,
|
||||
)
|
||||
from create_book.parser import clean_tree, fetch_tree_images
|
||||
|
||||
app = FastAPI()
|
||||
BUILD_PATH = Path(__file__).parent / "build"
|
||||
|
||||
# Feature flag: on only when VITE_ENABLE_PDFS is exactly the string "true".
PDFS_ENABLED = getenv("VITE_ENABLE_PDFS") == "true"
|
||||
|
||||
|
||||
class RequestCancelledMiddleware:
|
||||
# Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
|
||||
@@ -82,6 +89,93 @@ class DownloadFormat(Enum):
|
||||
class DownloadMode(Enum):
    """What kind of ID a download request carries."""

    # The ID identifies a story.
    story = "story"
    # The ID identifies a single part of a story.
    part = "part"
    # The ID identifies a list of stories.
    list = "list"
|
||||
|
||||
|
||||
async def download_story(
|
||||
metadata: Story,
|
||||
download_images: bool = False,
|
||||
format: DownloadFormat = DownloadFormat.epub,
|
||||
cookies: dict = None,
|
||||
) -> BytesIO:
|
||||
with start_action(
|
||||
action_type="download_story",
|
||||
story_id=metadata["id"],
|
||||
download_images=download_images,
|
||||
format=format,
|
||||
):
|
||||
# Fetch cover image
|
||||
cover_data = await fetch_image(
|
||||
metadata["cover"].replace("-256-", "-512-")
|
||||
) # Increase resolution
|
||||
if not cover_data:
|
||||
raise HTTPException(status_code=422)
|
||||
|
||||
# Fetch parts archive
|
||||
story_zip = await fetch_story_content_zip(metadata["id"], cookies)
|
||||
archive = ZipFile(story_zip, "r")
|
||||
|
||||
# Parse part content
|
||||
part_trees: list[BeautifulSoup] = []
|
||||
|
||||
for part in metadata["parts"]:
|
||||
if "deleted" in part and part["deleted"]:
|
||||
continue
|
||||
part_trees.append(
|
||||
clean_tree(
|
||||
part["title"],
|
||||
part["id"],
|
||||
archive.read(str(part["id"])).decode("utf-8"),
|
||||
)
|
||||
)
|
||||
|
||||
# Fetch images
|
||||
images = (
|
||||
[await fetch_tree_images(tree) for tree in part_trees]
|
||||
if download_images
|
||||
else []
|
||||
)
|
||||
|
||||
# Build output file
|
||||
match format:
|
||||
case DownloadFormat.epub:
|
||||
book = EPUBGenerator(metadata, part_trees, cover_data, images)
|
||||
case DownloadFormat.pdf:
|
||||
# Fetch author profile picture
|
||||
author_image = await fetch_image(
|
||||
metadata["user"]["avatar"].replace("-256-", "-512-")
|
||||
)
|
||||
if not author_image:
|
||||
raise HTTPException(status_code=422)
|
||||
|
||||
book = PDFGenerator(
|
||||
metadata, part_trees, cover_data, images, author_image
|
||||
)
|
||||
|
||||
logger.info(f"Retrieved story metadata and cover ({metadata['id']=})")
|
||||
|
||||
book.compile()
|
||||
|
||||
return book.dump()
|
||||
|
||||
|
||||
async def download_list(
    metadata: List,
    download_images: bool = False,
    format: DownloadFormat = DownloadFormat.epub,
    cookies: Optional[dict] = None,
) -> BytesIO:
    """Build every story in a list and bundle the results into one ZIP.

    Stories are generated one after another with ``download_story`` and
    written to the archive as ``<slugified title>_<story id>[_images].<ext>``.

    Args:
        metadata: List metadata; its ``stories`` entries are passed to
            ``download_story`` one at a time.
        download_images: Forwarded to ``download_story``; also adds the
            ``_images`` suffix to each file name.
        format: Output format used for every story in the list.
        cookies: Optional cookies forwarded to ``download_story``.

    Returns:
        An in-memory ZIP archive, rewound to the start.
    """
    # These do not vary per story, so compute them once up front.
    extension = "epub" if format == DownloadFormat.epub else "pdf"
    # No separator when images are off, so names never end in a bare
    # underscore ("title_123_.epub").
    image_suffix = "_images" if download_images else ""

    output_buffer = BytesIO()

    with ZipFile(output_buffer, "w") as archive:
        for story in metadata["stories"]:
            story_file = await download_story(story, download_images, format, cookies)
            file_name = (
                f"{slugify(story['title'])}_{story['id']}{image_suffix}.{extension}"
            )
            archive.writestr(file_name, story_file.read())

    # Rewind so callers can read the archive from the beginning.
    output_buffer.seek(0)

    return output_buffer
|
||||
|
||||
|
||||
@app.get("/")
|
||||
@@ -130,7 +224,7 @@ async def handle_download(
|
||||
password: Optional[str] = None,
|
||||
):
|
||||
with start_action(
|
||||
action_type="download",
|
||||
action_type="handle_download",
|
||||
download_id=download_id,
|
||||
download_images=download_images,
|
||||
format=format,
|
||||
@@ -158,68 +252,61 @@ async def handle_download(
|
||||
else:
|
||||
cookies = None
|
||||
|
||||
match mode:
|
||||
case DownloadMode.story:
|
||||
story_id = download_id
|
||||
metadata = await fetch_story(story_id, cookies)
|
||||
case DownloadMode.part:
|
||||
story_id, metadata = await fetch_story_from_partId(download_id, cookies)
|
||||
|
||||
cover_data = await fetch_image(
|
||||
metadata["cover"].replace("-256-", "-512-")
|
||||
) # Increase resolution
|
||||
if not cover_data:
|
||||
raise HTTPException(status_code=422)
|
||||
|
||||
story_zip = await fetch_story_content_zip(story_id, cookies)
|
||||
archive = ZipFile(story_zip, "r")
|
||||
|
||||
part_trees: list[BeautifulSoup] = [
|
||||
clean_tree(
|
||||
part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8")
|
||||
)
|
||||
for part in metadata["parts"]
|
||||
]
|
||||
|
||||
images = (
|
||||
[await fetch_tree_images(tree) for tree in part_trees]
|
||||
if download_images
|
||||
else []
|
||||
)
|
||||
|
||||
match format:
|
||||
case DownloadFormat.epub:
|
||||
book = EPUBGenerator(metadata, part_trees, cover_data, images)
|
||||
media_type = "application/epub+zip"
|
||||
extension = "epub"
|
||||
case DownloadFormat.pdf:
|
||||
author_image = await fetch_image(
|
||||
metadata["user"]["avatar"].replace("-256-", "-512-")
|
||||
if not PDFS_ENABLED:
|
||||
logger.error("PDF Downloads not enabled.")
|
||||
return HTMLResponse(
|
||||
status_code=403,
|
||||
content='PDF Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||
)
|
||||
if not author_image:
|
||||
raise HTTPException(status_code=422)
|
||||
|
||||
book = PDFGenerator(
|
||||
metadata, part_trees, cover_data, images, author_image
|
||||
)
|
||||
media_type = "application/pdf"
|
||||
extension = "pdf"
|
||||
|
||||
logger.info(f"Retrieved story metadata and cover ({story_id=})")
|
||||
match mode:
|
||||
case DownloadMode.story:
|
||||
metadata = await fetch_story(download_id, cookies)
|
||||
output_buffer = await download_story(
|
||||
metadata, download_images, format, cookies
|
||||
)
|
||||
case DownloadMode.part:
|
||||
download_id, metadata = await fetch_story_from_partId(
|
||||
download_id, cookies
|
||||
)
|
||||
output_buffer = await download_story(
|
||||
metadata, download_images, format, cookies
|
||||
)
|
||||
case DownloadMode.list:
|
||||
if not PDFS_ENABLED:
|
||||
logger.error("List Downloads not enabled.")
|
||||
return HTMLResponse(
|
||||
status_code=403,
|
||||
content='List Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||
)
|
||||
|
||||
book.compile()
|
||||
metadata = await fetch_list(download_id, cookies)
|
||||
output_buffer = await download_list(
|
||||
metadata, download_images, format, cookies
|
||||
)
|
||||
|
||||
book_buffer = book.dump()
|
||||
media_type = "application/zip"
|
||||
extension = "zip"
|
||||
|
||||
async def iterfile():
|
||||
while chunk := book_buffer.read(512 * 4): # 4 kb/s
|
||||
while chunk := output_buffer.read(512 * 4): # 4 kb/s
|
||||
await asyncio.sleep(0.1) # throttle download speed
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(
|
||||
iterfile(),
|
||||
output_buffer if PDFS_ENABLED else iterfile(),
|
||||
media_type=media_type,
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"', # Thanks https://stackoverflow.com/a/72729058
|
||||
"Content-Length": str(book_buffer.getbuffer().nbytes),
|
||||
"Content-Disposition": f'attachment; filename="{slugify(metadata["name" if mode==DownloadMode.list else "title"])}_{download_id}{"_images" if download_images else ""}.{extension}"', # Thanks https://stackoverflow.com/a/72729058
|
||||
"Content-Length": str(output_buffer.getbuffer().nbytes),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
<script>
|
||||
const PDFS_ENABLED = import.meta.env.VITE_ENABLE_PDFS === "true";
|
||||
|
||||
let downloadImages = $state(false);
|
||||
let downloadAsPdf = $state(false); // 0 = epub, 1 = pdf
|
||||
let isPaidStory = $state(false);
|
||||
@@ -9,7 +11,7 @@
|
||||
password: ""
|
||||
});
|
||||
let downloadId = $state("");
|
||||
/** @type {"story" | "part" | ""} */
|
||||
/** @type {"story" | "part" | "list" |""} */
|
||||
let mode = $state("");
|
||||
let inputUrl = $state("");
|
||||
|
||||
@@ -81,6 +83,12 @@
|
||||
setInputAsValid(
|
||||
input.split("?", 1)[0].split("/stories/")[1] // removes params
|
||||
);
|
||||
} else if (input.includes("/list/")) {
|
||||
// https://www.wattpad.com/list/829974064
|
||||
mode = "list";
|
||||
setInputAsValid(
|
||||
input.split("?", 1)[0].split("/list/")[1] // removes params
|
||||
);
|
||||
} else {
|
||||
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
|
||||
input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title
|
||||
@@ -110,6 +118,7 @@
|
||||
>
|
||||
WP Downloader
|
||||
</h1>
|
||||
{#if !PDFS_ENABLED}
|
||||
<div role="alert" class="alert mt-10 max-w-md break-words bg-green-200">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
@@ -124,6 +133,7 @@
|
||||
d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
|
||||
></path>
|
||||
</svg>
|
||||
|
||||
<div>
|
||||
<p>
|
||||
Donators get access to <span class="font-semibold">high-speed PDF Downloads</span>
|
||||
@@ -133,6 +143,7 @@
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
<!-- <div role="alert" class="alert bg-cyan-300 mt-5">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
@@ -154,10 +165,13 @@
|
||||
</p>
|
||||
<ul class="list list-inside pt-4 text-xl">
|
||||
<!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. -->
|
||||
<!-- <li>12/24 - ⚡ Super-fast Downloads!</li>
|
||||
<li>12/24 - 📑 PDF Downloads!</li> -->
|
||||
<li>05/25 - ⚖️ Legal Compliance</li>
|
||||
{#if PDFS_ENABLED}
|
||||
<li>12/24 - ⚡ Super-fast Downloads!</li>
|
||||
<li>12/24 - 📑 PDF Downloads!</li>
|
||||
{:else}
|
||||
<li>12/24 - 📂 Less Errors, Throttled Downloads</li>
|
||||
{/if}
|
||||
<li>11/24 - 🔗 Paste Links!</li>
|
||||
<li>11/24 - 📨 Send to Kindle Support!</li>
|
||||
|
||||
@@ -249,26 +263,30 @@
|
||||
href={url}
|
||||
onclick={() => (afterDownloadPage = true)}>Download</a
|
||||
>
|
||||
</div>
|
||||
|
||||
<!-- <label class="swap w-fit label mt-2">
|
||||
{#if PDFS_ENABLED}
|
||||
<label class="swap w-fit label mt-2 pb-2">
|
||||
<input type="checkbox" bind:checked={downloadAsPdf} />
|
||||
<div class="swap-on">
|
||||
<div class="swap-on absolute left-0 text-gray-800">
|
||||
Downloading as <span class=" underline text-bold">PDF</span> (Click)
|
||||
</div>
|
||||
<div class="swap-off">
|
||||
<div class="swap-off absolute left-0 text-gray-800">
|
||||
Downloading as <span class=" underline text-bold">EPUB</span> (Click)
|
||||
</div>
|
||||
</label> -->
|
||||
</label>
|
||||
{/if}
|
||||
|
||||
<label class="label cursor-pointer">
|
||||
<span class="label-text text-gray-800">Include Images (<strong>Slower Download</strong>)</span>
|
||||
<span class="label-text text-gray-800"
|
||||
>Include Images (<strong>Slower Download</strong>)</span
|
||||
>
|
||||
<input
|
||||
type="checkbox"
|
||||
class="checkbox-warning checkbox shadow-md"
|
||||
bind:checked={downloadImages}
|
||||
/>
|
||||
</label>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
{:else}
|
||||
|
||||
Reference in New Issue
Block a user