list-downloading v3

Merge remote-tracking branch 'TheOnlyWayUp/fix/#85-image-size' into list-downloading
fix(docker): Install libraries for weasyprint
2025-11-14 14:57:25 -05:00 · 2025-11-14 14:39:25 -05:00 · 2025-11-10 00:29:38 +05:30 · 2025-11-10 00:29:09 +05:30 · 2025-11-10 00:05:09 +05:30 · 2025-10-31 05:49:08 +05:30
8 changed files with 243 additions and 107 deletions
@@ -1,10 +1,11 @@
-__pycache__
+**/__pycache__/
-*ipynb
+**/*.ipynb
-build
+**/build/
-.idea
+.idea/
-.vscode
+.vscode/
-.venv
+**/.venv/
-.env
+**/.env
-*log
+**/.env_template
-*.md
+**/*.log
 **/*.md
 src/api/uv.lock
@@ -1,4 +1,4 @@
-FROM node:20
+FROM node:20-alpine
 WORKDIR /build
 COPY src/frontend/package*.json .
@@ -6,6 +6,10 @@ RUN rm -rf node_modules
 RUN rm -rf build
 RUN npm install
 COPY src/frontend/. .
 ARG pdfs=false
 ENV VITE_ENABLE_PDFS=$pdfs
 RUN npm run build
 # Thanks https://stackoverflow.com/q/76988450
@@ -13,15 +17,12 @@ FROM python:3.13-slim
 WORKDIR /app
-COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/
+RUN apt update && \
-
+    apt install -y git build-essential python3.13-dev libglib2.0-0 libpango-1.0-0 libpangoft2-1.0-0 && \
-RUN apt update
+    apt clean && \
-RUN apt install -y aria2
+    rm -rf /var/lib/apt/lists/*
 RUN apt-fast install -y git build-essential python3.13-dev libgobject-2.0 libpango-1.0 libpangoft2-1.0
 # aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13
 # weasyprint depends on libgobject, libpango, and libpangoft2
 RUN rm -rf /var/lib/apt/lists/*
 # https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950
@@ -32,7 +33,7 @@ WORKDIR /app
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
 COPY src/api/pyproject.toml /app
-RUN uv sync
+RUN uv sync && uv cache clean
 COPY src/api/ /app
 COPY --from=0 /build/build /app/src/build
@@ -40,6 +41,9 @@ RUN ln -s /app/src/pdf/fonts /tmp/fonts
 WORKDIR /app/src
 ARG pdfs=false
 ENV VITE_ENABLE_PDFS=$pdfs
 EXPOSE 80
 CMD [ "uv", "run", "main.py"]
@@ -53,5 +53,5 @@ My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fas
 ---
 <div align="center">
-    <p>TheOnlyWayUp © 2024</p>
+    <p>TheOnlyWayUp © 2025</p>
 </div>
@@ -5,9 +5,11 @@ from .create_book import (
    fetch_story,
    fetch_story_content_zip,
    fetch_story_from_partId,
    fetch_list,
 )
 from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError
 from .generators import EPUBGenerator, PDFGenerator
 from .logs import logger
 from .parser import fetch_image
 from .utils import slugify
 from .models import Story, List
@@ -11,7 +11,7 @@ from pydantic import TypeAdapter
 from .exceptions import PartNotFoundError, StoryNotFoundError
 from .logs import logger
-from .models import Story
+from .models import Story, List
 from .vars import cache, headers
 story_ta = TypeAdapter(Story)
@@ -70,7 +70,7 @@ async def fetch_story_from_partId(
            headers=headers, cache=None if cookies else cache
        ) as session:  # Don't cache requests with Cookies.
            async with session.get(
-                f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)"
+                f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"
            ) as response:
                body = await response.json()
@@ -93,7 +93,7 @@ async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
            headers=headers, cookies=cookies, cache=None if cookies else cache
        ) as session:
            async with session.get(
-                f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright"
+                f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright"
            ) as response:
                body = await response.json()
@@ -127,3 +127,21 @@ async def fetch_story_content_zip(
                bytes_stream = BytesIO(await response.read())
        return bytes_stream
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
 async def fetch_list(list_id: int, cookies: Optional[dict] = None) -> List:
    """Retrieve a reading list (its name and the metadata of its stories) by List ID.

    Args:
        list_id: ID of the list to request.
        cookies: Optional cookies sent with the request. When cookies are
            given, the response cache is bypassed.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        ClientResponseError: Propagated from ``raise_for_status()`` on an
            error status; the ``backoff`` decorator retries on this
            exception with ``max_time=15``.
    """
    with start_action(action_type="api_fetch_list", list_id=list_id):
        # Requests carrying cookies must never be served from / stored in the cache.
        session_cache = None if cookies else cache
        endpoint = f"https://www.wattpad.com/api/v3/lists/{list_id}?fields=name,stories(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title,deleted),cover,copyright)"

        async with CachedSession(
            headers=headers, cookies=cookies, cache=session_cache
        ) as session:
            async with session.get(endpoint) as response:
                response.raise_for_status()
                payload = await response.json()

        return payload
@@ -1,4 +1,4 @@
-from typing import Optional, TypedDict
+from typing import Optional, TypedDict, NotRequired
 class CopyrightData(TypedDict):
@@ -22,6 +22,7 @@ class User(TypedDict):
 class Part(TypedDict):
    """A single part of a story, as requested via `parts(id,title,deleted)`."""

    # Part ID; also used (as a string) to look the part up in the story content zip.
    id: int
    # Title of the part.
    title: str
    # May be absent from the payload. The download code skips parts where this is truthy.
    deleted: NotRequired[bool]
 class Story(TypedDict):
@@ -40,3 +41,8 @@ class Story(TypedDict):
    parts: list[Part]
    isPaywalled: bool
    copyright: int
 class List(TypedDict):
    """A list of stories, as requested from the lists endpoint with `fields=name,stories(...)`."""

    # Name of the list; used to build the download file name.
    name: str
    # Metadata for every story in the list, in the same shape as a single Story.
    stories: list[Story]
@@ -2,6 +2,8 @@
 import asyncio
 from enum import Enum
 from os import getenv
 from io import BytesIO
 from pathlib import Path
 from typing import Optional
 from zipfile import ZipFile
@@ -28,14 +30,19 @@ from create_book import (
    fetch_story,
    fetch_story_content_zip,
    fetch_story_from_partId,
    fetch_list,
    logger,
    slugify,
    Story,
    List,
 )
 from create_book.parser import clean_tree, fetch_tree_images
 app = FastAPI()
 BUILD_PATH = Path(__file__).parent / "build"
 PDFS_ENABLED = True if getenv("VITE_ENABLE_PDFS") == "true" else False
 class RequestCancelledMiddleware:
    # Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
@@ -82,6 +89,93 @@ class DownloadFormat(Enum):
 class DownloadMode(Enum):
    """What the ID passed to the download handler refers to."""

    # The ID is a story ID; metadata is fetched with fetch_story.
    story = "story"
    # The ID is a part ID; the owning story is resolved with fetch_story_from_partId.
    part = "part"
    # The ID is a list ID; metadata is fetched with fetch_list and stories are zipped together.
    list = "list"
 async def download_story(
    metadata: Story,
    download_images: bool = False,
    format: DownloadFormat = DownloadFormat.epub,
    cookies: dict = None,
 ) -> BytesIO:
    with start_action(
        action_type="download_story",
        story_id=metadata["id"],
        download_images=download_images,
        format=format,
    ):
        # Fetch cover image
        cover_data = await fetch_image(
            metadata["cover"].replace("-256-", "-512-")
        )  # Increase resolution
        if not cover_data:
            raise HTTPException(status_code=422)
        # Fetch parts archive
        story_zip = await fetch_story_content_zip(metadata["id"], cookies)
        archive = ZipFile(story_zip, "r")
        # Parse part content
        part_trees: list[BeautifulSoup] = []
        for part in metadata["parts"]:
            if "deleted" in part and part["deleted"]:
                continue
            part_trees.append(
                clean_tree(
                    part["title"],
                    part["id"],
                    archive.read(str(part["id"])).decode("utf-8"),
                )
            )
        # Fetch images
        images = (
            [await fetch_tree_images(tree) for tree in part_trees]
            if download_images
            else []
        )
        # Build output file
        match format:
            case DownloadFormat.epub:
                book = EPUBGenerator(metadata, part_trees, cover_data, images)
            case DownloadFormat.pdf:
                # Fetch author profile picture
                author_image = await fetch_image(
                    metadata["user"]["avatar"].replace("-256-", "-512-")
                )
                if not author_image:
                    raise HTTPException(status_code=422)
                book = PDFGenerator(
                    metadata, part_trees, cover_data, images, author_image
                )
        logger.info(f"Retrieved story metadata and cover ({metadata['id']=})")
        book.compile()
        return book.dump()
 async def download_list(
    metadata: List,
    download_images: bool = False,
    format: DownloadFormat = DownloadFormat.epub,
    cookies: Optional[dict] = None,
 ) -> BytesIO:
    """Build every story in a list and bundle the results into one zip archive.

    Args:
        metadata: List metadata; each entry of ``metadata["stories"]`` is
            passed to ``download_story``.
        download_images: Forwarded to ``download_story``; also adds an
            ``_images`` suffix to each entry name.
        format: Output format for every story in the archive.
        cookies: Optional cookies forwarded to ``download_story``.

    Returns:
        A buffer holding the zip archive, rewound to position 0.
    """
    # Loop-invariant pieces of the entry name. The suffix carries its own
    # leading underscore so no stray "_" is left when images are disabled
    # (same naming scheme as the single-story download file name).
    extension = "epub" if format == DownloadFormat.epub else "pdf"
    image_suffix = "_images" if download_images else ""

    output_buffer = BytesIO()
    with ZipFile(output_buffer, "w") as archive:
        for story in metadata["stories"]:
            story_file = await download_story(story, download_images, format, cookies)
            file_name = (
                f"{slugify(story['title'])}_{story['id']}{image_suffix}.{extension}"
            )
            archive.writestr(file_name, story_file.read())

    # Rewind so callers can stream the archive from the beginning.
    output_buffer.seek(0)
    return output_buffer
@app.get("/")
@@ -130,7 +224,7 @@ async def handle_download(
    password: Optional[str] = None,
 ):
    with start_action(
-        action_type="download",
+        action_type="handle_download",
        download_id=download_id,
        download_images=download_images,
        format=format,
@@ -158,68 +252,61 @@ async def handle_download(
        else:
            cookies = None
        match mode:
            case DownloadMode.story:
                story_id = download_id
                metadata = await fetch_story(story_id, cookies)
            case DownloadMode.part:
                story_id, metadata = await fetch_story_from_partId(download_id, cookies)
        cover_data = await fetch_image(
            metadata["cover"].replace("-256-", "-512-")
        )  # Increase resolution
        if not cover_data:
            raise HTTPException(status_code=422)
        story_zip = await fetch_story_content_zip(story_id, cookies)
        archive = ZipFile(story_zip, "r")
        part_trees: list[BeautifulSoup] = [
            clean_tree(
                part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8")
            )
            for part in metadata["parts"]
        ]
        images = (
            [await fetch_tree_images(tree) for tree in part_trees]
            if download_images
            else []
        )
        match format:
            case DownloadFormat.epub:
                book = EPUBGenerator(metadata, part_trees, cover_data, images)
                media_type = "application/epub+zip"
                extension = "epub"
            case DownloadFormat.pdf:
-                author_image = await fetch_image(
+                if not PDFS_ENABLED:
-                    metadata["user"]["avatar"].replace("-256-", "-512-")
+                    logger.error("PDF Downloads not enabled.")
-                )
+                    return HTMLResponse(
-                if not author_image:
+                        status_code=403,
-                    raise HTTPException(status_code=422)
+                        content='PDF Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
                    )
                book = PDFGenerator(
                    metadata, part_trees, cover_data, images, author_image
                )
                media_type = "application/pdf"
                extension = "pdf"
-        logger.info(f"Retrieved story metadata and cover ({story_id=})")
+        match mode:
            case DownloadMode.story:
                metadata = await fetch_story(download_id, cookies)
                output_buffer = await download_story(
                    metadata, download_images, format, cookies
                )
            case DownloadMode.part:
                download_id, metadata = await fetch_story_from_partId(
                    download_id, cookies
                )
                output_buffer = await download_story(
                    metadata, download_images, format, cookies
                )
            case DownloadMode.list:
                if not PDFS_ENABLED:
                    logger.error("List Downloads not enabled.")
                    return HTMLResponse(
                        status_code=403,
                        content='List Downloads have been disabled by the server administrator. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
                    )
-        book.compile()
+                metadata = await fetch_list(download_id, cookies)
                output_buffer = await download_list(
                    metadata, download_images, format, cookies
                )
-        book_buffer = book.dump()
+                media_type = "application/zip"
                extension = "zip"
        async def iterfile():
-            while chunk := book_buffer.read(512 * 4):  # 4 kb/s
+            while chunk := output_buffer.read(512 * 4):  # 4 kb/s
                await asyncio.sleep(0.1)  # throttle download speed
                yield chunk
        return StreamingResponse(
-            iterfile(),
+            output_buffer if PDFS_ENABLED else iterfile(),
            media_type=media_type,
            headers={
-                "Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"',  # Thanks https://stackoverflow.com/a/72729058
+                "Content-Disposition": f'attachment; filename="{slugify(metadata["name" if mode==DownloadMode.list else "title"])}_{download_id}{"_images" if download_images else ""}.{extension}"',  # Thanks https://stackoverflow.com/a/72729058
-                "Content-Length": str(book_buffer.getbuffer().nbytes),
+                "Content-Length": str(output_buffer.getbuffer().nbytes),
            },
        )
@@ -1,4 +1,6 @@
 <script>
  const PDFS_ENABLED = import.meta.env.VITE_ENABLE_PDFS === "true";
  let downloadImages = $state(false);
  let downloadAsPdf = $state(false); // 0 = epub, 1 = pdf
  let isPaidStory = $state(false);
@@ -9,7 +11,7 @@
    password: ""
  });
  let downloadId = $state("");
-  /** @type {"story" | "part" | ""} */
+  /** @type {"story" | "part" | "list" |""} */
  let mode = $state("");
  let inputUrl = $state("");
@@ -81,6 +83,12 @@
      setInputAsValid(
        input.split("?", 1)[0].split("/stories/")[1] // removes params
      );
    } else if (input.includes("/list/")) {
      // https://www.wattpad.com/list/829974064
      mode = "list";
      setInputAsValid(
        input.split("?", 1)[0].split("/list/")[1] // removes params
      );
    } else {
      // https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
      input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title
@@ -110,29 +118,32 @@
          >
            WP Downloader
          </h1>
-          <div role="alert" class="alert mt-10 max-w-md break-words bg-green-200">
+          {#if !PDFS_ENABLED}
-            <svg
+            <div role="alert" class="alert mt-10 max-w-md break-words bg-green-200">
-              xmlns="http://www.w3.org/2000/svg"
+              <svg
-              fill="none"
+                xmlns="http://www.w3.org/2000/svg"
-              viewBox="0 0 24 24"
+                fill="none"
-              class="h-6 w-6 shrink-0 stroke-current"
+                viewBox="0 0 24 24"
-            >
+                class="h-6 w-6 shrink-0 stroke-current"
              <path
                stroke-linecap="round"
                stroke-linejoin="round"
                stroke-width="2"
                d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
              ></path>
            </svg>
            <div>
              <p>
                Donators get access to <span class="font-semibold">high-speed PDF Downloads</span>
              </p>
              <a href="https://buymeacoffee.com/theonlywayup" class="link" target="_blank"
                >Donate now</a
              >
                <path
                  stroke-linecap="round"
                  stroke-linejoin="round"
                  stroke-width="2"
                  d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
                ></path>
              </svg>
              <div>
                <p>
                  Donators get access to <span class="font-semibold">high-speed PDF Downloads</span>
                </p>
                <a href="https://buymeacoffee.com/theonlywayup" class="link" target="_blank"
                  >Donate now</a
                >
              </div>
            </div>
-          </div>
+          {/if}
          <!-- <div role="alert" class="alert bg-cyan-300 mt-5">
            <svg
              xmlns="http://www.w3.org/2000/svg"
@@ -154,10 +165,13 @@
          </p>
          <ul class="list list-inside pt-4 text-xl">
            <!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. -->
            <!-- <li>12/24 - ⚡ Super-fast Downloads!</li>
            <li>12/24 - 📑 PDF Downloads!</li> -->
            <li>05/25 - ⚖️ Legal Compliance</li>
-            <li>12/24 - 📂 Less Errors, Throttled Downloads</li>
+            {#if PDFS_ENABLED}
              <li>12/24 - ⚡ Super-fast Downloads!</li>
              <li>12/24 - 📑 PDF Downloads!</li>
            {:else}
              <li>12/24 - 📂 Less Errors, Throttled Downloads</li>
            {/if}
            <li>11/24 - 🔗 Paste Links!</li>
            <li>11/24 - 📨 Send to Kindle Support!</li>
@@ -249,26 +263,30 @@
                href={url}
                onclick={() => (afterDownloadPage = true)}>Download</a
              >
            </div>
-              <!-- <label class="swap w-fit label mt-2">
+            {#if PDFS_ENABLED}
              <label class="swap w-fit label mt-2 pb-2">
                <input type="checkbox" bind:checked={downloadAsPdf} />
-                <div class="swap-on">
+                <div class="swap-on absolute left-0 text-gray-800">
                  Downloading as <span class=" underline text-bold">PDF</span> (Click)
                </div>
-                <div class="swap-off">
+                <div class="swap-off absolute left-0 text-gray-800">
                  Downloading as <span class=" underline text-bold">EPUB</span> (Click)
                </div>
              </label> -->
              <label class="label cursor-pointer">
                <span class="label-text text-gray-800">Include Images (<strong>Slower Download</strong>)</span>
                <input
                  type="checkbox"
                  class="checkbox-warning checkbox shadow-md"
                  bind:checked={downloadImages}
                />
              </label>
-            </div>
+            {/if}
            <label class="label cursor-pointer">
              <span class="label-text text-gray-800"
                >Include Images (<strong>Slower Download</strong>)</span
              >
              <input
                type="checkbox"
                class="checkbox-warning checkbox shadow-md"
                bind:checked={downloadImages}
              />
            </label>
          </form>
        </div>
      {:else}
Author	SHA1	Message	Date
Aron BenDaniel	e53ba34bac	list-downloading v3	2025-11-14 14:57:25 -05:00
Aron BenDaniel	943846a88b	Merge remote-tracking branch 'TheOnlyWayUp/fix/#85-image-size' into list-downloading	2025-11-14 14:39:25 -05:00
TheOnlyWayUp	a84d4edb6b	fix(docker): Install libraries for weasyprint	2025-11-10 00:29:38 +05:30
TheOnlyWayUp	1379f416bc	fix(docker): .dockerignore targets subdirectories	2025-11-10 00:29:09 +05:30
TheOnlyWayUp	83466ded4d	fix(docker): Remove cache files after install	2025-11-10 00:05:09 +05:30
Aron BenDaniel	6c1a145577	docker: Fix .dockerignore uv.lock entry	2025-10-31 05:49:08 +05:30
Aron BenDaniel	f20dfa2017	feat(api): Made PDFs-enabled check earlier	2025-10-30 13:43:12 -04:00
Aron BenDaniel	265799907d	docker: Fix merge conflict with #82	2025-10-30 13:38:13 -04:00
Dhanush R	cc9ac6093f	feat(api): Remove dependency on exiftool (#82 )	2025-10-30 16:11:16 +05:30
AaronBenDaniel	2956399b4b	fix(api): Change error message capitalization	2025-10-29 18:02:42 +05:30
AaronBenDaniel	d372020bac	fix: Change name of flag to enable PDF downloads	2025-10-29 18:02:41 +05:30
AaronBenDaniel	c2104ee514	feat: Control features with build arg	2025-10-29 18:02:37 +05:30
Aaron BenDaniel	fa60de79fd	Update README.md copyright year The future is now.	2025-10-29 01:44:54 +05:30