fix: Add git to Dockerfile

api: Update requirements.txt
api: Use keydb fork of aiohttp-client-cache
2024-12-03 05:50:01 +05:30 · 2024-12-02 11:37:08 +00:00 · 2024-12-02 11:25:32 +00:00 · 2024-12-01 09:42:25 +05:30 · 2024-12-01 00:15:03 +00:00 · 2024-12-01 00:13:22 +00:00
16 changed files with 1881 additions and 267 deletions
@@ -4,3 +4,7 @@ venv
 data
 *ipynb
 build
+.vscode
+.venv
+.env
+*log
@@ -12,6 +12,10 @@ RUN npm run build
 FROM python:3.10-slim

 WORKDIR /app
+
+# Install git
+RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
+
 COPY src/api/requirements.txt requirements.txt
 RUN pip3 install -r requirements.txt
 COPY --from=0 /build/build /app/build
@@ -2,7 +2,7 @@ WattpadDownloader ([Demo](https://wpd.rambhat.la))
 ---
 Straightforward, Extendable WebApp to download Wattpad Books as EPUB Files.

-![image](https://github.com/TheOnlyWayUp/WattpadDownloader/assets/76237496/8a3fda0b-b851-4c5f-9306-ba9c17cdcc8b)
+![image](https://github.com/user-attachments/assets/b9d87d6b-5302-4561-98b0-d7f95bff9f04)


 Stars ⭐ are appreciated. Thanks!
@@ -14,7 +14,7 @@ Stars ⭐ are appreciated. Thanks!
 - 🐇 Fast Generation, Ratelimit Handling.
 - 🐳 Docker Support
 - 🏷️ Generated EPUB File includes Metadata. (Dublin Core Spec)
- 📖 Plays well with E-Readers. (Kindle Support if KOReader present, ReMarkable, KOBO, ...)
+- 📖 Plays well with E-Readers. (Kindle Support with Send2Kindle, ReMarkable, KOBO, KOReader...)
 - 💻 Easily Hackable. Extend with ease.


@@ -25,6 +25,20 @@ Stars ⭐ are appreciated. Thanks!

 That's it! You can use your instance at `http://localhost:5042`. API Documentation is available at `http://localhost:5042/docs`.

+### Concurrent Requests
+The file-based cache struggles with concurrent requests (discussed in TheOnlyWayUp/WattpadDownloader#2 and TheOnlyWayUp/WattpadDownloader#22). If you're downloading a large number of books concurrently, switch to the Redis cache. Assuming you've built the image already:
+1. Fill in the `.env` file. A `localhost` Redis URL will not work from inside a Docker container; use [`host.docker.internal`](https://docs.docker.com/desktop/features/networking/#i-want-to-connect-from-a-container-to-a-service-on-the-host) or a platform-specific equivalent instead.
+```
+USE_CACHE=true
+CACHE_TYPE=redis
+REDIS_CONNECTION_URL=redis://username:password@host:port
+```
+
+
+2. Run the container and supply the `.env` file: `docker run -d -p 5042:80 --env-file .env wp_downloader`
+
+   Alternatively, if Redis is running on localhost, modify your `.env` file, replacing `localhost` with `host.docker.internal` (`redis://localhost:6379` should become `redis://host.docker.internal:6379`). Then start the container with `docker run -d -p 5042:80 --env-file .env --add-host host.docker.internal:host-gateway wp_downloader`
+
 ---

 My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fast and well-documented package.
@@ -0,0 +1,3 @@
+USE_CACHE=true
+CACHE_TYPE=file
+REDIS_CONNECTION_URL=
@@ -0,0 +1 @@
+3.10
@@ -0,0 +1,26 @@
+[project]
+name = "api"
+version = "0.1.0"
+description = "Wattpad Downloader API"
+readme = "../../README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "aiohttp>=3.9.1",
+    "rich>=13.9.4",
+    "fastapi>=0.115.5",
+    "ebooklib>=0.18",
+    "python-dotenv>=1.0.1",
+    "pydantic-settings>=2.6.1",
+    "eliot>=1.16.0",
+    "type-extensions>=0.1.2",
+    "backoff>=2.2.1",
+    "aiohttp-client-cache[all]",
+    "bs4>=0.0.2",
+    "uvicorn>=0.32.1",
+]
+
+[tool.ruff.lint]
+ignore = ['E402']
+
+[tool.uv.sources]
+aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-cache.git", rev = "keydb-ttl" }
@@ -1,62 +1,61 @@
-aiofiles==23.2.1
-aiohttp==3.9.1
-aiohttp-client-cache==0.10.0
+aioboto3==13.2.0
+aiobotocore==2.15.2
+aiofiles==24.1.0
+aiohappyeyeballs==2.4.4
+aiohttp==3.11.9
+aiohttp-client-cache @ git+https://github.com/TheOnlyWayUp/aiohttp-client-cache.git@1f94f1d751e7320c0ea981d532ff02924782dae6
+aioitertools==0.12.0
 aiosignal==1.3.1
-aiosqlite==0.19.0
-annotated-types==0.6.0
-anyio==4.2.0
-asttokens==2.4.1
+aiosqlite==0.20.0
+annotated-types==0.7.0
+anyio==4.6.2.post1
 async-timeout==4.0.3
 attrs==23.1.0
 backoff==2.2.1
 beautifulsoup4==4.12.3
+boltons==24.1.0
+boto3==1.35.36
+botocore==1.35.36
 bs4==0.0.2
 click==8.1.7
-comm==0.2.0
-debugpy==1.8.0
-decorator==5.1.1
-EbookLib==0.18
-exceptiongroup==1.2.0
-executing==2.0.1
-fastapi==0.108.0
+dnspython==2.7.0
+ebooklib==0.18
+eliot==1.16.0
+exceptiongroup==1.2.2
+fastapi==0.115.5
 frozenlist==1.4.1
 h11==0.14.0
 idna==3.6
-ipykernel==6.28.0
-ipython==8.19.0
-itsdangerous==2.1.2
-jedi==0.19.1
-jupyter_client==8.6.0
-jupyter_core==5.5.1
-lxml==4.9.4
+itsdangerous==2.2.0
+jmespath==1.0.1
+lxml==5.3.0
 markdown-it-py==3.0.0
-matplotlib-inline==0.1.6
 mdurl==0.1.2
+motor==3.6.0
 multidict==6.0.4
-nest-asyncio==1.5.8
-packaging==23.2
-parso==0.8.3
-pexpect==4.9.0
-platformdirs==4.1.0
-prompt-toolkit==3.0.43
-psutil==5.9.7
-ptyprocess==0.7.0
-pure-eval==0.2.2
-pydantic==2.5.3
-pydantic_core==2.14.6
-Pygments==2.17.2
-python-dateutil==2.8.2
-pyzmq==25.1.2
-rich==13.7.0
+orjson==3.10.12
+propcache==0.2.1
+pydantic==2.10.2
+pydantic-core==2.27.1
+pydantic-settings==2.6.1
+pygments==2.18.0
+pymongo==4.9.2
+pyrsistent==0.20.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+redis==5.2.0
+rich==13.9.4
+s3transfer==0.10.4
+setuptools==75.6.0
 six==1.16.0
-sniffio==1.3.0
-soupsieve==2.5
-stack-data==0.6.3
-starlette==0.32.0.post1
-tornado==6.4
-traitlets==5.14.0
-typing_extensions==4.9.0
+sniffio==1.3.1
+soupsieve==2.6
+starlette==0.41.3
+type-extensions==0.1.2
+typing-extensions==4.12.2
 url-normalize==1.4.3
-uvicorn==0.25.0
-wcwidth==0.2.12
-yarl==1.9.4
+urllib3==2.2.3
+uvicorn==0.32.1
+wrapt==1.17.0
+yarl==1.18.3
+zope-interface==7.2
@@ -1,61 +1,104 @@
-import asyncio
-from typing import Optional
-from ebooklib import epub
-import unicodedata
+from typing import List, Optional, Tuple
+from typing_extensions import TypedDict
 import re
+import unicodedata
+import logging
+from os import environ
+from enum import Enum
 import backoff
-from aiohttp import ClientResponseError, ClientSession
-from aiohttp_client_cache.session import CachedSession
-from aiohttp_client_cache import FileBackend
+from eliot import to_file, start_action
+from eliot.stdlib import EliotHandler
+from dotenv import load_dotenv
+from ebooklib import epub
+from ebooklib.epub import EpubBook
 from bs4 import BeautifulSoup
+from pydantic import TypeAdapter, model_validator, field_validator
+from pydantic_settings import BaseSettings
+from aiohttp import ClientResponseError
+from aiohttp_client_cache.session import CachedSession
+from aiohttp_client_cache import FileBackend, RedisBackend

+load_dotenv(override=True)
+
+handler = EliotHandler()
+logging.getLogger("fastapi").setLevel(logging.INFO)
+logging.getLogger("fastapi").addHandler(handler)
+
+if environ.get("DEBUG"):
+    to_file(open("eliot.log", "wb"))
+
+logger = logging.Logger("wpd")
+logger.addHandler(handler)
+
+# --- #
+
+
+class CacheTypes(Enum):
+    file = "file"
+    redis = "redis"
+
+
+class Config(BaseSettings):
+    USE_CACHE: bool = True
+    CACHE_TYPE: CacheTypes = CacheTypes.file
+    REDIS_CONNECTION_URL: str = ""
+
+    @field_validator("USE_CACHE", mode="before")
+    def validate_use_cache(cls, value):
+        # Return default if value is an empty string
+        if value == "":
+            return True  # Default value for USE_CACHE
+        return value
+
+    @field_validator("CACHE_TYPE", mode="before")
+    def validate_cache_type(cls, value):
+        # Thanks https://stackoverflow.com/a/78157474
+        if value == "":
+            return "file"
+        return value
+
+    @model_validator(mode="after")
+    def prevent_mismatched_redis_url(self):
+        match self.CACHE_TYPE:
+            case CacheTypes.file:
+                if self.REDIS_CONNECTION_URL:
+                    raise ValueError(
+                        "REDIS_CONNECTION_URL provided when File cache selected. To use Redis as a cache, set CACHE_TYPE=redis."
+                    )
+            case CacheTypes.redis:
+                if not self.REDIS_CONNECTION_URL:
+                    raise ValueError(
+                        "REDIS_CONNECTION_URL not provided when Redis cache selected. To use File cache, set CACHE_TYPE=file."
+                    )
+        return self
+
+
+config = Config()
+
+# --- #

 headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
 }

-cache = FileBackend(use_temp=True, expire_after=43200)  # 12 hours
+if config.USE_CACHE:
+    match config.CACHE_TYPE:
+        case CacheTypes.file:
+            cache = FileBackend(use_temp=True, expire_after=43200)  # 12 hours
+        case CacheTypes.redis:
+            cache = RedisBackend(
+                cache_name="wpd-aiohttp-cache",
+                address=config.REDIS_CONNECTION_URL,
+                expire_after=43200,  # 12 hours
+            )
+else:
+    cache = None
+
+logger.info(f"Using {cache=}")

 # --- Utilities --- #


-async def wp_get_cookies(username: str, password: str) -> dict:
-    # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
-    """Retrieves authorization cookies from Wattpad by logging in with user creds.
-
-    Args:
-        username (str): Username.
-        password (str): Password.
-
-    Raises:
-        ValueError: Bad status code.
-        ValueError: No cookies returned.
-
-    Returns:
-        dict: Authorization cookies.
-    """
-    async with ClientSession(headers=headers) as session:
-        async with session.post(
-            "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login",
-            data={
-                "username": username.lower(),
-                "password": password,
-            },  # the username.lower() is for caching
-        ) as response:
-            if response.status != 204:
-                raise ValueError("Not a 204.")
-
-            cookies = {
-                k: v.value
-                for k, v in response.cookies.items()  # Thanks https://stackoverflow.com/a/32281245
-            }
-
-            if not cookies:
-                raise ValueError("No cookies.")
-
-            return cookies
-
-
 def slugify(value, allow_unicode=False) -> str:
    """
    Taken from https://github.com/django/django/blob/master/django/utils/text.py
@@ -79,44 +122,128 @@ def slugify(value, allow_unicode=False) -> str:
    return re.sub(r"[-\s]+", "-", value).strip("-_")


+async def wp_get_cookies(username: str, password: str) -> dict:
+    # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
+    """Retrieves authorization cookies from Wattpad by logging in with user creds.
+
+    Args:
+        username (str): Username.
+        password (str): Password.
+
+    Raises:
+        ValueError: Bad status code.
+        ValueError: No cookies returned.
+
+    Returns:
+        dict: Authorization cookies.
+    """
+    with start_action(action_type="api_fetch_cookies"):
+        async with CachedSession(headers=headers, cache=None) as session:
+            async with session.post(
+                "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login",
+                data={
+                    "username": username.lower(),
+                    "password": password,
+                },  # the username.lower() is for caching
+            ) as response:
+                if response.status != 204:
+                    raise ValueError("Not a 204.")
+
+                cookies = {
+                    k: v.value
+                    for k, v in response.cookies.items()  # Thanks https://stackoverflow.com/a/32281245
+                }
+
+                if not cookies:
+                    raise ValueError("No cookies.")
+
+                return cookies
+
+
+# --- Models --- #
+
+
+class Language(TypedDict):
+    name: str
+
+
+class User(TypedDict):
+    username: str
+
+
+class Part(TypedDict):
+    id: int
+    title: str
+
+
+class Story(TypedDict):
+    id: str
+    title: str
+    createDate: str
+    modifyDate: str
+    language: Language
+    user: User
+    description: str
+    cover: str
+    completed: bool
+    tags: List[str]
+    mature: bool
+    url: str
+    parts: List[Part]
+    isPaywalled: bool
+
+
+story_ta = TypeAdapter(Story)
+
 # --- API Calls --- #


@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
-async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> dict:
-    """Taking a story_id, return its information from the Wattpad API."""
-    async with (
-        CachedSession(headers=headers, cache=cache)
-        if not cookies
-        else ClientSession(headers=headers, cookies=cookies)
+async def fetch_story_from_partId(
+    part_id: int, cookies: Optional[dict] = None
+) -> Tuple[str, Story]:
+    """Resolve a Part ID to its parent story.
+
+    Args:
+        part_id (int): ID of any part belonging to the story.
+        cookies (Optional[dict]): Authorization cookies. When provided they are
+            sent with the request and the response cache is bypassed.
+
+    Raises:
+        ClientResponseError: Non-2xx response from the Wattpad API
+            (retried with exponential backoff by the decorator).
+
+    Returns:
+        Tuple[str, Story]: The story ID (as a string) and the validated story metadata.
+    """
+    with start_action(action_type="api_fetch_storyFromPartId", part_id=part_id):
+        # Cookies must actually be handed to the session, otherwise an
+        # authenticated download by part URL is sent unauthenticated.
+        async with CachedSession(
+            headers=headers, cookies=cookies, cache=None if cookies else cache
        ) as session:  # Don't cache requests with Cookies.
            async with session.get(
-            f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
+                f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover)"
            ) as response:
-            if not response.ok:
-                if response.status in [404, 400]:
-                    return {}
                response.raise_for_status()

                body = await response.json()

-    return body
+        return str(body["groupId"]), story_ta.validate_python(body["group"])
+
+
+@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
+async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> Story:
+    """Taking a story_id, return its information from the Wattpad API."""
+    with start_action(action_type="api_fetch_story", story_id=story_id):
+        async with CachedSession(
+            headers=headers, cookies=cookies, cache=None if cookies else cache
+        ) as session:
+            async with session.get(
+                f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
+            ) as response:
+                response.raise_for_status()
+
+                body = await response.json()
+
+        return story_ta.validate_python(body)


@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
 async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str:
    """Return the HTML Content of a Part."""
-    async with (
-        CachedSession(headers=headers, cache=cache)
-        if not cookies
-        else ClientSession(headers=headers, cookies=cookies)
-    ) as session:  # Don't cache requests with Cookies.
+    with start_action(action_type="api_fetch_partContent", part_id=part_id):
+        async with CachedSession(
+            headers=headers, cookies=cookies, cache=None if cookies else cache
+        ) as session:
            async with session.get(
                f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"
            ) as response:
-            if not response.ok:
-                if response.status in [404, 400]:
-                    return ""
                response.raise_for_status()

                body = await response.text()
@@ -125,17 +252,13 @@ async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> st


@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
-async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
-    """Fetch image bytes."""
-    async with (
-        CachedSession(headers=headers, cache=cache)
-        if not cookies
-        else ClientSession(headers=headers, cookies=cookies)
-    ) as session:  # Don't cache requests with Cookies.
+async def fetch_cover(url: str) -> bytes:
+    """Fetch cover image bytes."""
+    with start_action(action_type="api_fetch_cover", url=url):
+        async with CachedSession(
+            headers=headers, cache=None
+        ) as session:  # Don't cache images.
            async with session.get(url) as response:
-            if not response.ok:
-                if response.status in [404, 400]:
-                    return bytes()
                response.raise_for_status()

                body = await response.read()
@@ -146,11 +269,13 @@ async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
 # --- EPUB Generation --- #


-def set_metadata(book, data):
+def set_metadata(book: EpubBook, data: Story) -> None:
+    """Set book metadata."""
    book.add_author(data["user"]["username"])

+    book.add_metadata("DC", "title", data["title"])
    book.add_metadata("DC", "description", data["description"])
-    book.add_metadata("DC", "created", data["createDate"])
+    book.add_metadata("DC", "date", data["createDate"])
    book.add_metadata("DC", "modified", data["modifyDate"])
    book.add_metadata("DC", "language", data["language"]["name"])

@@ -165,19 +290,26 @@ def set_metadata(book, data):
    )


-async def set_cover(book, data, cookies: Optional[dict] = None):
-    book.set_cover("cover.jpg", await fetch_cover(data["cover"], cookies=cookies))
+async def set_cover(book: EpubBook, data: Story) -> None:
+    """Download the story's cover image and register it as the book cover.
+
+    Args:
+        book (EpubBook): Book being assembled; modified in place.
+        data (Story): Story metadata; only ``data["cover"]`` (the image URL) is read.
+
+    Raises:
+        ClientResponseError: Propagated from ``fetch_cover`` when the image
+            cannot be downloaded.
+    """
+    # A "titlepage.xhtml" EpubHtml used to be constructed here but was never
+    # added to the book, so it had no effect on the generated EPUB. It has been
+    # dropped; EpubBook.set_cover registers the cover image itself.
+    book.set_cover("cover.jpg", await fetch_cover(data["cover"]))


 async def add_chapters(
-    book, data, download_images: bool = False, cookies: Optional[dict] = None
+    book: EpubBook,
+    data: Story,
+    download_images: bool = False,
+    cookies: Optional[dict] = None,
 ):
    chapters = []

    for cidx, part in enumerate(data["parts"]):
        content = await fetch_part_content(part["id"], cookies=cookies)
        title = part["title"]
-        clean_title = slugify(title)

        # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
        chapter = epub.EpubHtml(
@@ -188,14 +320,15 @@ async def add_chapters(

        if download_images:
            soup = BeautifulSoup(content, "lxml")
-            async with (
-                CachedSession(headers=headers, cache=cache)
-                if not cookies
-                else ClientSession(headers=headers, cookies=cookies)
-            ) as session:  # Don't cache requests with Cookies.
+
+            async with CachedSession(
+                headers=headers, cache=None
+            ) as session:  # Don't cache images.
                for idx, image in enumerate(soup.find_all("img")):
                    if not image["src"]:
                        continue
+                    # Find all image tags and filter for those with sources
+
                    async with session.get(image["src"]) as response:
                        img = epub.EpubImage(
                            media_type="image/jpeg",
@@ -203,8 +336,10 @@ async def add_chapters(
                            file_name=f"static/{cidx}/{idx}.jpeg",
                        )
                        book.add_item(img)
+                        # Fetch image and pack
+
                        content = content.replace(
-                            str(image), f'<img src="static/{cidx}/{idx}.jpeg"/>'
+                            str(image["src"]), f"static/{cidx}/{idx}.jpeg"
                        )

        chapter.set_content(f"<h1>{title}</h1>" + content)
@@ -216,7 +351,7 @@ async def add_chapters(
    for chapter in chapters:
        book.add_item(chapter)

-    book.toc = tuple(chapters)
+    book.toc = chapters

    # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
    book.add_item(epub.EpubNcx())
@@ -1,7 +1,16 @@
+"""WattpadDownloader API Server."""
+
 from typing import Optional
+import asyncio
+import tempfile
 from pathlib import Path
-from fastapi import FastAPI, HTTPException
+from io import BytesIO
+from enum import Enum
+from eliot import start_action
+from aiohttp import ClientResponseError
+from fastapi import FastAPI, Request
 from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
+from fastapi.staticfiles import StaticFiles
 from ebooklib import epub
 from create_book import (
    retrieve_story,
@@ -10,28 +19,106 @@ from create_book import (
    add_chapters,
    slugify,
    wp_get_cookies,
+    fetch_story_from_partId,
+    logger,
 )
-import tempfile
-from io import BytesIO
-from fastapi.staticfiles import StaticFiles
+

 app = FastAPI()
 BUILD_PATH = Path(__file__).parent / "build"

+headers = {
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
+}
+
+
+class RequestCancelledMiddleware:
+    """ASGI middleware that cancels the request handler when the client disconnects.
+
+    Incoming ASGI messages are read by a background poller and relayed to the
+    wrapped app through a queue. When an ``http.disconnect`` message arrives the
+    handler task is cancelled, so an abandoned download stops doing work.
+    """
+
+    # Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
+    def __init__(self, app):
+        self.app = app
+
+    async def __call__(self, scope, receive, send):
+        # Only HTTP requests are intercepted; other scope types pass straight through.
+        if scope["type"] != "http":
+            await self.app(scope, receive, send)
+            return
+
+        # Let's make a shared queue for the request messages
+        queue = asyncio.Queue()
+
+        async def message_poller(sentinel, handler_task):
+            while True:
+                message = await receive()
+                if message["type"] == "http.disconnect":
+                    handler_task.cancel()
+                    return sentinel  # Break the loop
+
+                # Puts the message in the queue
+                await queue.put(message)
+
+        sentinel = object()
+        handler_task = asyncio.create_task(self.app(scope, queue.get, send))
+        # Keep a reference to the poller: the event loop only holds weak
+        # references to tasks, and we need the handle to stop it afterwards.
+        poller_task = asyncio.create_task(message_poller(sentinel, handler_task))
+
+        try:
+            return await handler_task
+        except asyncio.CancelledError:
+            logger.info("Cancelling task as connection closed")
+        finally:
+            # Whether the handler finished, failed or was cancelled, the poller
+            # has nothing left to do; cancelling a finished task is a no-op.
+            poller_task.cancel()
+
+
+app.add_middleware(RequestCancelledMiddleware)
+
+
+class DownloadMode(Enum):
+    story = "story"
+    part = "part"
+

@app.get("/")
 def home():
    return FileResponse(BUILD_PATH / "index.html")


-@app.get("/download/{story_id}")
-async def download_book(
-    story_id: int,
+@app.exception_handler(ClientResponseError)
+def download_error_handler(request: Request, exception: ClientResponseError):
+    match exception.status:
+        case 400 | 404:
+            return HTMLResponse(
+                status_code=404,
+                content='This story does not exist, or has been deleted. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
+            )
+        case 429:
+            # Rate-limit by Wattpad
+            return HTMLResponse(
+                status_code=429,
+                content='The website is overloaded. Please try again in a few minutes. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
+            )
+        case _:
+            # Unhandled error
+            return HTMLResponse(
+                status_code=500,
+                content='Something went wrong. Yell at me on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
+            )
+
+
+@app.get("/download/{download_id}")
+async def handle_download(
+    download_id: int,
    download_images: bool = False,
+    mode: DownloadMode = DownloadMode.story,
    username: Optional[str] = None,
    password: Optional[str] = None,
 ):
+    with start_action(
+        action_type="download",
+        download_id=download_id,
+        download_images=download_images,
+        mode=mode,
+    ):
        if username and not password or password and not username:
+            logger.error(
+                "Username with no Password or Password with no Username provided."
+            )
            return HTMLResponse(
                status_code=422,
                content='Include both the username <u>and</u> password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
@@ -42,6 +129,7 @@ async def download_book(
            try:
                cookies = await wp_get_cookies(username=username, password=password)
            except ValueError:
+                logger.error("Invalid username or password.")
                return HTMLResponse(
                    status_code=403,
                    content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
@@ -49,25 +137,22 @@ async def download_book(
        else:
            cookies = None

-    data = await retrieve_story(story_id, cookies=cookies)
+        match mode:
+            case DownloadMode.story:
+                story_id = download_id
+                metadata = await retrieve_story(story_id, cookies)
+            case DownloadMode.part:
+                story_id, metadata = await fetch_story_from_partId(download_id, cookies)
+
+        logger.info(f"Retrieved story id ({story_id=})")
+
        book = epub.EpubBook()
+        set_metadata(book, metadata)
+        await set_cover(book, metadata)

-    try:
-        set_metadata(book, data)
-    except KeyError:
-        return HTMLResponse(
-            status_code=404,
-            content='Story not found. Check the ID - Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
-        )
-
-    await set_cover(book, data, cookies=cookies)
-    # print("Metadata Downloaded")
-
-    # Chapters are downloaded
        async for title in add_chapters(
-        book, data, download_images=download_images, cookies=cookies
+            book, metadata, download_images=download_images, cookies=cookies
        ):
-        # print(f"Part ({title}) downloaded")
            ...

        # Book is compiled
@@ -85,7 +170,7 @@ async def download_book(
            BytesIO(book_data),
            media_type="application/epub+zip",
            headers={
-            "Content-Disposition": f'attachment; filename="{slugify(data["title"])}_{story_id}_{"images" if download_images else ""}.epub"'  # Thanks https://stackoverflow.com/a/72729058
+                "Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.epub"'  # Thanks https://stackoverflow.com/a/72729058
            },
        )

@@ -96,4 +181,4 @@ app.mount("/", StaticFiles(directory=BUILD_PATH), "static")
 if __name__ == "__main__":
    import uvicorn

-    uvicorn.run(app, host="0.0.0.0", port=80)
+    uvicorn.run("main:app", host="0.0.0.0", port=80, workers=16)
@@ -25,8 +25,6 @@

 		<!-- Meta Tags Generated with https://metatags.io -->

-		<script defer src="https://feedback.fish/ff.js?pid=f8df016d4ffdfb"></script>
-
 		%sveltekit.head%		  
 	</head>
 	<body data-sveltekit-preload-data="hover">
@@ -18,10 +18,10 @@
  <aside>
    <div class="grid grid-cols-3 max-w-lg w-full">
      <a
-        href="https://liberapay.com/TheOnlyWayUp/"
+        href="https://patreon.com/theonlywayup"
        target="_blank"
        class="link"
-        data-umami-event="Footer Donate">Donate</a
+        data-umami-event="Footer Donate">Patreon</a
      >
      <a
        href="https://rambhat.la"
@@ -1,45 +1,79 @@
 <script>
-  let story_id = "";
  let download_images = false;
  let is_paid_story = false;
+  let invalid_url = false;
+  let after_download_page = false;
  let credentials = {
    username: "",
    password: "",
  };
-  let after_download_page = false;
-  let url = "";
-
-  let raw_story_id = "";
-  let is_part_id = false;
+  let download_id = "";
+  let mode = "";
+  let input_url = "";

  let button_disabled = false;
  $: button_disabled =
-    !story_id ||
+    !input_url ||
    (is_paid_story && !(credentials.username && credentials.password));

-  $: {
-    is_part_id = false;
-    if (raw_story_id.includes("wattpad.com")) {
-      // Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, I wasn't able to remove it.
-      // In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url).
+  $: url =
+    `/download/` +
+    download_id +
+    `?om=1` +
+    (download_images ? "&download_images=true" : "") +
+    (is_paid_story
+      ? `&username=${encodeURIComponent(credentials.username)}&password=${encodeURIComponent(credentials.password)}`
+      : "") +
+    `&mode=${mode}`;

-      if (raw_story_id.includes("/story/")) {
+  $: {
+    if (input_url.length) {
+      input_url = input_url.toLowerCase();
+
+      invalid_url = false;
+
+      if (/^\d+$/.test(input_url)) {
+        // All numbers
+        download_id = input_url;
+        mode = "story";
+      } else if (input_url.includes("wattpad.com/")) {
+        // Is a string and contains wattpad.com/
+
+        if (input_url.includes("/story/")) {
          // https://wattpad.com/story/237369078-wattpad-books-presents
-        story_id = raw_story_id.split("/story/")[1].split("-")[0];
-        raw_story_id = story_id;
-      } else if (raw_story_id.includes("/stories/")) {
+          input_url = input_url.split("-")[0].split("?")[0].split("/story/")[1]; // removes tracking fields and title
+          download_id = input_url;
+          mode = "story";
+        } else if (input_url.includes("/stories/")) {
          // https://www.wattpad.com/api/v3/stories/237369078?fields=...
-        story_id = raw_story_id.split("/stories/")[1].split("?")[0];
-        raw_story_id = story_id;
+          input_url = input_url.split("?")[0].split("/stories/")[1]; // removes params
+          download_id = input_url;
+          mode = "story";
        } else {
-        // https://www.wattpad.com/939051741-wattpad-books-presents-part-name
-        is_part_id = true;
-        raw_story_id = "";
-        story_id = "";
+          // https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
+          input_url = input_url.split("-")[0].split("?")[0].split("wattpad.com/")[1]; // removes tracking fields and title
+          download_id = input_url;
+          if (/^\d+$/.test(download_id)) {
+            // If "wattpad.com/{download_id}" contains only numbers
+            mode = "part";
+          } else {
+            invalid_url = true;
+            input_url = "";
+            download_id = "";
+          }
        }
      } else {
-      story_id = parseInt(raw_story_id) || ""; // parseInt returns NaN for undefined values.
-      raw_story_id = story_id;
+        invalid_url = true;
+      }
+
+      input_url = input_url.match(/\d+/g)?.join("") || "";
+      download_id = input_url;
+
+      // Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, I wasn't able to remove it.
+      // In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url).
+    } else {
+      invalid_url = false;
+      download_id = "";
    }
  }
 </script>
@@ -61,6 +95,18 @@
          </p>
          <ul class="pt-4 list list-inside text-xl">
            <!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. -->
+            <li>12/24 - 📂 Improved Performance</li>
+            <li>11/24 - 🔗 Paste Links!</li>
+            <li>11/24 - 📨 Send to Kindle Support!</li>
+
+            <li>11/24 - ⚒️ Fix Image Downloads</li>
+            <li>
+              10/24 - 👾 Add the <a
+                href="https://discord.com/oauth2/authorize?client_id=1292173380065296395&permissions=274878285888&scope=bot%20applications.commands"
+                target="_blank"
+                class="link underline">Discord Bot</a
+              >!
+            </li>
            <li>07/24 - 🔡 RTL Language support! (Arabic, etc.)</li>
            <li>06/24 - 🔑 Authenticated Downloads!</li>
            <li>06/24 - 🖼️ Image Downloading!</li>
@@ -71,29 +117,29 @@
            <div class="form-control">
              <input
                type="text"
-                placeholder="Story ID"
+                placeholder="Story URL"
                class="input input-bordered"
-                class:input-warning={is_part_id}
-                bind:value={raw_story_id}
+                class:input-warning={invalid_url}
+                bind:value={input_url}
                required
-                name="story_id"
+                name="input_url"
              />
-              <label class="label" for="story_id">
-                {#if is_part_id}
+              <label class="label" for="input_url">
+                {#if invalid_url}
                  <p class=" text-red-500">
                    Refer to (<button
                      class="link font-semibold"
-                      onclick="StoryIDTutorialModal.showModal()"
-                      data-umami-event="Part StoryIDTutorialModal Open"
-                      >How to get a Story ID</button
+                      onclick="StoryURLTutorialModal.showModal()"
+                      data-umami-event="Part StoryURLTutorialModal Open"
+                      >How to get a Story URL</button
                    >).
                  </p>
                {:else}
                  <button
                    class="label-text link font-semibold"
-                    onclick="StoryIDTutorialModal.showModal()"
-                    data-umami-event="StoryIDTutorialModal Open"
-                    >How to get a Story ID</button
+                    onclick="StoryURLTutorialModal.showModal()"
+                    data-umami-event="StoryURLTutorialModal Open"
+                    >How to get a Story URL</button
                  >
                {/if}
              </label>
@@ -187,7 +233,13 @@
              >, where we release features early and discuss updates.
            </p>
          </div>
-          <a href="/" class="btn btn-outline btn-lg mt-10">Download More</a>
+          <button
+            on:click={() => {
+              after_download_page = false;
+              input_url = "";
+            }}
+            class="btn btn-outline btn-lg mt-10">Download More</button
+          >
        </div>
      {/if}
    </div>
@@ -196,32 +248,31 @@

 <!-- Open the modal using ID.showModal() method -->

-<dialog id="StoryIDTutorialModal" class="modal">
+<dialog id="StoryURLTutorialModal" class="modal">
  <div class="modal-box">
    <form method="dialog">
      <button class="btn btn-sm btn-circle btn-ghost absolute right-2 top-2"
        >✕</button
      >
    </form>
-    <h3 class="font-bold text-lg">Retrieving a Story ID</h3>
+    <h3 class="font-bold text-lg">Finding the Story URL</h3>
    <ol class="list list-disc list-inside py-4 space-y-4">
      <li>
-        Open the Story URL, this page includes the story description and tags.
-        (For example, <span class="font-mono bg-slate-100 p-1"
-          >wattpad.com/story/237369078-wattpad-books-presents</span
-        >).
+        Copy the URL from the Website, or hit share and copy the URL on the App.
      </li>
      <li>
-        Copy the numbers after the <span class="font-mono bg-slate-100 p-1"
-          >/</span
-        >
-        (In the example, that'd be,
+        For example,
        <span class="font-mono bg-slate-100 p-1"
-          >wattpad.com/story/<span class="bg-amber-200 p-1">237369078</span
-          >-wattpad-books-presents</span
-        >)
+          >wattpad.com/<span class="bg-amber-200 rounded-sm">story</span
+          >/237369078-wattpad-books-presents</span
+        >.
      </li>
-      <li>Paste the Story ID and hit Download!</li>
+      <li>
+        <span class="font-mono bg-slate-100 p-1"
+          >https://www.wattpad.com/939103774-given</span
+        > is okay too.
+      </li>
+      <li>Paste the URL and hit Download!</li>
    </ol>
  </div>
  <form method="dialog" class="modal-backdrop">
Author	SHA1	Message	Date
AaronBenDaniel	f8900be6b3	fix: Add git to Dockerfile	2024-12-03 05:50:01 +05:30
TheOnlyWayUp	a458b9c2f1	api: Update requirements.txt	2024-12-02 11:37:08 +00:00
TheOnlyWayUp	18d4df0674	api: Use keydb fork of aiohttp-client-cache Natively expire hash key submembers	2024-12-02 11:25:32 +00:00
AaronBenDaniel	c1db7babdd	fix(frontend): Strip tracking info from URLs	2024-12-01 09:42:25 +05:30
TheOnlyWayUp	f40d1e4b27	fix: README	2024-12-01 00:15:03 +00:00
TheOnlyWayUp	39837f6305	docs: Add Redis guide to README	2024-12-01 00:13:22 +00:00
TheOnlyWayUp	974c0bd341	fix(frontend): Update changelog	2024-12-01 00:04:52 +00:00
TheOnlyWayUp	5687c5f2cd	fix(api): TTL for Redis Cache	2024-11-30 23:44:07 +00:00
Dhanush R	5f0676a19d	Merge pull request #23 from TheOnlyWayUp/fix/#22-redis-cache Concurrent requests fail Co-authored-by: AaronBenDaniel <144371000+AaronBenDaniel@users.noreply.github.com>	2024-12-01 03:48:07 +05:30
AaronBenDaniel	ec700ce284	fix(frontend): Remove unused function	2024-11-30 17:16:43 -05:00
AaronBenDaniel	eafef1f1ec	fix(frontend): Remove debug console.log()	2024-11-30 17:02:17 -05:00
TheOnlyWayUp	8e8773a61a	fix(api): Lower logging status for debug message	2024-11-30 21:58:51 +00:00
TheOnlyWayUp	2b1d00b08e	fix(frontend): Allow IDs to be typed	2024-11-30 21:53:16 +00:00
TheOnlyWayUp	c29c26b33b	Update requirements.txt	2024-11-30 21:38:12 +00:00
TheOnlyWayUp	f91a01e574	feat(api): Add type validation for API Responses	2024-11-30 21:37:47 +00:00
TheOnlyWayUp	a31c26f8c5	fix(api): Improve readability	2024-11-30 21:25:07 +00:00
TheOnlyWayUp	8b00d0b109	fix(api): Add logfiles to gitignore, remove debug code	2024-11-30 21:14:21 +00:00
TheOnlyWayUp	26b9db8945	fix(api): Remove unnecessary API Request, remove test script	2024-11-30 21:10:17 +00:00
TheOnlyWayUp	a755ddb0e4	fix(api): Use CachedSession across codebase	2024-11-30 20:57:20 +00:00
TheOnlyWayUp	28e40ece94	feat(api): Add eliot logging, fix no cookies in authed requests	2024-11-30 20:54:59 +00:00
TheOnlyWayUp	6e222c1f55	feat(api): Cancel requests when client disconnects	2024-11-30 19:24:33 +00:00
TheOnlyWayUp	36c73d01e9	fix(api): Pydantic-settings for model-based env loading	2024-11-30 19:23:46 +00:00
TheOnlyWayUp	48fed5f0ce	fix(api): Clean cached session usage	2024-11-30 16:54:14 +00:00
TheOnlyWayUp	e3028867db	fix(api): Default values for cache model	2024-11-30 16:53:22 +00:00
TheOnlyWayUp	b1aa836254	feat(api): Add env config	2024-11-30 16:02:01 +00:00
TheOnlyWayUp	5ecbe028c3	feat(api): Conform to PEP 621 Start using Ruff/uv	2024-11-30 16:00:34 +00:00
Dhanush R	96877d9c9b	feat(api): Descriptive error messages (#21 - @AaronBenDaniel) Co-authored-by: AaronBenDaniel <144371000+AaronBenDaniel@users.noreply.github.com>	2024-11-28 18:40:13 +00:00
TheOnlyWayUp	f9e27689e3	feat(api): Use FastAPI Error handler	2024-11-28 18:23:52 +00:00
AaronBenDaniel	308afde25f	fix(api): Handle invalid part IDs	2024-11-24 21:42:52 -05:00
AaronBenDaniel	fa1bac3045	feat(api): Add rate-limiting error message	2024-11-09 14:39:21 -05:00
AaronBenDaniel	d58a119c10	feat(api): Invalid ID error message	2024-11-08 17:43:11 -05:00
Dhanush R	31b8d0c08c	Update demo image on README	2024-11-09 03:27:09 +05:30
Dhanush R	40ae0fbb99	Update README.md	2024-11-09 00:15:53 +05:30
AaronBenDaniel	af0981a679	fix(frontend): Help Modal updated for URLs (#18 - @AaronBenDaniel) * fixed help modal * fix(frontend): Update Help Modal --------- Co-authored-by: TheOnlyWayUp <hi@towu.dev>	2024-11-08 23:12:38 +05:30
Dhanush R	fc4866463f	fix(frontend): Update donate link	2024-11-08 23:10:19 +05:30
AaronBenDaniel	ca4697057c	feat: Paste Links, Deprecate IDs (#17 - @AaronBenDaniel) * deprecate Story IDs, require full URLs * added FRONT-END ONLY support for part and list URLs * add backend support for part IDs * added backend support for lists * Support enums * Simplify and remove List support * Update frontend * Frontend: Revert dialog changes * Remove List support --------- Co-authored-by: TheOnlyWayUp <hi@towu.dev>	2024-11-07 08:39:34 +00:00
AaronBenDaniel	e89dc7e699	Update featured image (#13 - @AaronBenDaniel) * update featured image * changed page format	2024-11-03 05:02:45 +05:30
Dhanush R	d9c858b3b3	fix(api) - #11 Send to Kindle Support * fix(api/image_downloads): Replace image url with file path * fix(api/image_downloads): Add comments * fix(frontend): Update changelog * Support Send2Kindle * Update changelog	2024-11-03 04:52:30 +05:30
Dhanush R	c0695a9d17	fix(api/images): #14 - Image downloads functional * fix(api/image_downloads): Replace image url with file path * fix(api/image_downloads): Add comments * fix(frontend): Update changelog	2024-11-02 03:07:41 +05:30
TheOnlyWayUp	75d42ba5ec	fix: Style Discord Bot link	2024-10-06 08:33:11 +00:00
TheOnlyWayUp	33d6d912a2	feat: Add Discord Bot link	2024-10-06 06:10:45 +00:00
TheOnlyWayUp	9d7464b461	fix(frontend): Remove feedbackfish script	2024-09-17 18:23:26 +00:00
AaronBenDaniel	232795b050	fix(frontend): Download more button (#12 - @AaronBenDaniel) * Fixed "Download More" button * Revert "Fixed "Download More" button" This reverts commit `620ad6afff`. * Reworked page reset * fix(frontend): Download more button --------- Co-authored-by: TheOnlyWayUp <hi@towu.dev>	2024-08-31 13:56:54 +05:30
TheOnlyWayUp	85bc4609c2	fix(frontend): Remove Query Params from ID-from-URL extraction	2024-07-11 15:28:45 +00:00
TheOnlyWayUp	3369325d03	fix(frontend): Populate download URL, accidentally removed	2024-07-10 14:06:06 +00:00