4 Commits

Author SHA1 Message Date
TheOnlyWayUp 405cb7bcfe Exclude .epub from .gitignore 2024-07-08 12:15:12 +00:00
TheOnlyWayUp d819db27d3 Update VSCode settings.json 2024-07-08 12:14:59 +00:00
TheOnlyWayUp 3edd35829d feat(api/tests): Story testing 2024-07-08 12:14:38 +00:00
TheOnlyWayUp 0eedef7653 fix(api): Make package, for importing in tests 2024-07-08 12:14:08 +00:00
18 changed files with 297 additions and 1913 deletions
-5
View File
@@ -1,10 +1,5 @@
__pycache__ __pycache__
venv venv
*epub
data data
*ipynb *ipynb
build build
.vscode
.venv
.env
*log
+7 -1
View File
@@ -1,3 +1,9 @@
{ {
"python.analysis.autoImportCompletions": true "python.analysis.autoImportCompletions": true,
"vscord.app.privacyMode.enable": false,
"python.testing.pytestArgs": [
"src"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
} }
-4
View File
@@ -12,10 +12,6 @@ RUN npm run build
FROM python:3.10-slim FROM python:3.10-slim
WORKDIR /app WORKDIR /app
# Install git
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
COPY src/api/requirements.txt requirements.txt COPY src/api/requirements.txt requirements.txt
RUN pip3 install -r requirements.txt RUN pip3 install -r requirements.txt
COPY --from=0 /build/build /app/build COPY --from=0 /build/build /app/build
+4 -19
View File
@@ -2,19 +2,18 @@ WattpadDownloader ([Demo](https://wpd.rambhat.la))
--- ---
Straightforward, Extendable WebApp to download Wattpad Books as EPUB Files. Straightforward, Extendable WebApp to download Wattpad Books as EPUB Files.
![image](https://github.com/user-attachments/assets/b9d87d6b-5302-4561-98b0-d7f95bff9f04) ![image](https://github.com/TheOnlyWayUp/WattpadDownloader/assets/76237496/8a3fda0b-b851-4c5f-9306-ba9c17cdcc8b)
Stars ⭐ are appreciated. Thanks! Stars ⭐ are appreciated. Thanks!
## Features ## Features
- ⚡ Lightweight Frontend and Minimal Javascript. - ⚡ Lightweight Frontend and Minimal Javascript.
- 🪙 Supports Authentication (Download paid stories from your account!)
- 🌐 API Support (Visit the `/docs` path on your instance for more.) - 🌐 API Support (Visit the `/docs` path on your instance for more.)
- 🐇 Fast Generation, Ratelimit Handling. - 🐇 Fast Generation, Basic Ratelimit Handling.
- 🐳 Docker Support - 🐳 Docker Support
- 🏷️ Generated EPUB File includes Metadata. (Dublin Core Spec) - 🏷️ Generated EPUB File includes Metadata. (Dublin Core Spec)
- 📖 Plays well with E-Readers. (Kindle Support with Send2Kindle, ReMarkable, KOBO, KOReader...) - 📖 Plays well with E-Readers. (Kindle Support if KOReader present)
- 💻 Easily Hackable. Extend with ease. - 💻 Easily Hackable. Extend with ease.
@@ -25,20 +24,6 @@ Stars ⭐ are appreciated. Thanks!
That's it! You can use your instance at `http://localhost:5042`. API Documentation is available at `http://localhost:5042/docs`. That's it! You can use your instance at `http://localhost:5042`. API Documentation is available at `http://localhost:5042/docs`.
### Concurrent Requests
The file-based cache struggles with concurrent requests (discussed in TheOnlyWayUp/WattpadDownloader#2 and TheOnlyWayUp/WattpadDownloader#22). If you're downloading a large number of books concurrently, switch to the Redis cache. Assuming you've built the image already:
1. Fill in the `.env` file. `localhost` will not work inside a Docker container unless [`host.docker.internal`](https://docs.docker.com/desktop/features/networking/#i-want-to-connect-from-a-container-to-a-service-on-the-host) or a platform-specific variant is used as the host.
```
USE_CACHE=true
CACHE_TYPE=redis
REDIS_CONNECTION_URL=redis://username:password@host:port
```
2. Run the container, supplying the `.env` file: `docker run -d -p 5042:80 --env-file .env wp_downloader`
Alternatively, if Redis is running on localhost:
2. Modify your `.env` file, replacing `localhost` with `host.docker.internal`. `redis://localhost:6379` should become `redis://host.docker.internal:6379`. Then, start the container, `docker run -d -p 5042:80 --env-file .env --add-host host.docker.internal:host-gateway wp_downloader`
--- ---
My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fast and well-documented package. My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fast and well-documented package.
@@ -46,5 +31,5 @@ My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fas
--- ---
<div align="center"> <div align="center">
<p>TheOnlyWayUp © 2024</p> <p>TheOnlyWayUp © 2023</p>
</div> </div>
BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 264 KiB

-3
View File
@@ -1,3 +0,0 @@
USE_CACHE=true
CACHE_TYPE=file
REDIS_CONNECTION_URL=
-1
View File
@@ -1 +0,0 @@
3.10
-26
View File
@@ -1,26 +0,0 @@
[project]
name = "api"
version = "0.1.0"
description = "Wattpad Downloader API"
readme = "../../README.md"
requires-python = ">=3.10"
dependencies = [
"aiohttp>=3.9.1",
"rich>=13.9.4",
"fastapi>=0.115.5",
"ebooklib>=0.18",
"python-dotenv>=1.0.1",
"pydantic-settings>=2.6.1",
"eliot>=1.16.0",
"type-extensions>=0.1.2",
"backoff>=2.2.1",
"aiohttp-client-cache[all]",
"bs4>=0.0.2",
"uvicorn>=0.32.1",
]
[tool.ruff.lint]
ignore = ['E402']
[tool.uv.sources]
aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-cache.git", rev = "keydb-ttl" }
+47 -46
View File
@@ -1,61 +1,62 @@
aioboto3==13.2.0 aiofiles==23.2.1
aiobotocore==2.15.2 aiohttp==3.9.1
aiofiles==24.1.0 aiohttp-client-cache==0.10.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.9
aiohttp-client-cache @ git+https://github.com/TheOnlyWayUp/aiohttp-client-cache.git@1f94f1d751e7320c0ea981d532ff02924782dae6
aioitertools==0.12.0
aiosignal==1.3.1 aiosignal==1.3.1
aiosqlite==0.20.0 aiosqlite==0.19.0
annotated-types==0.7.0 annotated-types==0.6.0
anyio==4.6.2.post1 anyio==4.2.0
asttokens==2.4.1
async-timeout==4.0.3 async-timeout==4.0.3
attrs==23.1.0 attrs==23.1.0
backoff==2.2.1 backoff==2.2.1
beautifulsoup4==4.12.3 beautifulsoup4==4.12.3
boltons==24.1.0
boto3==1.35.36
botocore==1.35.36
bs4==0.0.2 bs4==0.0.2
click==8.1.7 click==8.1.7
dnspython==2.7.0 comm==0.2.0
ebooklib==0.18 debugpy==1.8.0
eliot==1.16.0 decorator==5.1.1
exceptiongroup==1.2.2 EbookLib==0.18
fastapi==0.115.5 exceptiongroup==1.2.0
executing==2.0.1
fastapi==0.108.0
frozenlist==1.4.1 frozenlist==1.4.1
h11==0.14.0 h11==0.14.0
idna==3.6 idna==3.6
itsdangerous==2.2.0 ipykernel==6.28.0
jmespath==1.0.1 ipython==8.19.0
lxml==5.3.0 itsdangerous==2.1.2
jedi==0.19.1
jupyter_client==8.6.0
jupyter_core==5.5.1
lxml==4.9.4
markdown-it-py==3.0.0 markdown-it-py==3.0.0
matplotlib-inline==0.1.6
mdurl==0.1.2 mdurl==0.1.2
motor==3.6.0
multidict==6.0.4 multidict==6.0.4
orjson==3.10.12 nest-asyncio==1.5.8
propcache==0.2.1 packaging==23.2
pydantic==2.10.2 parso==0.8.3
pydantic-core==2.27.1 pexpect==4.9.0
pydantic-settings==2.6.1 platformdirs==4.1.0
pygments==2.18.0 prompt-toolkit==3.0.43
pymongo==4.9.2 psutil==5.9.7
pyrsistent==0.20.0 ptyprocess==0.7.0
python-dateutil==2.9.0.post0 pure-eval==0.2.2
python-dotenv==1.0.1 pydantic==2.5.3
redis==5.2.0 pydantic_core==2.14.6
rich==13.9.4 Pygments==2.17.2
s3transfer==0.10.4 python-dateutil==2.8.2
setuptools==75.6.0 pyzmq==25.1.2
rich==13.7.0
six==1.16.0 six==1.16.0
sniffio==1.3.1 sniffio==1.3.0
soupsieve==2.6 soupsieve==2.5
starlette==0.41.3 stack-data==0.6.3
type-extensions==0.1.2 starlette==0.32.0.post1
typing-extensions==4.12.2 tornado==6.4
traitlets==5.14.0
typing_extensions==4.9.0
url-normalize==1.4.3 url-normalize==1.4.3
urllib3==2.2.3 uvicorn==0.25.0
uvicorn==0.32.1 wcwidth==0.2.12
wrapt==1.17.0 yarl==1.9.4
yarl==1.18.3
zope-interface==7.2
+89 -224
View File
@@ -1,104 +1,61 @@
from typing import List, Optional, Tuple import asyncio
from typing_extensions import TypedDict from typing import Optional
import re
import unicodedata
import logging
from os import environ
from enum import Enum
import backoff
from eliot import to_file, start_action
from eliot.stdlib import EliotHandler
from dotenv import load_dotenv
from ebooklib import epub from ebooklib import epub
from ebooklib.epub import EpubBook import unicodedata
from bs4 import BeautifulSoup import re
from pydantic import TypeAdapter, model_validator, field_validator import backoff
from pydantic_settings import BaseSettings from aiohttp import ClientResponseError, ClientSession
from aiohttp import ClientResponseError
from aiohttp_client_cache.session import CachedSession from aiohttp_client_cache.session import CachedSession
from aiohttp_client_cache import FileBackend, RedisBackend from aiohttp_client_cache import FileBackend
from bs4 import BeautifulSoup
load_dotenv(override=True)
handler = EliotHandler()
logging.getLogger("fastapi").setLevel(logging.INFO)
logging.getLogger("fastapi").addHandler(handler)
if environ.get("DEBUG"):
to_file(open("eliot.log", "wb"))
logger = logging.Logger("wpd")
logger.addHandler(handler)
# --- #
class CacheTypes(Enum):
    """Cache backends that can be named in the CACHE_TYPE setting."""

    file = "file"
    redis = "redis"
class Config(BaseSettings):
    """Cache configuration, declared as a pydantic-settings ``BaseSettings`` model.

    Empty-string values for USE_CACHE and CACHE_TYPE are swapped for their
    defaults before validation, and REDIS_CONNECTION_URL must agree with the
    selected CACHE_TYPE.
    """

    USE_CACHE: bool = True
    CACHE_TYPE: CacheTypes = CacheTypes.file
    REDIS_CONNECTION_URL: str = ""

    @field_validator("USE_CACHE", mode="before")
    def validate_use_cache(cls, value):
        # An empty string falls back to the default value for USE_CACHE.
        return True if value == "" else value

    @field_validator("CACHE_TYPE", mode="before")
    def validate_cache_type(cls, value):
        # Thanks https://stackoverflow.com/a/78157474
        return "file" if value == "" else value

    @model_validator(mode="after")
    def prevent_mismatched_redis_url(self):
        """Reject a Redis URL with the file cache, and a missing URL with the Redis cache."""
        url_given = bool(self.REDIS_CONNECTION_URL)

        if self.CACHE_TYPE == CacheTypes.file and url_given:
            raise ValueError(
                "REDIS_CONNECTION_URL provided when File cache selected. To use Redis as a cache, set CACHE_TYPE=redis."
            )
        if self.CACHE_TYPE == CacheTypes.redis and not url_given:
            raise ValueError(
                "REDIS_CONNECTION_URL not provided when Redis cache selected. To use File cache, set CACHE_TYPE=file."
            )

        return self
config = Config()
# --- #
headers = { headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
} }
if config.USE_CACHE:
match config.CACHE_TYPE:
case CacheTypes.file:
cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours
case CacheTypes.redis:
cache = RedisBackend(
cache_name="wpd-aiohttp-cache",
address=config.REDIS_CONNECTION_URL,
expire_after=43200, # 12 hours
)
else:
cache = None
logger.info(f"Using {cache=}")
# --- Utilities --- # # --- Utilities --- #
async def wp_get_cookies(username: str, password: str) -> dict:
    # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
    """Log in to Wattpad and return the cookies set by the login response.

    Args:
        username (str): Wattpad username; lower-cased before it is submitted.
        password (str): Wattpad password, submitted unchanged.

    Raises:
        ValueError: The login endpoint answered with a status other than 204.
        ValueError: The response carried no cookies.

    Returns:
        dict: Cookie names mapped to their string values.
    """
    login_url = "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login"
    form = {
        "username": username.lower(),  # the username.lower() is for caching
        "password": password,
    }

    async with ClientSession(headers=headers) as session, session.post(
        login_url, data=form
    ) as response:
        if response.status != 204:
            raise ValueError("Not a 204.")

        # Thanks https://stackoverflow.com/a/32281245
        jar = {name: morsel.value for name, morsel in response.cookies.items()}
        if jar:
            return jar
        raise ValueError("No cookies.")
def slugify(value, allow_unicode=False) -> str: def slugify(value, allow_unicode=False) -> str:
""" """
Taken from https://github.com/django/django/blob/master/django/utils/text.py Taken from https://github.com/django/django/blob/master/django/utils/text.py
@@ -122,128 +79,44 @@ def slugify(value, allow_unicode=False) -> str:
return re.sub(r"[-\s]+", "-", value).strip("-_") return re.sub(r"[-\s]+", "-", value).strip("-_")
async def wp_get_cookies(username: str, password: str) -> dict:
    # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
    """Log in to Wattpad and return the cookies set by the login response.

    The request goes through a ``CachedSession`` created with ``cache=None``
    and is wrapped in an ``api_fetch_cookies`` eliot action.

    Args:
        username (str): Wattpad username; lower-cased before it is submitted.
        password (str): Wattpad password, submitted unchanged.

    Raises:
        ValueError: The login endpoint answered with a status other than 204.
        ValueError: The response carried no cookies.

    Returns:
        dict: Cookie names mapped to their string values.
    """
    login_url = "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login"
    form = {
        "username": username.lower(),  # the username.lower() is for caching
        "password": password,
    }

    with start_action(action_type="api_fetch_cookies"):
        async with CachedSession(headers=headers, cache=None) as session, session.post(
            login_url, data=form
        ) as response:
            if response.status != 204:
                raise ValueError("Not a 204.")

            # Thanks https://stackoverflow.com/a/32281245
            jar = {name: morsel.value for name, morsel in response.cookies.items()}
            if jar:
                return jar
            raise ValueError("No cookies.")
# --- Models --- #
class Language(TypedDict):
    """Language entry of a story; only the ``name`` field is requested."""

    name: str
class User(TypedDict):
    """Author entry of a story; only the ``username`` field is requested."""

    username: str
class Part(TypedDict):
    """One part (chapter) of a story: its numeric id and its title."""

    id: int
    title: str
title: str
class Story(TypedDict):
    """Story payload; keys match the ``fields=`` list requested from the stories endpoint."""

    # Identity and dates
    id: str
    title: str
    createDate: str
    modifyDate: str

    # Nested objects
    language: Language
    user: User

    # Presentation and status
    description: str
    cover: str
    completed: bool
    tags: List[str]
    mature: bool
    url: str

    # Contents and access
    parts: List[Part]
    isPaywalled: bool
story_ta = TypeAdapter(Story)
# --- API Calls --- # # --- API Calls --- #
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) @backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_story_from_partId( async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> dict:
part_id: int, cookies: Optional[dict] = None
) -> Tuple[str, Story]:
"""Return a Story ID from a Part ID."""
with start_action(action_type="api_fetch_storyFromPartId"):
async with CachedSession(
headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies.
async with session.get(
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover)"
) as response:
response.raise_for_status()
body = await response.json()
return str(body["groupId"]), story_ta.validate_python(body["group"])
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> Story:
"""Taking a story_id, return its information from the Wattpad API.""" """Taking a story_id, return its information from the Wattpad API."""
with start_action(action_type="api_fetch_story", story_id=story_id): async with (
async with CachedSession( CachedSession(headers=headers, cache=cache)
headers=headers, cookies=cookies, cache=None if cookies else cache if not cookies
) as session: else ClientSession(headers=headers, cookies=cookies)
) as session: # Don't cache requests with Cookies.
async with session.get( async with session.get(
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover" f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
) as response: ) as response:
if not response.ok:
if response.status in [404, 400]:
return {}
response.raise_for_status() response.raise_for_status()
body = await response.json() body = await response.json()
return story_ta.validate_python(body) return body
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) @backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str: async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str:
"""Return the HTML Content of a Part.""" """Return the HTML Content of a Part."""
with start_action(action_type="api_fetch_partContent", part_id=part_id): async with (
async with CachedSession( CachedSession(headers=headers, cache=cache)
headers=headers, cookies=cookies, cache=None if cookies else cache if not cookies
) as session: else ClientSession(headers=headers, cookies=cookies)
) as session: # Don't cache requests with Cookies.
async with session.get( async with session.get(
f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}" f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"
) as response: ) as response:
if not response.ok:
if response.status in [404, 400]:
return ""
response.raise_for_status() response.raise_for_status()
body = await response.text() body = await response.text()
@@ -252,13 +125,17 @@ async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> st
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) @backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_cover(url: str) -> bytes: async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
"""Fetch cover image bytes.""" """Fetch image bytes."""
with start_action(action_type="api_fetch_cover", url=url): async with (
async with CachedSession( CachedSession(headers=headers, cache=cache)
headers=headers, cache=None if not cookies
) as session: # Don't cache images. else ClientSession(headers=headers, cookies=cookies)
) as session: # Don't cache requests with Cookies.
async with session.get(url) as response: async with session.get(url) as response:
if not response.ok:
if response.status in [404, 400]:
return bytes()
response.raise_for_status() response.raise_for_status()
body = await response.read() body = await response.read()
@@ -269,13 +146,11 @@ async def fetch_cover(url: str) -> bytes:
# --- EPUB Generation --- # # --- EPUB Generation --- #
def set_metadata(book: EpubBook, data: Story) -> None: def set_metadata(book, data):
"""Set book metadata."""
book.add_author(data["user"]["username"]) book.add_author(data["user"]["username"])
book.add_metadata("DC", "title", data["title"])
book.add_metadata("DC", "description", data["description"]) book.add_metadata("DC", "description", data["description"])
book.add_metadata("DC", "date", data["createDate"]) book.add_metadata("DC", "created", data["createDate"])
book.add_metadata("DC", "modified", data["modifyDate"]) book.add_metadata("DC", "modified", data["modifyDate"])
book.add_metadata("DC", "language", data["language"]["name"]) book.add_metadata("DC", "language", data["language"]["name"])
@@ -290,56 +165,46 @@ def set_metadata(book: EpubBook, data: Story) -> None:
) )
async def set_cover(book: EpubBook, data: Story) -> None: async def set_cover(book, data, cookies: Optional[dict] = None):
"""Set book cover.""" book.set_cover("cover.jpg", await fetch_cover(data["cover"], cookies=cookies))
book.set_cover("cover.jpg", await fetch_cover(data["cover"]))
chapter = epub.EpubHtml(
file_name="titlepage.xhtml", # Standard for cover page
)
chapter.set_content('<img src="cover.jpg">')
async def add_chapters( async def add_chapters(
book: EpubBook, book, data, download_images: bool = False, cookies: Optional[dict] = None
data: Story,
download_images: bool = False,
cookies: Optional[dict] = None,
): ):
chapters = [] chapters = []
for cidx, part in enumerate(data["parts"]): for part in data["parts"]:
content = await fetch_part_content(part["id"], cookies=cookies) content = await fetch_part_content(part["id"], cookies=cookies)
title = part["title"] title = part["title"]
clean_title = slugify(title)
# Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1 # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
chapter = epub.EpubHtml( chapter = epub.EpubHtml(
title=title, title=title,
file_name=f"{cidx}.xhtml", # Used to be clean_title.xhtml, but that broke Arabic support as slugify turns arabic strings into '', leading to multiple files with the same name, breaking those chapters. file_name=f"{clean_title}.xhtml",
lang=data["language"]["name"], lang=data["language"]["name"],
) )
if download_images: if download_images:
soup = BeautifulSoup(content, "lxml") soup = BeautifulSoup(content, "lxml")
async with (
async with CachedSession( CachedSession(headers=headers, cache=cache)
headers=headers, cache=None if not cookies
) as session: # Don't cache images. else ClientSession(headers=headers, cookies=cookies)
) as session: # Don't cache requests with Cookies.
for idx, image in enumerate(soup.find_all("img")): for idx, image in enumerate(soup.find_all("img")):
if not image["src"]: if not image["src"]:
continue continue
# Find all image tags and filter for those with sources
async with session.get(image["src"]) as response: async with session.get(image["src"]) as response:
img = epub.EpubImage( img = epub.EpubImage(
media_type="image/jpeg", media_type="image/jpeg",
content=await response.read(), content=await response.read(),
file_name=f"static/{cidx}/{idx}.jpeg", file_name=f"static/{clean_title}/{idx}.jpeg",
) )
book.add_item(img) book.add_item(img)
# Fetch image and pack
content = content.replace( content = content.replace(
str(image["src"]), f"static/{cidx}/{idx}.jpeg" str(image), f'<img src="static/{clean_title}/{idx}.jpeg"/>'
) )
chapter.set_content(f"<h1>{title}</h1>" + content) chapter.set_content(f"<h1>{title}</h1>" + content)
@@ -351,7 +216,7 @@ async def add_chapters(
for chapter in chapters: for chapter in chapters:
book.add_item(chapter) book.add_item(chapter)
book.toc = chapters book.toc = tuple(chapters)
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
book.add_item(epub.EpubNcx()) book.add_item(epub.EpubNcx())
+25 -111
View File
@@ -1,16 +1,7 @@
"""WattpadDownloader API Server."""
from typing import Optional from typing import Optional
import asyncio
import tempfile
from pathlib import Path from pathlib import Path
from io import BytesIO from fastapi import FastAPI, HTTPException
from enum import Enum
from eliot import start_action
from aiohttp import ClientResponseError
from fastapi import FastAPI, Request
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from ebooklib import epub from ebooklib import epub
from create_book import ( from create_book import (
retrieve_story, retrieve_story,
@@ -19,117 +10,37 @@ from create_book import (
add_chapters, add_chapters,
slugify, slugify,
wp_get_cookies, wp_get_cookies,
fetch_story_from_partId,
logger,
) )
import tempfile
from io import BytesIO
from fastapi.staticfiles import StaticFiles
app = FastAPI() app = FastAPI()
BUILD_PATH = Path(__file__).parent / "build" BUILD_PATH = Path(__file__).parent / "build"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
}
class RequestCancelledMiddleware:
    """ASGI middleware that cancels the request handler when the client disconnects.

    For ``http`` scopes, incoming messages are read by a background poller and
    relayed to the wrapped app through a queue. When the poller sees an
    ``http.disconnect`` message it cancels the task running the wrapped app.
    Every other scope type is passed straight through.
    """

    # Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
    def __init__(self, app):
        self.app = app

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        # Shared queue: the poller fills it, the wrapped app reads it via queue.get.
        queue = asyncio.Queue()

        async def message_poller(handler_task):
            while True:
                message = await receive()
                if message["type"] == "http.disconnect":
                    handler_task.cancel()
                    return  # Stop polling once the client is gone
                await queue.put(message)

        handler_task = asyncio.create_task(self.app(scope, queue.get, send))
        # Keep a reference so the poller cannot be garbage-collected mid-flight,
        # and so it can be stopped once the handler has finished.
        poller_task = asyncio.create_task(message_poller(handler_task))

        try:
            return await handler_task
        except asyncio.CancelledError:
            logger.info("Cancelling task as connection closed")
        finally:
            # No-op if the poller already returned after a disconnect.
            poller_task.cancel()
app.add_middleware(RequestCancelledMiddleware)
class DownloadMode(Enum):
    """Accepted values for the ``mode`` parameter of the download route."""

    story = "story"
    part = "part"
@app.get("/") @app.get("/")
def home(): def home():
return FileResponse(BUILD_PATH / "index.html") return FileResponse(BUILD_PATH / "index.html")
@app.exception_handler(ClientResponseError) @app.get("/download/{story_id}")
def download_error_handler(request: Request, exception: ClientResponseError): async def download_book(
match exception.status: story_id: int,
case 400 | 404:
return HTMLResponse(
status_code=404,
content='This story does not exist, or has been deleted. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
case 429:
# Rate-limit by Wattpad
return HTMLResponse(
status_code=429,
content='The website is overloaded. Please try again in a few minutes. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
case _:
# Unhandled error
return HTMLResponse(
status_code=500,
content='Something went wrong. Yell at me on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
@app.get("/download/{download_id}")
async def handle_download(
download_id: int,
download_images: bool = False, download_images: bool = False,
mode: DownloadMode = DownloadMode.story,
username: Optional[str] = None, username: Optional[str] = None,
password: Optional[str] = None, password: Optional[str] = None,
):
with start_action(
action_type="download",
download_id=download_id,
download_images=download_images,
mode=mode,
): ):
if username and not password or password and not username: if username and not password or password and not username:
logger.error(
"Username with no Password or Password with no Username provided."
)
return HTMLResponse( return HTMLResponse(
status_code=422, status_code=422,
content='Include both the username <u>and</u> password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>', content='Include both the username _and_ password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
) )
if username and password: if username and password:
# username and password are URL-Encoded by the frontend. FastAPI automatically decodes them.
try: try:
cookies = await wp_get_cookies(username=username, password=password) cookies = await wp_get_cookies(username=username, password=password)
except ValueError: except ValueError:
logger.error("Invalid username or password.")
return HTMLResponse( return HTMLResponse(
status_code=403, status_code=403,
content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>', content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
@@ -137,22 +48,25 @@ async def handle_download(
else: else:
cookies = None cookies = None
match mode: data = await retrieve_story(story_id, cookies=cookies)
case DownloadMode.story:
story_id = download_id
metadata = await retrieve_story(story_id, cookies)
case DownloadMode.part:
story_id, metadata = await fetch_story_from_partId(download_id, cookies)
logger.info(f"Retrieved story id ({story_id=})")
book = epub.EpubBook() book = epub.EpubBook()
set_metadata(book, metadata)
await set_cover(book, metadata)
try:
set_metadata(book, data)
except KeyError:
return HTMLResponse(
status_code=404,
content='Story not found. Check the ID - Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
await set_cover(book, data, cookies=cookies)
# print("Metadata Downloaded")
# Chapters are downloaded
async for title in add_chapters( async for title in add_chapters(
book, metadata, download_images=download_images, cookies=cookies book, data, download_images=download_images, cookies=cookies
): ):
# print(f"Part ({title}) downloaded")
... ...
# Book is compiled # Book is compiled
@@ -170,7 +84,7 @@ async def handle_download(
BytesIO(book_data), BytesIO(book_data),
media_type="application/epub+zip", media_type="application/epub+zip",
headers={ headers={
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058 "Content-Disposition": f'attachment; filename="{slugify(data["title"])}_{story_id}_{"images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058
}, },
) )
@@ -181,4 +95,4 @@ app.mount("/", StaticFiles(directory=BUILD_PATH), "static")
if __name__ == "__main__": if __name__ == "__main__":
import uvicorn import uvicorn
uvicorn.run("main:app", host="0.0.0.0", port=80, workers=16) uvicorn.run(app, host="0.0.0.0", port=80)
+31
View File
@@ -0,0 +1,31 @@
from .. import create_book
import pytest
STORY_ID = 372219540
@pytest.mark.asyncio
async def test_retrieve_story():
    """Compare the story returned by ``create_book.retrieve_story`` with a stored snapshot.

    ``modifyDate`` is dropped before the comparison because it is subject to
    change. The expected URLs use plain ``/`` separators: ``retrieve_story``
    returns the decoded JSON body, in which the ``\\/`` escape sequence of the
    raw payload has already become ``/``.
    """
    story_data = await create_book.retrieve_story(STORY_ID)
    story_data.pop("modifyDate", None)  # Subject to change

    expected = {
        "id": "372219540",
        "title": "WPD Test",
        "createDate": "2024-07-02T15:29:13Z",
        # "modifyDate": "2024-07-02T15:41:26Z",
        "language": {"name": "English"},
        "user": {"username": "KindaAssNgl"},
        "description": "Testing story for WPD.",
        "cover": "https://img.wattpad.com/cover/372219540-256-k908955.jpg",
        "completed": False,
        "tags": ["testing", "towu", "wpd"],
        "mature": False,
        "url": "https://www.wattpad.com/story/372219540-wpd-test",
        "parts": [{"id": 1458516761, "title": "Ganesh"}],
        "isPaywalled": False,
    }

    assert story_data == expected
-1294
View File
File diff suppressed because it is too large Load Diff
+2
View File
@@ -25,6 +25,8 @@
<!-- Meta Tags Generated with https://metatags.io --> <!-- Meta Tags Generated with https://metatags.io -->
<script defer src="https://feedback.fish/ff.js?pid=f8df016d4ffdfb"></script>
%sveltekit.head% %sveltekit.head%
</head> </head>
<body data-sveltekit-preload-data="hover"> <body data-sveltekit-preload-data="hover">
+2 -2
View File
@@ -18,10 +18,10 @@
<aside> <aside>
<div class="grid grid-cols-3 max-w-lg w-full"> <div class="grid grid-cols-3 max-w-lg w-full">
<a <a
href="https://patreon.com/theonlywayup" href="https://liberapay.com/TheOnlyWayUp/"
target="_blank" target="_blank"
class="link" class="link"
data-umami-event="Footer Donate">Patreon</a data-umami-event="Footer Donate">Donate</a
> >
<a <a
href="https://rambhat.la" href="https://rambhat.la"
+33 -120
View File
@@ -1,81 +1,26 @@
<script> <script>
let story_id = "";
let download_images = false; let download_images = false;
let is_paid_story = false; let is_paid_story = false;
let invalid_url = false;
let after_download_page = false;
let credentials = { let credentials = {
username: "", username: "",
password: "", password: "",
}; };
let download_id = "";
let mode = ""; let after_download_page = false;
let input_url = ""; let url = "";
let button_disabled = false; let button_disabled = false;
$: button_disabled = $: button_disabled =
!input_url || !story_id ||
(is_paid_story && !(credentials.username && credentials.password)); (is_paid_story && !(credentials.username && credentials.password));
$: url = $: url =
`/download/` + `/download/${story_id}?om=1` +
download_id +
`?om=1` +
(download_images ? "&download_images=true" : "") + (download_images ? "&download_images=true" : "") +
(is_paid_story (is_paid_story
? `&username=${encodeURIComponent(credentials.username)}&password=${encodeURIComponent(credentials.password)}` ? `&username=${credentials.username}&password=${credentials.password}`
: "") + : "");
`&mode=${mode}`;
$: {
if (input_url.length) {
input_url = input_url.toLowerCase();
invalid_url = false;
if (/^\d+$/.test(input_url)) {
// All numbers
download_id = input_url;
mode = "story";
} else if (input_url.includes("wattpad.com/")) {
// Is a string and contains wattpad.com/
if (input_url.includes("/story/")) {
// https://wattpad.com/story/237369078-wattpad-books-presents
input_url = input_url.split("-")[0].split("?")[0].split("/story/")[1]; // removes tracking fields and title
download_id = input_url;
mode = "story";
} else if (input_url.includes("/stories/")) {
// https://www.wattpad.com/api/v3/stories/237369078?fields=...
input_url = input_url.split("?")[0].split("/stories/")[1]; // removes params
download_id = input_url;
mode = "story";
} else {
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
input_url = input_url.split("-")[0].split("?")[0].split("wattpad.com/")[1]; // removes tracking fields and title
download_id = input_url;
if (/^\d+$/.test(download_id)) {
// If "wattpad.com/{download_id}" contains only numbers
mode = "part";
} else {
invalid_url = true;
input_url = "";
download_id = "";
}
}
} else {
invalid_url = true;
}
input_url = input_url.match(/\d+/g)?.join("") || "";
download_id = input_url;
// Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, which I wasn't able to remove.
// In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url).
} else {
invalid_url = false;
download_id = "";
}
}
</script> </script>
<div> <div>
@@ -94,54 +39,27 @@
Download your favourite books with a single click! Download your favourite books with a single click!
</p> </p>
<ul class="pt-4 list list-inside text-xl"> <ul class="pt-4 list list-inside text-xl">
<!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. --> <li>06/24 - 🎉 Image Downloading!</li>
<li>12/24 - 📂 Improved Performance</li>
<li>11/24 - 🔗 Paste Links!</li>
<li>11/24 - 📨 Send to Kindle Support!</li>
<li>11/24 - ⚒️ Fix Image Downloads</li>
<li>
10/24 - 👾 Add the <a
href="https://discord.com/oauth2/authorize?client_id=1292173380065296395&permissions=274878285888&scope=bot%20applications.commands"
target="_blank"
class="link underline">Discord Bot</a
>!
</li>
<li>07/24 - 🔡 RTL Language support! (Arabic, etc.)</li>
<li>06/24 - 🔑 Authenticated Downloads!</li>
<li>06/24 - 🖼️ Image Downloading!</li>
</ul> </ul>
</div> </div>
<div class="card shrink-0 w-full max-w-sm shadow-2xl bg-base-100"> <div class="card shrink-0 w-full max-w-sm shadow-2xl bg-base-100">
<form class="card-body"> <form class="card-body">
<div class="form-control"> <div class="form-control">
<input <input
type="text" type="number"
placeholder="Story URL" placeholder="Story ID"
class="input input-bordered" class="input input-bordered"
class:input-warning={invalid_url} bind:value={story_id}
bind:value={input_url}
required required
name="input_url" name="story_id"
/> />
<label class="label" for="input_url"> <label class="label" for="story_id">
{#if invalid_url}
<p class=" text-red-500">
Refer to (<button
class="link font-semibold"
onclick="StoryURLTutorialModal.showModal()"
data-umami-event="Part StoryURLTutorialModal Open"
>How to get a Story URL</button
>).
</p>
{:else}
<button <button
class="label-text link font-semibold" class="label-text link font-semibold"
onclick="StoryURLTutorialModal.showModal()" onclick="StoryIDTutorialModal.showModal()"
data-umami-event="StoryURLTutorialModal Open" data-umami-event="StoryIDTutorialModal Open"
>How to get a Story URL</button >How to get a Story ID</button
> >
{/if}
</label> </label>
<label class="cursor-pointer label"> <label class="cursor-pointer label">
<span class="label-text" <span class="label-text"
@@ -233,13 +151,7 @@
>, where we release features early and discuss updates. >, where we release features early and discuss updates.
</p> </p>
</div> </div>
<button <a href="/" class="btn btn-outline btn-lg mt-10">Download More</a>
on:click={() => {
after_download_page = false;
input_url = "";
}}
class="btn btn-outline btn-lg mt-10">Download More</button
>
</div> </div>
{/if} {/if}
</div> </div>
@@ -248,31 +160,32 @@
<!-- Open the modal using ID.showModal() method --> <!-- Open the modal using ID.showModal() method -->
<dialog id="StoryURLTutorialModal" class="modal"> <dialog id="StoryIDTutorialModal" class="modal">
<div class="modal-box"> <div class="modal-box">
<form method="dialog"> <form method="dialog">
<button class="btn btn-sm btn-circle btn-ghost absolute right-2 top-2" <button class="btn btn-sm btn-circle btn-ghost absolute right-2 top-2"
></button ></button
> >
</form> </form>
<h3 class="font-bold text-lg">Finding the Story URL</h3> <h3 class="font-bold text-lg">Downloading a Story</h3>
<ol class="list list-disc list-inside py-4 space-y-4"> <ol class="list list-disc list-inside py-4 space-y-2">
<li> <li>
Copy the URL from the Website, or hit share and copy the URL on the App. Open the Story URL (For example, <span
class="font-mono bg-slate-100 p-1"
>wattpad.com/story/237369078-wattpad-books-presents</span
>)
</li> </li>
<li> <li>
For example, Copy the numbers after the <span class="font-mono bg-slate-100 p-1"
>/</span
>
(In the example, that'd be,
<span class="font-mono bg-slate-100 p-1" <span class="font-mono bg-slate-100 p-1"
>wattpad.com/<span class="bg-amber-200 rounded-sm">story</span >wattpad.com/story/<span class="bg-amber-200 p-1">237369078</span
>/237369078-wattpad-books-presents</span >-wattpad-books-presents</span
>. >)
</li> </li>
<li> <li>Paste the Story ID and hit Download!</li>
<span class="font-mono bg-slate-100 p-1"
>https://www.wattpad.com/939103774-given</span
> is okay too.
</li>
<li>Paste the URL and hit Download!</li>
</ol> </ol>
</div> </div>
<form method="dialog" class="modal-backdrop"> <form method="dialog" class="modal-backdrop">