Compare commits
45 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f8900be6b3 | |||
| a458b9c2f1 | |||
| 18d4df0674 | |||
| c1db7babdd | |||
| f40d1e4b27 | |||
| 39837f6305 | |||
| 974c0bd341 | |||
| 5687c5f2cd | |||
| 5f0676a19d | |||
| ec700ce284 | |||
| eafef1f1ec | |||
| 8e8773a61a | |||
| 2b1d00b08e | |||
| c29c26b33b | |||
| f91a01e574 | |||
| a31c26f8c5 | |||
| 8b00d0b109 | |||
| 26b9db8945 | |||
| a755ddb0e4 | |||
| 28e40ece94 | |||
| 6e222c1f55 | |||
| 36c73d01e9 | |||
| 48fed5f0ce | |||
| e3028867db | |||
| b1aa836254 | |||
| 5ecbe028c3 | |||
| 96877d9c9b | |||
| f9e27689e3 | |||
| 308afde25f | |||
| fa1bac3045 | |||
| d58a119c10 | |||
| 31b8d0c08c | |||
| 40ae0fbb99 | |||
| af0981a679 | |||
| fc4866463f | |||
| ca4697057c | |||
| e89dc7e699 | |||
| d9c858b3b3 | |||
| c0695a9d17 | |||
| 75d42ba5ec | |||
| 33d6d912a2 | |||
| 9d7464b461 | |||
| 232795b050 | |||
| 85bc4609c2 | |||
| 3369325d03 |
@@ -4,3 +4,7 @@ venv
|
||||
data
|
||||
*ipynb
|
||||
build
|
||||
.vscode
|
||||
.venv
|
||||
.env
|
||||
*log
|
||||
|
||||
@@ -12,6 +12,10 @@ RUN npm run build
|
||||
FROM python:3.10-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install git
|
||||
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY src/api/requirements.txt requirements.txt
|
||||
RUN pip3 install -r requirements.txt
|
||||
COPY --from=0 /build/build /app/build
|
||||
|
||||
@@ -2,7 +2,7 @@ WattpadDownloader ([Demo](https://wpd.rambhat.la))
|
||||
---
|
||||
Straightforward, Extendable WebApp to download Wattpad Books as EPUB Files.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
Stars ⭐ are appreciated. Thanks!
|
||||
@@ -14,7 +14,7 @@ Stars ⭐ are appreciated. Thanks!
|
||||
- 🐇 Fast Generation, Ratelimit Handling.
|
||||
- 🐳 Docker Support
|
||||
- 🏷️ Generated EPUB File includes Metadata. (Dublin Core Spec)
|
||||
- 📖 Plays well with E-Readers. (Kindle Support if KOReader present, ReMarkable, KOBO, ...)
|
||||
- 📖 Plays well with E-Readers. (Kindle Support with Send2Kindle, ReMarkable, KOBO, KOReader...)
|
||||
- 💻 Easily Hackable. Extend with ease.
|
||||
|
||||
|
||||
@@ -25,6 +25,20 @@ Stars ⭐ are appreciated. Thanks!
|
||||
|
||||
That's it! You can use your instance at `http://localhost:5042`. API Documentation is available at `http://localhost:5042/docs`.
|
||||
|
||||
### Concurrent Requests
|
||||
The file-based cache struggles with concurrent requests (discussed in TheOnlyWayUp/WattpadDownloader#2 and TheOnlyWayUp/WattpadDownloader#22). If you're downloading a large number of books concurrently, switch to the Redis cache. Assuming you've built the image already:
|
||||
1. Fill in the `.env` file. Inside a Docker container, `localhost` refers to the container itself, so a Redis URL pointing at `localhost` will not work unless you use [`host.docker.internal`](https://docs.docker.com/desktop/features/networking/#i-want-to-connect-from-a-container-to-a-service-on-the-host) or a platform-specific equivalent.
|
||||
```
|
||||
USE_CACHE=true
|
||||
CACHE_TYPE=redis
|
||||
REDIS_CONNECTION_URL=redis://username:password@host:port
|
||||
```
|
||||
|
||||
|
||||
2. Run the container and supply the `.env` file: `docker run -d -p 5042:80 --env-file .env wp_downloader`
|
||||
Alternatively, if Redis is running on localhost:
|
||||
2. Modify your `.env` file, replacing `localhost` with `host.docker.internal`: `redis://localhost:6379` should become `redis://host.docker.internal:6379`. Then start the container: `docker run -d -p 5042:80 --env-file .env --add-host host.docker.internal:host-gateway wp_downloader`
|
||||
|
||||
---
|
||||
|
||||
My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fast and well-documented package.
|
||||
|
||||
BIN
Binary file not shown.
|
After Width: | Height: | Size: 264 KiB |
@@ -0,0 +1,3 @@
|
||||
USE_CACHE=true
|
||||
CACHE_TYPE=file
|
||||
REDIS_CONNECTION_URL=
|
||||
@@ -0,0 +1 @@
|
||||
3.10
|
||||
@@ -0,0 +1,26 @@
|
||||
[project]
|
||||
name = "api"
|
||||
version = "0.1.0"
|
||||
description = "Wattpad Downloader API"
|
||||
readme = "../../README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"aiohttp>=3.9.1",
|
||||
"rich>=13.9.4",
|
||||
"fastapi>=0.115.5",
|
||||
"ebooklib>=0.18",
|
||||
"python-dotenv>=1.0.1",
|
||||
"pydantic-settings>=2.6.1",
|
||||
"eliot>=1.16.0",
|
||||
"type-extensions>=0.1.2",
|
||||
"backoff>=2.2.1",
|
||||
"aiohttp-client-cache[all]",
|
||||
"bs4>=0.0.2",
|
||||
"uvicorn>=0.32.1",
|
||||
]
|
||||
|
||||
[tool.ruff.lint]
|
||||
ignore = ['E402']
|
||||
|
||||
[tool.uv.sources]
|
||||
aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-cache.git", rev = "keydb-ttl" }
|
||||
+46
-47
@@ -1,62 +1,61 @@
|
||||
aiofiles==23.2.1
|
||||
aiohttp==3.9.1
|
||||
aiohttp-client-cache==0.10.0
|
||||
aioboto3==13.2.0
|
||||
aiobotocore==2.15.2
|
||||
aiofiles==24.1.0
|
||||
aiohappyeyeballs==2.4.4
|
||||
aiohttp==3.11.9
|
||||
aiohttp-client-cache @ git+https://github.com/TheOnlyWayUp/aiohttp-client-cache.git@1f94f1d751e7320c0ea981d532ff02924782dae6
|
||||
aioitertools==0.12.0
|
||||
aiosignal==1.3.1
|
||||
aiosqlite==0.19.0
|
||||
annotated-types==0.6.0
|
||||
anyio==4.2.0
|
||||
asttokens==2.4.1
|
||||
aiosqlite==0.20.0
|
||||
annotated-types==0.7.0
|
||||
anyio==4.6.2.post1
|
||||
async-timeout==4.0.3
|
||||
attrs==23.1.0
|
||||
backoff==2.2.1
|
||||
beautifulsoup4==4.12.3
|
||||
boltons==24.1.0
|
||||
boto3==1.35.36
|
||||
botocore==1.35.36
|
||||
bs4==0.0.2
|
||||
click==8.1.7
|
||||
comm==0.2.0
|
||||
debugpy==1.8.0
|
||||
decorator==5.1.1
|
||||
EbookLib==0.18
|
||||
exceptiongroup==1.2.0
|
||||
executing==2.0.1
|
||||
fastapi==0.108.0
|
||||
dnspython==2.7.0
|
||||
ebooklib==0.18
|
||||
eliot==1.16.0
|
||||
exceptiongroup==1.2.2
|
||||
fastapi==0.115.5
|
||||
frozenlist==1.4.1
|
||||
h11==0.14.0
|
||||
idna==3.6
|
||||
ipykernel==6.28.0
|
||||
ipython==8.19.0
|
||||
itsdangerous==2.1.2
|
||||
jedi==0.19.1
|
||||
jupyter_client==8.6.0
|
||||
jupyter_core==5.5.1
|
||||
lxml==4.9.4
|
||||
itsdangerous==2.2.0
|
||||
jmespath==1.0.1
|
||||
lxml==5.3.0
|
||||
markdown-it-py==3.0.0
|
||||
matplotlib-inline==0.1.6
|
||||
mdurl==0.1.2
|
||||
motor==3.6.0
|
||||
multidict==6.0.4
|
||||
nest-asyncio==1.5.8
|
||||
packaging==23.2
|
||||
parso==0.8.3
|
||||
pexpect==4.9.0
|
||||
platformdirs==4.1.0
|
||||
prompt-toolkit==3.0.43
|
||||
psutil==5.9.7
|
||||
ptyprocess==0.7.0
|
||||
pure-eval==0.2.2
|
||||
pydantic==2.5.3
|
||||
pydantic_core==2.14.6
|
||||
Pygments==2.17.2
|
||||
python-dateutil==2.8.2
|
||||
pyzmq==25.1.2
|
||||
rich==13.7.0
|
||||
orjson==3.10.12
|
||||
propcache==0.2.1
|
||||
pydantic==2.10.2
|
||||
pydantic-core==2.27.1
|
||||
pydantic-settings==2.6.1
|
||||
pygments==2.18.0
|
||||
pymongo==4.9.2
|
||||
pyrsistent==0.20.0
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.0.1
|
||||
redis==5.2.0
|
||||
rich==13.9.4
|
||||
s3transfer==0.10.4
|
||||
setuptools==75.6.0
|
||||
six==1.16.0
|
||||
sniffio==1.3.0
|
||||
soupsieve==2.5
|
||||
stack-data==0.6.3
|
||||
starlette==0.32.0.post1
|
||||
tornado==6.4
|
||||
traitlets==5.14.0
|
||||
typing_extensions==4.9.0
|
||||
sniffio==1.3.1
|
||||
soupsieve==2.6
|
||||
starlette==0.41.3
|
||||
type-extensions==0.1.2
|
||||
typing-extensions==4.12.2
|
||||
url-normalize==1.4.3
|
||||
uvicorn==0.25.0
|
||||
wcwidth==0.2.12
|
||||
yarl==1.9.4
|
||||
urllib3==2.2.3
|
||||
uvicorn==0.32.1
|
||||
wrapt==1.17.0
|
||||
yarl==1.18.3
|
||||
zope-interface==7.2
|
||||
|
||||
+222
-87
@@ -1,61 +1,104 @@
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
from ebooklib import epub
|
||||
import unicodedata
|
||||
from typing import List, Optional, Tuple
|
||||
from typing_extensions import TypedDict
|
||||
import re
|
||||
import unicodedata
|
||||
import logging
|
||||
from os import environ
|
||||
from enum import Enum
|
||||
import backoff
|
||||
from aiohttp import ClientResponseError, ClientSession
|
||||
from aiohttp_client_cache.session import CachedSession
|
||||
from aiohttp_client_cache import FileBackend
|
||||
from eliot import to_file, start_action
|
||||
from eliot.stdlib import EliotHandler
|
||||
from dotenv import load_dotenv
|
||||
from ebooklib import epub
|
||||
from ebooklib.epub import EpubBook
|
||||
from bs4 import BeautifulSoup
|
||||
from pydantic import TypeAdapter, model_validator, field_validator
|
||||
from pydantic_settings import BaseSettings
|
||||
from aiohttp import ClientResponseError
|
||||
from aiohttp_client_cache.session import CachedSession
|
||||
from aiohttp_client_cache import FileBackend, RedisBackend
|
||||
|
||||
# Load variables from a local .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
load_dotenv(override=True)

# A single handler forwards standard-library logging records into Eliot.
handler = EliotHandler()
logging.getLogger("fastapi").setLevel(logging.INFO)
logging.getLogger("fastapi").addHandler(handler)

# Eliot output is only written to disk when DEBUG is set to a non-empty value.
# The file handle is handed to Eliot and is not closed here.
if environ.get("DEBUG"):
    to_file(open("eliot.log", "wb"))

# Loggers must be obtained through logging.getLogger(), never instantiated
# directly, so that they are registered with the logging manager.
# The level and propagation are pinned to keep the previous behaviour: the
# directly-instantiated logger had no parent and no level, so every record
# reached `handler` and nothing was passed on to the root logger.
logger = logging.getLogger("wpd")
logger.setLevel(logging.DEBUG)
logger.propagate = False
logger.addHandler(handler)
|
||||
|
||||
# --- #
|
||||
|
||||
|
||||
class CacheTypes(Enum):
    """Cache backends that can be selected through the CACHE_TYPE setting."""

    # Each member's value is the literal string accepted in configuration.
    file = "file"
    redis = "redis"
|
||||
|
||||
|
||||
class Config(BaseSettings):
|
||||
USE_CACHE: bool = True
|
||||
CACHE_TYPE: CacheTypes = CacheTypes.file
|
||||
REDIS_CONNECTION_URL: str = ""
|
||||
|
||||
@field_validator("USE_CACHE", mode="before")
|
||||
def validate_use_cache(cls, value):
|
||||
# Return default if value is an empty string
|
||||
if value == "":
|
||||
return True # Default value for USE_CACHE
|
||||
return value
|
||||
|
||||
@field_validator("CACHE_TYPE", mode="before")
|
||||
def validate_cache_type(cls, value):
|
||||
# Thanks https://stackoverflow.com/a/78157474
|
||||
if value == "":
|
||||
return "file"
|
||||
return value
|
||||
|
||||
@model_validator(mode="after")
|
||||
def prevent_mismatched_redis_url(self):
|
||||
match self.CACHE_TYPE:
|
||||
case CacheTypes.file:
|
||||
if self.REDIS_CONNECTION_URL:
|
||||
raise ValueError(
|
||||
"REDIS_CONNECTION_URL provided when File cache selected. To use Redis as a cache, set CACHE_TYPE=redis."
|
||||
)
|
||||
case CacheTypes.redis:
|
||||
if not self.REDIS_CONNECTION_URL:
|
||||
raise ValueError(
|
||||
"REDIS_CONNECTION_URL not provided when Redis cache selected. To use File cache, set CACHE_TYPE=file."
|
||||
)
|
||||
return self
|
||||
|
||||
|
||||
config = Config()
|
||||
|
||||
# --- #
|
||||
|
||||
headers = {
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
|
||||
}
|
||||
|
||||
cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours
|
||||
if config.USE_CACHE:
|
||||
match config.CACHE_TYPE:
|
||||
case CacheTypes.file:
|
||||
cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours
|
||||
case CacheTypes.redis:
|
||||
cache = RedisBackend(
|
||||
cache_name="wpd-aiohttp-cache",
|
||||
address=config.REDIS_CONNECTION_URL,
|
||||
expire_after=43200, # 12 hours
|
||||
)
|
||||
else:
|
||||
cache = None
|
||||
|
||||
logger.info(f"Using {cache=}")
|
||||
|
||||
# --- Utilities --- #
|
||||
|
||||
|
||||
async def wp_get_cookies(username: str, password: str) -> dict:
    # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
    """Log in to Wattpad and return the cookies set by the login response.

    Args:
        username (str): Wattpad username; lowercased before it is sent.
        password (str): Wattpad password.

    Raises:
        ValueError: The login response status was not 204.
        ValueError: The login response carried no cookies.

    Returns:
        dict: Cookie names mapped to cookie values.
    """
    login_url = "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login"

    async with ClientSession(headers=headers) as session:
        credentials = {
            "username": username.lower(),
            "password": password,
        }  # the username.lower() is for caching

        async with session.post(login_url, data=credentials) as response:
            if response.status != 204:
                raise ValueError("Not a 204.")

            # Thanks https://stackoverflow.com/a/32281245
            auth_cookies = {}
            for name, morsel in response.cookies.items():
                auth_cookies[name] = morsel.value

            if not auth_cookies:
                raise ValueError("No cookies.")

            return auth_cookies
|
||||
|
||||
|
||||
def slugify(value, allow_unicode=False) -> str:
|
||||
"""
|
||||
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|
||||
@@ -79,44 +122,128 @@ def slugify(value, allow_unicode=False) -> str:
|
||||
return re.sub(r"[-\s]+", "-", value).strip("-_")
|
||||
|
||||
|
||||
async def wp_get_cookies(username: str, password: str) -> dict:
    # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
    """Log in to Wattpad and return the cookies set by the login response.

    The whole exchange runs inside an Eliot "api_fetch_cookies" action.

    Args:
        username (str): Wattpad username; lowercased before it is sent.
        password (str): Wattpad password.

    Raises:
        ValueError: The login response status was not 204.
        ValueError: The login response carried no cookies.

    Returns:
        dict: Cookie names mapped to cookie values.
    """
    login_url = "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login"

    with start_action(action_type="api_fetch_cookies"):
        # NOTE(review): cache=None presumably keeps the login exchange out of
        # the shared cache - confirm against the aiohttp-client-cache fork in use.
        async with CachedSession(headers=headers, cache=None) as session:
            credentials = {
                "username": username.lower(),
                "password": password,
            }  # the username.lower() is for caching

            async with session.post(login_url, data=credentials) as response:
                if response.status != 204:
                    raise ValueError("Not a 204.")

                # Thanks https://stackoverflow.com/a/32281245
                auth_cookies = {}
                for name, morsel in response.cookies.items():
                    auth_cookies[name] = morsel.value

                if not auth_cookies:
                    raise ValueError("No cookies.")

                return auth_cookies
|
||||
|
||||
|
||||
# --- Models --- #
|
||||
|
||||
|
||||
class Language(TypedDict):
    """Language object nested inside a story payload."""

    name: str  # requested from the API as language(name)
|
||||
|
||||
|
||||
class User(TypedDict):
    """User object nested inside a story payload."""

    username: str  # requested from the API as user(username)
|
||||
|
||||
|
||||
class Part(TypedDict):
    """One part (chapter) entry of a story, requested as parts(id,title)."""

    id: int
    title: str
|
||||
|
||||
|
||||
class Story(TypedDict):
    """Story payload as requested from the Wattpad API.

    The keys mirror the `fields=` list sent with the story requests.
    """

    # Identity and title
    id: str
    title: str

    # Timestamps, kept as the strings the API returns
    createDate: str
    modifyDate: str

    # Nested objects
    language: Language
    user: User

    # Descriptive metadata
    description: str
    cover: str  # passed to fetch_cover() as a URL
    completed: bool
    tags: List[str]
    mature: bool
    url: str

    # Chapters, and whether the story is paywalled
    parts: List[Part]
    isPaywalled: bool
|
||||
|
||||
|
||||
story_ta = TypeAdapter(Story)
|
||||
|
||||
# --- API Calls --- #
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||
async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> dict:
|
||||
"""Taking a story_id, return its information from the Wattpad API."""
|
||||
async with (
|
||||
CachedSession(headers=headers, cache=cache)
|
||||
if not cookies
|
||||
else ClientSession(headers=headers, cookies=cookies)
|
||||
async def fetch_story_from_partId(
|
||||
part_id: int, cookies: Optional[dict] = None
|
||||
) -> Tuple[str, Story]:
|
||||
"""Return a Story ID from a Part ID."""
|
||||
with start_action(action_type="api_fetch_storyFromPartId"):
|
||||
async with CachedSession(
|
||||
headers=headers, cache=None if cookies else cache
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
|
||||
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover)"
|
||||
) as response:
|
||||
if not response.ok:
|
||||
if response.status in [404, 400]:
|
||||
return {}
|
||||
response.raise_for_status()
|
||||
|
||||
body = await response.json()
|
||||
|
||||
return body
|
||||
return str(body["groupId"]), story_ta.validate_python(body["group"])
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> Story:
    """Fetch a story's metadata from the Wattpad API and validate it.

    Runs inside an Eliot "api_fetch_story" action. A ClientResponseError is
    retried with exponential backoff for up to 15 seconds by the decorator.

    Args:
        story_id (int): ID of the story to look up.
        cookies (Optional[dict]): Authorization cookies. When given, the
            shared cache is not passed to the session.

    Raises:
        ClientResponseError: The API answered with an error status
            (raised by `raise_for_status`).

    Returns:
        Story: The response body validated by `story_ta`.
    """
    with start_action(action_type="api_fetch_story", story_id=story_id):
        # Requests that carry cookies are not given the shared cache.
        session_cache = None if cookies else cache
        story_url = f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"

        async with CachedSession(
            headers=headers, cookies=cookies, cache=session_cache
        ) as session:
            async with session.get(story_url) as response:
                response.raise_for_status()
                payload = await response.json()
                return story_ta.validate_python(payload)
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||
async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str:
|
||||
"""Return the HTML Content of a Part."""
|
||||
async with (
|
||||
CachedSession(headers=headers, cache=cache)
|
||||
if not cookies
|
||||
else ClientSession(headers=headers, cookies=cookies)
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
with start_action(action_type="api_fetch_partContent", part_id=part_id):
|
||||
async with CachedSession(
|
||||
headers=headers, cookies=cookies, cache=None if cookies else cache
|
||||
) as session:
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"
|
||||
) as response:
|
||||
if not response.ok:
|
||||
if response.status in [404, 400]:
|
||||
return ""
|
||||
response.raise_for_status()
|
||||
|
||||
body = await response.text()
|
||||
@@ -125,17 +252,13 @@ async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> st
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||
async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
|
||||
"""Fetch image bytes."""
|
||||
async with (
|
||||
CachedSession(headers=headers, cache=cache)
|
||||
if not cookies
|
||||
else ClientSession(headers=headers, cookies=cookies)
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
async def fetch_cover(url: str) -> bytes:
|
||||
"""Fetch cover image bytes."""
|
||||
with start_action(action_type="api_fetch_cover", url=url):
|
||||
async with CachedSession(
|
||||
headers=headers, cache=None
|
||||
) as session: # Don't cache images.
|
||||
async with session.get(url) as response:
|
||||
if not response.ok:
|
||||
if response.status in [404, 400]:
|
||||
return bytes()
|
||||
response.raise_for_status()
|
||||
|
||||
body = await response.read()
|
||||
@@ -146,11 +269,13 @@ async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
|
||||
# --- EPUB Generation --- #
|
||||
|
||||
|
||||
def set_metadata(book, data):
|
||||
def set_metadata(book: EpubBook, data: Story) -> None:
|
||||
"""Set book metadata."""
|
||||
book.add_author(data["user"]["username"])
|
||||
|
||||
book.add_metadata("DC", "title", data["title"])
|
||||
book.add_metadata("DC", "description", data["description"])
|
||||
book.add_metadata("DC", "created", data["createDate"])
|
||||
book.add_metadata("DC", "date", data["createDate"])
|
||||
book.add_metadata("DC", "modified", data["modifyDate"])
|
||||
book.add_metadata("DC", "language", data["language"]["name"])
|
||||
|
||||
@@ -165,19 +290,26 @@ def set_metadata(book, data):
|
||||
)
|
||||
|
||||
|
||||
async def set_cover(book, data, cookies: Optional[dict] = None):
|
||||
book.set_cover("cover.jpg", await fetch_cover(data["cover"], cookies=cookies))
|
||||
async def set_cover(book: EpubBook, data: Story) -> None:
|
||||
"""Set book cover."""
|
||||
book.set_cover("cover.jpg", await fetch_cover(data["cover"]))
|
||||
chapter = epub.EpubHtml(
|
||||
file_name="titlepage.xhtml", # Standard for cover page
|
||||
)
|
||||
chapter.set_content('<img src="cover.jpg">')
|
||||
|
||||
|
||||
async def add_chapters(
|
||||
book, data, download_images: bool = False, cookies: Optional[dict] = None
|
||||
book: EpubBook,
|
||||
data: Story,
|
||||
download_images: bool = False,
|
||||
cookies: Optional[dict] = None,
|
||||
):
|
||||
chapters = []
|
||||
|
||||
for cidx, part in enumerate(data["parts"]):
|
||||
content = await fetch_part_content(part["id"], cookies=cookies)
|
||||
title = part["title"]
|
||||
clean_title = slugify(title)
|
||||
|
||||
# Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
|
||||
chapter = epub.EpubHtml(
|
||||
@@ -188,14 +320,15 @@ async def add_chapters(
|
||||
|
||||
if download_images:
|
||||
soup = BeautifulSoup(content, "lxml")
|
||||
async with (
|
||||
CachedSession(headers=headers, cache=cache)
|
||||
if not cookies
|
||||
else ClientSession(headers=headers, cookies=cookies)
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
|
||||
async with CachedSession(
|
||||
headers=headers, cache=None
|
||||
) as session: # Don't cache images.
|
||||
for idx, image in enumerate(soup.find_all("img")):
|
||||
if not image["src"]:
|
||||
continue
|
||||
# Find all image tags and filter for those with sources
|
||||
|
||||
async with session.get(image["src"]) as response:
|
||||
img = epub.EpubImage(
|
||||
media_type="image/jpeg",
|
||||
@@ -203,8 +336,10 @@ async def add_chapters(
|
||||
file_name=f"static/{cidx}/{idx}.jpeg",
|
||||
)
|
||||
book.add_item(img)
|
||||
# Fetch image and pack
|
||||
|
||||
content = content.replace(
|
||||
str(image), f'<img src="static/{cidx}/{idx}.jpeg"/>'
|
||||
str(image["src"]), f"static/{cidx}/{idx}.jpeg"
|
||||
)
|
||||
|
||||
chapter.set_content(f"<h1>{title}</h1>" + content)
|
||||
@@ -216,7 +351,7 @@ async def add_chapters(
|
||||
for chapter in chapters:
|
||||
book.add_item(chapter)
|
||||
|
||||
book.toc = tuple(chapters)
|
||||
book.toc = chapters
|
||||
|
||||
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
|
||||
book.add_item(epub.EpubNcx())
|
||||
|
||||
+109
-24
@@ -1,7 +1,16 @@
|
||||
"""WattpadDownloader API Server."""
|
||||
|
||||
from typing import Optional
|
||||
import asyncio
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from io import BytesIO
|
||||
from enum import Enum
|
||||
from eliot import start_action
|
||||
from aiohttp import ClientResponseError
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from ebooklib import epub
|
||||
from create_book import (
|
||||
retrieve_story,
|
||||
@@ -10,28 +19,106 @@ from create_book import (
|
||||
add_chapters,
|
||||
slugify,
|
||||
wp_get_cookies,
|
||||
fetch_story_from_partId,
|
||||
logger,
|
||||
)
|
||||
import tempfile
|
||||
from io import BytesIO
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
BUILD_PATH = Path(__file__).parent / "build"
|
||||
|
||||
headers = {
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
|
||||
}
|
||||
|
||||
|
||||
class RequestCancelledMiddleware:
    # Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
    """ASGI middleware that cancels the request handler when the client disconnects.

    For HTTP scopes the wrapped app runs in its own task and reads its
    messages from a queue. A second task polls the real `receive` callable,
    forwards every message into that queue, and cancels the handler task as
    soon as an "http.disconnect" message arrives. Non-HTTP scopes are passed
    straight through to the wrapped app.
    """

    def __init__(self, app):
        """Store the wrapped ASGI application."""
        self.app = app

    async def __call__(self, scope, receive, send):
        """Run the wrapped app, cancelling it if the client disconnects.

        Args:
            scope: ASGI connection scope; only `scope["type"]` is inspected.
            receive: ASGI receive callable supplied by the server.
            send: ASGI send callable, handed to the wrapped app unchanged.

        Returns:
            Whatever the wrapped app returns for HTTP scopes, or None when the
            handler was cancelled or the scope is not HTTP.
        """
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        # Let's make a shared queue for the request messages
        queue = asyncio.Queue()

        async def message_poller(sentinel, handler_task):
            while True:
                message = await receive()
                if message["type"] == "http.disconnect":
                    handler_task.cancel()
                    return sentinel  # Break the loop

                # Puts the message in the queue
                await queue.put(message)

        sentinel = object()
        handler_task = asyncio.create_task(self.app(scope, queue.get, send))
        # Keep a reference to the poller: the event loop only holds weak
        # references to tasks, so an unreferenced task may be garbage
        # collected before it finishes.
        poller_task = asyncio.create_task(message_poller(sentinel, handler_task))

        try:
            return await handler_task
        except asyncio.CancelledError:
            logger.info("Cancelling task as connection closed")
        finally:
            # The handler is finished (or cancelled); stop polling so the
            # poller task does not outlive the request. Cancelling a task
            # that has already finished is a no-op.
            poller_task.cancel()
|
||||
|
||||
|
||||
app.add_middleware(RequestCancelledMiddleware)
|
||||
|
||||
|
||||
class DownloadMode(Enum):
    """How the ID passed to the download endpoint should be interpreted."""

    # The ID is a story ID.
    story = "story"
    # The ID is a part ID; the story is looked up from it.
    part = "part"
|
||||
|
||||
|
||||
@app.get("/")
def home():
    """Serve index.html from the build directory."""
    index_page = BUILD_PATH / "index.html"
    return FileResponse(index_page)
|
||||
|
||||
|
||||
@app.get("/download/{story_id}")
|
||||
async def download_book(
|
||||
story_id: int,
|
||||
@app.exception_handler(ClientResponseError)
def download_error_handler(request: Request, exception: ClientResponseError):
    """Turn an upstream ClientResponseError into an HTML error response.

    Args:
        request (Request): The incoming request (unused).
        exception (ClientResponseError): The error raised by the HTTP client;
            only its `status` is inspected.

    Returns:
        HTMLResponse: 404 for upstream 400/404, 429 for upstream 429, and 500
        for any other status.
    """
    upstream_status = exception.status

    if upstream_status in (400, 404):
        status_code = 404
        content = 'This story does not exist, or has been deleted. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>'
    elif upstream_status == 429:
        # Rate-limit by Wattpad
        status_code = 429
        content = 'The website is overloaded. Please try again in a few minutes. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>'
    else:
        # Unhandled error
        status_code = 500
        content = 'Something went wrong. Yell at me on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>'

    return HTMLResponse(status_code=status_code, content=content)
|
||||
|
||||
|
||||
@app.get("/download/{download_id}")
|
||||
async def handle_download(
|
||||
download_id: int,
|
||||
download_images: bool = False,
|
||||
mode: DownloadMode = DownloadMode.story,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
):
|
||||
with start_action(
|
||||
action_type="download",
|
||||
download_id=download_id,
|
||||
download_images=download_images,
|
||||
mode=mode,
|
||||
):
|
||||
if username and not password or password and not username:
|
||||
logger.error(
|
||||
"Username with no Password or Password with no Username provided."
|
||||
)
|
||||
return HTMLResponse(
|
||||
status_code=422,
|
||||
content='Include both the username <u>and</u> password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||
@@ -42,6 +129,7 @@ async def download_book(
|
||||
try:
|
||||
cookies = await wp_get_cookies(username=username, password=password)
|
||||
except ValueError:
|
||||
logger.error("Invalid username or password.")
|
||||
return HTMLResponse(
|
||||
status_code=403,
|
||||
content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||
@@ -49,25 +137,22 @@ async def download_book(
|
||||
else:
|
||||
cookies = None
|
||||
|
||||
data = await retrieve_story(story_id, cookies=cookies)
|
||||
match mode:
|
||||
case DownloadMode.story:
|
||||
story_id = download_id
|
||||
metadata = await retrieve_story(story_id, cookies)
|
||||
case DownloadMode.part:
|
||||
story_id, metadata = await fetch_story_from_partId(download_id, cookies)
|
||||
|
||||
logger.info(f"Retrieved story id ({story_id=})")
|
||||
|
||||
book = epub.EpubBook()
|
||||
set_metadata(book, metadata)
|
||||
await set_cover(book, metadata)
|
||||
|
||||
try:
|
||||
set_metadata(book, data)
|
||||
except KeyError:
|
||||
return HTMLResponse(
|
||||
status_code=404,
|
||||
content='Story not found. Check the ID - Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||
)
|
||||
|
||||
await set_cover(book, data, cookies=cookies)
|
||||
# print("Metadata Downloaded")
|
||||
|
||||
# Chapters are downloaded
|
||||
async for title in add_chapters(
|
||||
book, data, download_images=download_images, cookies=cookies
|
||||
book, metadata, download_images=download_images, cookies=cookies
|
||||
):
|
||||
# print(f"Part ({title}) downloaded")
|
||||
...
|
||||
|
||||
# Book is compiled
|
||||
@@ -85,7 +170,7 @@ async def download_book(
|
||||
BytesIO(book_data),
|
||||
media_type="application/epub+zip",
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="{slugify(data["title"])}_{story_id}_{"images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058
|
||||
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058
|
||||
},
|
||||
)
|
||||
|
||||
@@ -96,4 +181,4 @@ app.mount("/", StaticFiles(directory=BUILD_PATH), "static")
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(app, host="0.0.0.0", port=80)
|
||||
uvicorn.run("main:app", host="0.0.0.0", port=80, workers=16)
|
||||
|
||||
Generated
+1294
File diff suppressed because it is too large
Load Diff
@@ -25,8 +25,6 @@
|
||||
|
||||
<!-- Meta Tags Generated with https://metatags.io -->
|
||||
|
||||
<script defer src="https://feedback.fish/ff.js?pid=f8df016d4ffdfb"></script>
|
||||
|
||||
%sveltekit.head%
|
||||
</head>
|
||||
<body data-sveltekit-preload-data="hover">
|
||||
|
||||
@@ -18,10 +18,10 @@
|
||||
<aside>
|
||||
<div class="grid grid-cols-3 max-w-lg w-full">
|
||||
<a
|
||||
href="https://liberapay.com/TheOnlyWayUp/"
|
||||
href="https://patreon.com/theonlywayup"
|
||||
target="_blank"
|
||||
class="link"
|
||||
data-umami-event="Footer Donate">Donate</a
|
||||
data-umami-event="Footer Donate">Patreon</a
|
||||
>
|
||||
<a
|
||||
href="https://rambhat.la"
|
||||
|
||||
@@ -1,45 +1,79 @@
|
||||
<script>
|
||||
let story_id = "";
|
||||
let download_images = false;
|
||||
let is_paid_story = false;
|
||||
let invalid_url = false;
|
||||
let after_download_page = false;
|
||||
let credentials = {
|
||||
username: "",
|
||||
password: "",
|
||||
};
|
||||
let after_download_page = false;
|
||||
let url = "";
|
||||
|
||||
let raw_story_id = "";
|
||||
let is_part_id = false;
|
||||
let download_id = "";
|
||||
let mode = "";
|
||||
let input_url = "";
|
||||
|
||||
let button_disabled = false;
|
||||
$: button_disabled =
|
||||
!story_id ||
|
||||
!input_url ||
|
||||
(is_paid_story && !(credentials.username && credentials.password));
|
||||
|
||||
$: {
|
||||
is_part_id = false;
|
||||
if (raw_story_id.includes("wattpad.com")) {
|
||||
// Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, I wasn't able to remove it.
|
||||
// In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url).
|
||||
$: url =
|
||||
`/download/` +
|
||||
download_id +
|
||||
`?om=1` +
|
||||
(download_images ? "&download_images=true" : "") +
|
||||
(is_paid_story
|
||||
? `&username=${encodeURIComponent(credentials.username)}&password=${encodeURIComponent(credentials.password)}`
|
||||
: "") +
|
||||
`&mode=${mode}`;
|
||||
|
||||
if (raw_story_id.includes("/story/")) {
|
||||
$: {
|
||||
if (input_url.length) {
|
||||
input_url = input_url.toLowerCase();
|
||||
|
||||
invalid_url = false;
|
||||
|
||||
if (/^\d+$/.test(input_url)) {
|
||||
// All numbers
|
||||
download_id = input_url;
|
||||
mode = "story";
|
||||
} else if (input_url.includes("wattpad.com/")) {
|
||||
// Is a string and contains wattpad.com/
|
||||
|
||||
if (input_url.includes("/story/")) {
|
||||
// https://wattpad.com/story/237369078-wattpad-books-presents
|
||||
story_id = raw_story_id.split("/story/")[1].split("-")[0];
|
||||
raw_story_id = story_id;
|
||||
} else if (raw_story_id.includes("/stories/")) {
|
||||
input_url = input_url.split("-")[0].split("?")[0].split("/story/")[1]; // removes tracking fields and title
|
||||
download_id = input_url;
|
||||
mode = "story";
|
||||
} else if (input_url.includes("/stories/")) {
|
||||
// https://www.wattpad.com/api/v3/stories/237369078?fields=...
|
||||
story_id = raw_story_id.split("/stories/")[1].split("?")[0];
|
||||
raw_story_id = story_id;
|
||||
input_url = input_url.split("?")[0].split("/stories/")[1]; // removes params
|
||||
download_id = input_url;
|
||||
mode = "story";
|
||||
} else {
|
||||
// https://www.wattpad.com/939051741-wattpad-books-presents-part-name
|
||||
is_part_id = true;
|
||||
raw_story_id = "";
|
||||
story_id = "";
|
||||
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
|
||||
input_url = input_url.split("-")[0].split("?")[0].split("wattpad.com/")[1]; // removes tracking fields and title
|
||||
download_id = input_url;
|
||||
if (/^\d+$/.test(download_id)) {
|
||||
// If "wattpad.com/{download_id}" contains only numbers
|
||||
mode = "part";
|
||||
} else {
|
||||
invalid_url = true;
|
||||
input_url = "";
|
||||
download_id = "";
|
||||
}
|
||||
}
|
||||
} else {
|
||||
story_id = parseInt(raw_story_id) || ""; // parseInt returns NaN for undefined values.
|
||||
raw_story_id = story_id;
|
||||
invalid_url = true;
|
||||
}
|
||||
|
||||
input_url = input_url.match(/\d+/g)?.join("") || "";
|
||||
download_id = input_url;
|
||||
|
||||
// Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, which I wasn't able to remove.
|
||||
// In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url).
|
||||
} else {
|
||||
invalid_url = false;
|
||||
download_id = "";
|
||||
}
|
||||
}
|
||||
</script>
|
||||
@@ -61,6 +95,18 @@
|
||||
</p>
|
||||
<ul class="pt-4 list list-inside text-xl">
|
||||
<!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. -->
|
||||
<li>12/24 - 📂 Improved Performance</li>
|
||||
<li>11/24 - 🔗 Paste Links!</li>
|
||||
<li>11/24 - 📨 Send to Kindle Support!</li>
|
||||
|
||||
<li>11/24 - ⚒️ Fix Image Downloads</li>
|
||||
<li>
|
||||
10/24 - 👾 Add the <a
|
||||
href="https://discord.com/oauth2/authorize?client_id=1292173380065296395&permissions=274878285888&scope=bot%20applications.commands"
|
||||
target="_blank"
|
||||
class="link underline">Discord Bot</a
|
||||
>!
|
||||
</li>
|
||||
<li>07/24 - 🔡 RTL Language support! (Arabic, etc.)</li>
|
||||
<li>06/24 - 🔑 Authenticated Downloads!</li>
|
||||
<li>06/24 - 🖼️ Image Downloading!</li>
|
||||
@@ -71,29 +117,29 @@
|
||||
<div class="form-control">
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Story ID"
|
||||
placeholder="Story URL"
|
||||
class="input input-bordered"
|
||||
class:input-warning={is_part_id}
|
||||
bind:value={raw_story_id}
|
||||
class:input-warning={invalid_url}
|
||||
bind:value={input_url}
|
||||
required
|
||||
name="story_id"
|
||||
name="input_url"
|
||||
/>
|
||||
<label class="label" for="story_id">
|
||||
{#if is_part_id}
|
||||
<label class="label" for="input_url">
|
||||
{#if invalid_url}
|
||||
<p class=" text-red-500">
|
||||
Refer to (<button
|
||||
class="link font-semibold"
|
||||
onclick="StoryIDTutorialModal.showModal()"
|
||||
data-umami-event="Part StoryIDTutorialModal Open"
|
||||
>How to get a Story ID</button
|
||||
onclick="StoryURLTutorialModal.showModal()"
|
||||
data-umami-event="Part StoryURLTutorialModal Open"
|
||||
>How to get a Story URL</button
|
||||
>).
|
||||
</p>
|
||||
{:else}
|
||||
<button
|
||||
class="label-text link font-semibold"
|
||||
onclick="StoryIDTutorialModal.showModal()"
|
||||
data-umami-event="StoryIDTutorialModal Open"
|
||||
>How to get a Story ID</button
|
||||
onclick="StoryURLTutorialModal.showModal()"
|
||||
data-umami-event="StoryURLTutorialModal Open"
|
||||
>How to get a Story URL</button
|
||||
>
|
||||
{/if}
|
||||
</label>
|
||||
@@ -187,7 +233,13 @@
|
||||
>, where we release features early and discuss updates.
|
||||
</p>
|
||||
</div>
|
||||
<a href="/" class="btn btn-outline btn-lg mt-10">Download More</a>
|
||||
<button
|
||||
on:click={() => {
|
||||
after_download_page = false;
|
||||
input_url = "";
|
||||
}}
|
||||
class="btn btn-outline btn-lg mt-10">Download More</button
|
||||
>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
@@ -196,32 +248,31 @@
|
||||
|
||||
<!-- Open the modal using ID.showModal() method -->
|
||||
|
||||
<dialog id="StoryIDTutorialModal" class="modal">
|
||||
<dialog id="StoryURLTutorialModal" class="modal">
|
||||
<div class="modal-box">
|
||||
<form method="dialog">
|
||||
<button class="btn btn-sm btn-circle btn-ghost absolute right-2 top-2"
|
||||
>✕</button
|
||||
>
|
||||
</form>
|
||||
<h3 class="font-bold text-lg">Retrieving a Story ID</h3>
|
||||
<h3 class="font-bold text-lg">Finding the Story URL</h3>
|
||||
<ol class="list list-disc list-inside py-4 space-y-4">
|
||||
<li>
|
||||
Open the Story URL, this page includes the story description and tags.
|
||||
(For example, <span class="font-mono bg-slate-100 p-1"
|
||||
>wattpad.com/story/237369078-wattpad-books-presents</span
|
||||
>).
|
||||
Copy the URL from the Website, or hit share and copy the URL on the App.
|
||||
</li>
|
||||
<li>
|
||||
Copy the numbers after the <span class="font-mono bg-slate-100 p-1"
|
||||
>/</span
|
||||
>
|
||||
(In the example, that'd be,
|
||||
For example,
|
||||
<span class="font-mono bg-slate-100 p-1"
|
||||
>wattpad.com/story/<span class="bg-amber-200 p-1">237369078</span
|
||||
>-wattpad-books-presents</span
|
||||
>)
|
||||
>wattpad.com/<span class="bg-amber-200 rounded-sm">story</span
|
||||
>/237369078-wattpad-books-presents</span
|
||||
>.
|
||||
</li>
|
||||
<li>Paste the Story ID and hit Download!</li>
|
||||
<li>
|
||||
<span class="font-mono bg-slate-100 p-1"
|
||||
>https://www.wattpad.com/939103774-given</span
|
||||
> is okay too.
|
||||
</li>
|
||||
<li>Paste the URL and hit Download!</li>
|
||||
</ol>
|
||||
</div>
|
||||
<form method="dialog" class="modal-backdrop">
|
||||
|
||||
Reference in New Issue
Block a user