Merge pull request #23 from TheOnlyWayUp/fix/#22-redis-cache
Concurrent requests fail Co-authored-by: AaronBenDaniel <144371000+AaronBenDaniel@users.noreply.github.com>
This commit is contained in:
@@ -5,3 +5,6 @@ data
|
|||||||
*ipynb
|
*ipynb
|
||||||
build
|
build
|
||||||
.vscode
|
.vscode
|
||||||
|
.venv
|
||||||
|
.env
|
||||||
|
*log
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
USE_CACHE=true
|
||||||
|
CACHE_TYPE=file
|
||||||
|
REDIS_CONNECTION_URL=
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
3.10
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
[project]
|
||||||
|
name = "api"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Wattpad Downloader API"
|
||||||
|
readme = "../../README.md"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
dependencies = [
|
||||||
|
"aiohttp>=3.9.1",
|
||||||
|
"aiohttp-client-cache[all]>=0.10.0",
|
||||||
|
"rich>=13.9.4",
|
||||||
|
"fastapi>=0.115.5",
|
||||||
|
"ebooklib>=0.18",
|
||||||
|
"python-dotenv>=1.0.1",
|
||||||
|
"pydantic-settings>=2.6.1",
|
||||||
|
"eliot>=1.16.0",
|
||||||
|
"type-extensions>=0.1.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
ignore = ['E402']
|
||||||
+37
-15
@@ -1,24 +1,32 @@
|
|||||||
|
aioboto3==12.4.0
|
||||||
|
aiobotocore==2.12.3
|
||||||
aiofiles==23.2.1
|
aiofiles==23.2.1
|
||||||
aiohttp==3.9.1
|
aiohttp==3.9.1
|
||||||
aiohttp-client-cache==0.10.0
|
aiohttp-client-cache==0.10.0
|
||||||
|
aioitertools==0.12.0
|
||||||
aiosignal==1.3.1
|
aiosignal==1.3.1
|
||||||
aiosqlite==0.19.0
|
aiosqlite==0.19.0
|
||||||
annotated-types==0.6.0
|
annotated-types==0.7.0
|
||||||
anyio==4.2.0
|
anyio==4.6.2.post1
|
||||||
asttokens==2.4.1
|
asttokens==2.4.1
|
||||||
async-timeout==4.0.3
|
async-timeout==4.0.3
|
||||||
attrs==23.1.0
|
attrs==23.1.0
|
||||||
backoff==2.2.1
|
backoff==2.2.1
|
||||||
beautifulsoup4==4.12.3
|
beautifulsoup4==4.12.3
|
||||||
|
boltons==24.1.0
|
||||||
|
boto3==1.34.69
|
||||||
|
botocore==1.34.69
|
||||||
bs4==0.0.2
|
bs4==0.0.2
|
||||||
click==8.1.7
|
click==8.1.7
|
||||||
comm==0.2.0
|
comm==0.2.0
|
||||||
debugpy==1.8.0
|
debugpy==1.8.0
|
||||||
decorator==5.1.1
|
decorator==5.1.1
|
||||||
EbookLib==0.18
|
dnspython==2.7.0
|
||||||
exceptiongroup==1.2.0
|
ebooklib==0.18
|
||||||
|
eliot==1.16.0
|
||||||
|
exceptiongroup==1.2.2
|
||||||
executing==2.0.1
|
executing==2.0.1
|
||||||
fastapi==0.108.0
|
fastapi==0.115.5
|
||||||
frozenlist==1.4.1
|
frozenlist==1.4.1
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
idna==3.6
|
idna==3.6
|
||||||
@@ -26,14 +34,17 @@ ipykernel==6.28.0
|
|||||||
ipython==8.19.0
|
ipython==8.19.0
|
||||||
itsdangerous==2.1.2
|
itsdangerous==2.1.2
|
||||||
jedi==0.19.1
|
jedi==0.19.1
|
||||||
jupyter_client==8.6.0
|
jmespath==1.0.1
|
||||||
jupyter_core==5.5.1
|
jupyter-client==8.6.0
|
||||||
lxml==4.9.4
|
jupyter-core==5.5.1
|
||||||
|
lxml==5.3.0
|
||||||
markdown-it-py==3.0.0
|
markdown-it-py==3.0.0
|
||||||
matplotlib-inline==0.1.6
|
matplotlib-inline==0.1.6
|
||||||
mdurl==0.1.2
|
mdurl==0.1.2
|
||||||
|
motor==3.6.0
|
||||||
multidict==6.0.4
|
multidict==6.0.4
|
||||||
nest-asyncio==1.5.8
|
nest-asyncio==1.5.8
|
||||||
|
orjson==3.10.12
|
||||||
packaging==23.2
|
packaging==23.2
|
||||||
parso==0.8.3
|
parso==0.8.3
|
||||||
pexpect==4.9.0
|
pexpect==4.9.0
|
||||||
@@ -42,21 +53,32 @@ prompt-toolkit==3.0.43
|
|||||||
psutil==5.9.7
|
psutil==5.9.7
|
||||||
ptyprocess==0.7.0
|
ptyprocess==0.7.0
|
||||||
pure-eval==0.2.2
|
pure-eval==0.2.2
|
||||||
pydantic==2.5.3
|
pydantic==2.10.2
|
||||||
pydantic_core==2.14.6
|
pydantic-core==2.27.1
|
||||||
Pygments==2.17.2
|
pydantic-settings==2.6.1
|
||||||
|
pygments==2.18.0
|
||||||
|
pymongo==4.9.2
|
||||||
|
pyrsistent==0.20.0
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
|
python-dotenv==1.0.1
|
||||||
pyzmq==25.1.2
|
pyzmq==25.1.2
|
||||||
rich==13.7.0
|
redis==5.2.0
|
||||||
|
rich==13.9.4
|
||||||
|
s3transfer==0.10.4
|
||||||
|
setuptools==75.6.0
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
sniffio==1.3.0
|
sniffio==1.3.1
|
||||||
soupsieve==2.5
|
soupsieve==2.5
|
||||||
stack-data==0.6.3
|
stack-data==0.6.3
|
||||||
starlette==0.32.0.post1
|
starlette==0.41.3
|
||||||
tornado==6.4
|
tornado==6.4
|
||||||
traitlets==5.14.0
|
traitlets==5.14.0
|
||||||
typing_extensions==4.9.0
|
type-extensions==0.1.2
|
||||||
|
typing-extensions==4.12.2
|
||||||
url-normalize==1.4.3
|
url-normalize==1.4.3
|
||||||
|
urllib3==2.2.3
|
||||||
uvicorn==0.25.0
|
uvicorn==0.25.0
|
||||||
wcwidth==0.2.12
|
wcwidth==0.2.12
|
||||||
|
wrapt==1.17.0
|
||||||
yarl==1.9.4
|
yarl==1.9.4
|
||||||
|
zope-interface==7.2
|
||||||
|
|||||||
+196
-81
@@ -1,61 +1,102 @@
|
|||||||
import asyncio
|
from typing import List, Optional, Tuple
|
||||||
from typing import Optional
|
from typing_extensions import TypedDict
|
||||||
from ebooklib import epub
|
|
||||||
import unicodedata
|
|
||||||
import re
|
import re
|
||||||
|
import unicodedata
|
||||||
|
import logging
|
||||||
|
from os import environ
|
||||||
|
from enum import Enum
|
||||||
import backoff
|
import backoff
|
||||||
from aiohttp import ClientResponseError, ClientSession
|
from eliot import to_file, start_action
|
||||||
from aiohttp_client_cache.session import CachedSession
|
from eliot.stdlib import EliotHandler
|
||||||
from aiohttp_client_cache import FileBackend
|
from dotenv import load_dotenv
|
||||||
|
from ebooklib import epub
|
||||||
|
from ebooklib.epub import EpubBook
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from pydantic import TypeAdapter, model_validator, field_validator
|
||||||
|
from pydantic_settings import BaseSettings
|
||||||
|
from aiohttp import ClientResponseError
|
||||||
|
from aiohttp_client_cache.session import CachedSession
|
||||||
|
from aiohttp_client_cache import FileBackend, RedisBackend
|
||||||
|
|
||||||
|
# Populate os.environ from a .env file; override=True lets .env values win
# over variables that are already set in the process environment.
load_dotenv(override=True)

# Bridge stdlib logging records into Eliot.
handler = EliotHandler()
logging.getLogger("fastapi").setLevel(logging.INFO)
logging.getLogger("fastapi").addHandler(handler)

if environ.get("DEBUG"):
    # The file object is handed to Eliot as a log destination and is kept
    # open for the lifetime of the process.
    to_file(open("eliot.log", "wb"))

# Loggers must be obtained through logging.getLogger() rather than by
# instantiating logging.Logger directly, so that the logger is registered with
# the logging manager and logging.getLogger("wpd") returns this same object.
logger = logging.getLogger("wpd")
# A directly-instantiated Logger has no parent: it emits every level and never
# propagates to the root logger. Keep that behaviour explicitly.
logger.setLevel(logging.DEBUG)
logger.propagate = False
logger.addHandler(handler)
|
||||||
|
|
||||||
|
# --- #
|
||||||
|
|
||||||
|
|
||||||
|
class CacheTypes(Enum):
|
||||||
|
file = "file"
|
||||||
|
redis = "redis"
|
||||||
|
|
||||||
|
|
||||||
|
class Config(BaseSettings):
|
||||||
|
USE_CACHE: bool = True
|
||||||
|
CACHE_TYPE: CacheTypes = CacheTypes.file
|
||||||
|
REDIS_CONNECTION_URL: str = ""
|
||||||
|
|
||||||
|
@field_validator("USE_CACHE", mode="before")
|
||||||
|
def validate_use_cache(cls, value):
|
||||||
|
# Return default if value is an empty string
|
||||||
|
if value == "":
|
||||||
|
return True # Default value for USE_CACHE
|
||||||
|
return value
|
||||||
|
|
||||||
|
@field_validator("CACHE_TYPE", mode="before")
|
||||||
|
def validate_cache_type(cls, value):
|
||||||
|
# Thanks https://stackoverflow.com/a/78157474
|
||||||
|
if value == "":
|
||||||
|
return "file"
|
||||||
|
return value
|
||||||
|
|
||||||
|
@model_validator(mode="after")
|
||||||
|
def prevent_mismatched_redis_url(self):
|
||||||
|
match self.CACHE_TYPE:
|
||||||
|
case CacheTypes.file:
|
||||||
|
if self.REDIS_CONNECTION_URL:
|
||||||
|
raise ValueError(
|
||||||
|
"REDIS_CONNECTION_URL provided when File cache selected. To use Redis as a cache, set CACHE_TYPE=redis."
|
||||||
|
)
|
||||||
|
case CacheTypes.redis:
|
||||||
|
if not self.REDIS_CONNECTION_URL:
|
||||||
|
raise ValueError(
|
||||||
|
"REDIS_CONNECTION_URL not provided when Redis cache selected. To use File cache, set CACHE_TYPE=file."
|
||||||
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
|
|
||||||
|
config = Config()
|
||||||
|
|
||||||
|
# --- #
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if config.USE_CACHE:
|
||||||
|
match config.CACHE_TYPE:
|
||||||
|
case CacheTypes.file:
|
||||||
cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours
|
cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours
|
||||||
|
case CacheTypes.redis:
|
||||||
|
cache = RedisBackend(
|
||||||
|
cache_name="wpd-aiohttp-cache", address=config.REDIS_CONNECTION_URL
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
cache = None
|
||||||
|
|
||||||
|
logger.info(f"Using {cache=}")
|
||||||
|
|
||||||
# --- Utilities --- #
|
# --- Utilities --- #
|
||||||
|
|
||||||
|
|
||||||
async def wp_get_cookies(username: str, password: str) -> dict:
|
|
||||||
# source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
|
|
||||||
"""Retrieves authorization cookies from Wattpad by logging in with user creds.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
username (str): Username.
|
|
||||||
password (str): Password.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: Bad status code.
|
|
||||||
ValueError: No cookies returned.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
dict: Authorization cookies.
|
|
||||||
"""
|
|
||||||
async with ClientSession(headers=headers) as session:
|
|
||||||
async with session.post(
|
|
||||||
"https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login",
|
|
||||||
data={
|
|
||||||
"username": username.lower(),
|
|
||||||
"password": password,
|
|
||||||
}, # the username.lower() is for caching
|
|
||||||
) as response:
|
|
||||||
if response.status != 204:
|
|
||||||
raise ValueError("Not a 204.")
|
|
||||||
|
|
||||||
cookies = {
|
|
||||||
k: v.value
|
|
||||||
for k, v in response.cookies.items() # Thanks https://stackoverflow.com/a/32281245
|
|
||||||
}
|
|
||||||
|
|
||||||
if not cookies:
|
|
||||||
raise ValueError("No cookies.")
|
|
||||||
|
|
||||||
return cookies
|
|
||||||
|
|
||||||
|
|
||||||
def slugify(value, allow_unicode=False) -> str:
|
def slugify(value, allow_unicode=False) -> str:
|
||||||
"""
|
"""
|
||||||
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|
||||||
@@ -79,35 +120,108 @@ def slugify(value, allow_unicode=False) -> str:
|
|||||||
return re.sub(r"[-\s]+", "-", value).strip("-_")
|
return re.sub(r"[-\s]+", "-", value).strip("-_")
|
||||||
|
|
||||||
|
|
||||||
|
async def wp_get_cookies(username: str, password: str) -> dict:
|
||||||
|
# source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
|
||||||
|
"""Retrieves authorization cookies from Wattpad by logging in with user creds.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
username (str): Username.
|
||||||
|
password (str): Password.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: Bad status code.
|
||||||
|
ValueError: No cookies returned.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Authorization cookies.
|
||||||
|
"""
|
||||||
|
with start_action(action_type="api_fetch_cookies"):
|
||||||
|
async with CachedSession(headers=headers, cache=None) as session:
|
||||||
|
async with session.post(
|
||||||
|
"https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login",
|
||||||
|
data={
|
||||||
|
"username": username.lower(),
|
||||||
|
"password": password,
|
||||||
|
}, # the username.lower() is for caching
|
||||||
|
) as response:
|
||||||
|
if response.status != 204:
|
||||||
|
raise ValueError("Not a 204.")
|
||||||
|
|
||||||
|
cookies = {
|
||||||
|
k: v.value
|
||||||
|
for k, v in response.cookies.items() # Thanks https://stackoverflow.com/a/32281245
|
||||||
|
}
|
||||||
|
|
||||||
|
if not cookies:
|
||||||
|
raise ValueError("No cookies.")
|
||||||
|
|
||||||
|
return cookies
|
||||||
|
|
||||||
|
|
||||||
|
# --- Models --- #
|
||||||
|
|
||||||
|
|
||||||
|
class Language(TypedDict):
|
||||||
|
name: str
|
||||||
|
|
||||||
|
|
||||||
|
class User(TypedDict):
|
||||||
|
username: str
|
||||||
|
|
||||||
|
|
||||||
|
class Part(TypedDict):
|
||||||
|
id: int
|
||||||
|
title: str
|
||||||
|
|
||||||
|
|
||||||
|
class Story(TypedDict):
|
||||||
|
id: str
|
||||||
|
title: str
|
||||||
|
createDate: str
|
||||||
|
modifyDate: str
|
||||||
|
language: Language
|
||||||
|
user: User
|
||||||
|
description: str
|
||||||
|
cover: str
|
||||||
|
completed: bool
|
||||||
|
tags: List[str]
|
||||||
|
mature: bool
|
||||||
|
url: str
|
||||||
|
parts: List[Part]
|
||||||
|
isPaywalled: bool
|
||||||
|
|
||||||
|
|
||||||
|
story_ta = TypeAdapter(Story)
|
||||||
|
|
||||||
# --- API Calls --- #
|
# --- API Calls --- #
|
||||||
|
|
||||||
|
|
||||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_story_from_partId(
    part_id: int, cookies: Optional[dict] = None
) -> Tuple[str, Story]:
    """Return the Story ID and Story metadata of the story a Part belongs to.

    Args:
        part_id (int): ID of a Part.
        cookies (Optional[dict]): Authorization cookies. When provided, they are sent with the request and the response is not cached.

    Raises:
        ClientResponseError: Bad status code (retried with exponential backoff for up to 15 seconds).

    Returns:
        Tuple[str, Story]: The Story ID as a string, and the validated Story.
    """
    with start_action(action_type="api_fetch_storyFromPartId"):
        # The cookies must be passed to the session, as in the other API
        # calls; otherwise an authenticated lookup is sent unauthenticated.
        async with CachedSession(
            headers=headers, cookies=cookies, cache=None if cookies else cache
        ) as session:  # Don't cache requests with Cookies.
            async with session.get(
                f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover)"
            ) as response:
                response.raise_for_status()

                body = await response.json()

                return str(body["groupId"]), story_ta.validate_python(body["group"])
|
||||||
|
|
||||||
|
|
||||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||||
async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> dict:
|
async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> Story:
|
||||||
"""Taking a story_id, return its information from the Wattpad API."""
|
"""Taking a story_id, return its information from the Wattpad API."""
|
||||||
async with (
|
with start_action(action_type="api_fetch_story", story_id=story_id):
|
||||||
CachedSession(headers=headers, cache=cache)
|
async with CachedSession(
|
||||||
if not cookies
|
headers=headers, cookies=cookies, cache=None if cookies else cache
|
||||||
else ClientSession(headers=headers, cookies=cookies)
|
) as session:
|
||||||
) as session: # Don't cache requests with Cookies.
|
|
||||||
async with session.get(
|
async with session.get(
|
||||||
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
|
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
|
||||||
) as response:
|
) as response:
|
||||||
@@ -115,17 +229,16 @@ async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> dict:
|
|||||||
|
|
||||||
body = await response.json()
|
body = await response.json()
|
||||||
|
|
||||||
return body
|
return story_ta.validate_python(body)
|
||||||
|
|
||||||
|
|
||||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||||
async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str:
|
async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str:
|
||||||
"""Return the HTML Content of a Part."""
|
"""Return the HTML Content of a Part."""
|
||||||
async with (
|
with start_action(action_type="api_fetch_partContent", part_id=part_id):
|
||||||
CachedSession(headers=headers, cache=cache)
|
async with CachedSession(
|
||||||
if not cookies
|
headers=headers, cookies=cookies, cache=None if cookies else cache
|
||||||
else ClientSession(headers=headers, cookies=cookies)
|
) as session:
|
||||||
) as session: # Don't cache requests with Cookies.
|
|
||||||
async with session.get(
|
async with session.get(
|
||||||
f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"
|
f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"
|
||||||
) as response:
|
) as response:
|
||||||
@@ -137,13 +250,12 @@ async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> st
|
|||||||
|
|
||||||
|
|
||||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||||
async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
|
async def fetch_cover(url: str) -> bytes:
|
||||||
"""Fetch image bytes."""
|
"""Fetch cover image bytes."""
|
||||||
async with (
|
with start_action(action_type="api_fetch_cover", url=url):
|
||||||
CachedSession(headers=headers, cache=cache)
|
async with CachedSession(
|
||||||
if not cookies
|
headers=headers, cache=None
|
||||||
else ClientSession(headers=headers, cookies=cookies)
|
) as session: # Don't cache images.
|
||||||
) as session: # Don't cache requests with Cookies.
|
|
||||||
async with session.get(url) as response:
|
async with session.get(url) as response:
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
@@ -155,7 +267,8 @@ async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
|
|||||||
# --- EPUB Generation --- #
|
# --- EPUB Generation --- #
|
||||||
|
|
||||||
|
|
||||||
def set_metadata(book, data):
|
def set_metadata(book: EpubBook, data: Story) -> None:
|
||||||
|
"""Set book metadata."""
|
||||||
book.add_author(data["user"]["username"])
|
book.add_author(data["user"]["username"])
|
||||||
|
|
||||||
book.add_metadata("DC", "title", data["title"])
|
book.add_metadata("DC", "title", data["title"])
|
||||||
@@ -175,16 +288,20 @@ def set_metadata(book, data):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def set_cover(book, data, cookies: Optional[dict] = None):
|
async def set_cover(book: EpubBook, data: Story) -> None:
|
||||||
book.set_cover("cover.jpg", await fetch_cover(data["cover"], cookies=cookies))
|
"""Set book cover."""
|
||||||
|
book.set_cover("cover.jpg", await fetch_cover(data["cover"]))
|
||||||
chapter = epub.EpubHtml(
|
chapter = epub.EpubHtml(
|
||||||
file_name=f"titlepage.xhtml", # Standard for cover page
|
file_name="titlepage.xhtml", # Standard for cover page
|
||||||
)
|
)
|
||||||
chapter.set_content('<img src="cover.jpg">')
|
chapter.set_content('<img src="cover.jpg">')
|
||||||
|
|
||||||
|
|
||||||
async def add_chapters(
|
async def add_chapters(
|
||||||
book, data, download_images: bool = False, cookies: Optional[dict] = None
|
book: EpubBook,
|
||||||
|
data: Story,
|
||||||
|
download_images: bool = False,
|
||||||
|
cookies: Optional[dict] = None,
|
||||||
):
|
):
|
||||||
chapters = []
|
chapters = []
|
||||||
|
|
||||||
@@ -202,11 +319,9 @@ async def add_chapters(
|
|||||||
if download_images:
|
if download_images:
|
||||||
soup = BeautifulSoup(content, "lxml")
|
soup = BeautifulSoup(content, "lxml")
|
||||||
|
|
||||||
async with (
|
async with CachedSession(
|
||||||
CachedSession(headers=headers, cache=cache)
|
headers=headers, cache=None
|
||||||
if not cookies
|
) as session: # Don't cache images.
|
||||||
else ClientSession(headers=headers, cookies=cookies)
|
|
||||||
) as session: # Don't cache requests with Cookies.
|
|
||||||
for idx, image in enumerate(soup.find_all("img")):
|
for idx, image in enumerate(soup.find_all("img")):
|
||||||
if not image["src"]:
|
if not image["src"]:
|
||||||
continue
|
continue
|
||||||
@@ -234,7 +349,7 @@ async def add_chapters(
|
|||||||
for chapter in chapters:
|
for chapter in chapters:
|
||||||
book.add_item(chapter)
|
book.add_item(chapter)
|
||||||
|
|
||||||
book.toc = tuple(chapters)
|
book.toc = chapters
|
||||||
|
|
||||||
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
|
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
|
||||||
book.add_item(epub.EpubNcx())
|
book.add_item(epub.EpubNcx())
|
||||||
|
|||||||
+59
-9
@@ -1,10 +1,12 @@
|
|||||||
"""WattpadDownloader API Server."""
|
"""WattpadDownloader API Server."""
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
import asyncio
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from eliot import start_action
|
||||||
from aiohttp import ClientResponseError
|
from aiohttp import ClientResponseError
|
||||||
from fastapi import FastAPI, Request
|
from fastapi import FastAPI, Request
|
||||||
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
|
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
|
||||||
@@ -17,9 +19,11 @@ from create_book import (
|
|||||||
add_chapters,
|
add_chapters,
|
||||||
slugify,
|
slugify,
|
||||||
wp_get_cookies,
|
wp_get_cookies,
|
||||||
fetch_story_id,
|
fetch_story_from_partId,
|
||||||
|
logger,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
BUILD_PATH = Path(__file__).parent / "build"
|
BUILD_PATH = Path(__file__).parent / "build"
|
||||||
|
|
||||||
@@ -28,10 +32,46 @@ headers = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RequestCancelledMiddleware:
    """ASGI middleware that cancels the request handler when the client disconnects.

    For HTTP requests, incoming ASGI messages are read by a background poller
    and forwarded to the wrapped app through a queue. When the poller sees an
    "http.disconnect" message it cancels the task running the wrapped app.
    Non-HTTP scopes are passed straight through.
    """

    # Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
    def __init__(self, app):
        self.app = app

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        # Let's make a shared queue for the request messages
        queue = asyncio.Queue()

        async def message_poller(sentinel, handler_task):
            while True:
                message = await receive()
                if message["type"] == "http.disconnect":
                    handler_task.cancel()
                    return sentinel  # Break the loop

                # Puts the message in the queue
                await queue.put(message)

        sentinel = object()
        handler_task = asyncio.create_task(self.app(scope, queue.get, send))
        # Keep a reference to the poller: the event loop only holds weak
        # references to tasks, and we need the handle to stop it below.
        poller_task = asyncio.create_task(message_poller(sentinel, handler_task))

        try:
            return await handler_task
        except asyncio.CancelledError:
            logger.info("Cancelling task as connection closed")
        finally:
            # Once the handler is finished nobody consumes the queue any more,
            # so stop the poller instead of leaving it blocked on receive().
            poller_task.cancel()
|
||||||
|
|
||||||
|
|
||||||
|
app.add_middleware(RequestCancelledMiddleware)
|
||||||
|
|
||||||
|
|
||||||
class DownloadMode(Enum):
|
class DownloadMode(Enum):
|
||||||
story = "story"
|
story = "story"
|
||||||
part = "part"
|
part = "part"
|
||||||
collection = "collection"
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
@@ -68,8 +108,17 @@ async def handle_download(
|
|||||||
mode: DownloadMode = DownloadMode.story,
|
mode: DownloadMode = DownloadMode.story,
|
||||||
username: Optional[str] = None,
|
username: Optional[str] = None,
|
||||||
password: Optional[str] = None,
|
password: Optional[str] = None,
|
||||||
|
):
|
||||||
|
with start_action(
|
||||||
|
action_type="download",
|
||||||
|
download_id=download_id,
|
||||||
|
download_images=download_images,
|
||||||
|
mode=mode,
|
||||||
):
|
):
|
||||||
if username and not password or password and not username:
|
if username and not password or password and not username:
|
||||||
|
logger.error(
|
||||||
|
"Username with no Password or Password with no Username provided."
|
||||||
|
)
|
||||||
return HTMLResponse(
|
return HTMLResponse(
|
||||||
status_code=422,
|
status_code=422,
|
||||||
content='Include both the username <u>and</u> password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
content='Include both the username <u>and</u> password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||||
@@ -80,6 +129,7 @@ async def handle_download(
|
|||||||
try:
|
try:
|
||||||
cookies = await wp_get_cookies(username=username, password=password)
|
cookies = await wp_get_cookies(username=username, password=password)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
logger.error("Invalid username or password.")
|
||||||
return HTMLResponse(
|
return HTMLResponse(
|
||||||
status_code=403,
|
status_code=403,
|
||||||
content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||||
@@ -90,15 +140,15 @@ async def handle_download(
|
|||||||
match mode:
|
match mode:
|
||||||
case DownloadMode.story:
|
case DownloadMode.story:
|
||||||
story_id = download_id
|
story_id = download_id
|
||||||
|
metadata = await retrieve_story(story_id, cookies)
|
||||||
case DownloadMode.part:
|
case DownloadMode.part:
|
||||||
story_id = await fetch_story_id(download_id, cookies)
|
story_id, metadata = await fetch_story_from_partId(download_id, cookies)
|
||||||
|
|
||||||
|
logger.info(f"Retrieved story id ({story_id=})")
|
||||||
|
|
||||||
book = epub.EpubBook()
|
book = epub.EpubBook()
|
||||||
|
|
||||||
metadata = await retrieve_story(story_id, cookies)
|
|
||||||
set_metadata(book, metadata)
|
set_metadata(book, metadata)
|
||||||
|
await set_cover(book, metadata)
|
||||||
await set_cover(book, metadata, cookies=cookies)
|
|
||||||
|
|
||||||
async for title in add_chapters(
|
async for title in add_chapters(
|
||||||
book, metadata, download_images=download_images, cookies=cookies
|
book, metadata, download_images=download_images, cookies=cookies
|
||||||
@@ -120,7 +170,7 @@ async def handle_download(
|
|||||||
BytesIO(book_data),
|
BytesIO(book_data),
|
||||||
media_type="application/epub+zip",
|
media_type="application/epub+zip",
|
||||||
headers={
|
headers={
|
||||||
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}_{"images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058
|
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -131,4 +181,4 @@ app.mount("/", StaticFiles(directory=BUILD_PATH), "static")
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
|
||||||
uvicorn.run(app, host="0.0.0.0", port=80)
|
uvicorn.run("main:app", host="0.0.0.0", port=80, workers=16)
|
||||||
|
|||||||
Generated
+1083
File diff suppressed because it is too large
Load Diff
@@ -31,12 +31,13 @@
|
|||||||
input_url = input_url.toLowerCase();
|
input_url = input_url.toLowerCase();
|
||||||
|
|
||||||
invalid_url = false;
|
invalid_url = false;
|
||||||
if (!input_url.includes("wattpad.com/")) {
|
|
||||||
invalid_url = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, I wasn't able to remove it.
|
if (/^\d+$/.test(input_url)) {
|
||||||
// In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url).
|
// All numbers
|
||||||
|
download_id = input_url;
|
||||||
|
mode = "story";
|
||||||
|
} else if (input_url.includes("wattpad.com/")) {
|
||||||
|
// Is a string and contains contain wattpad.com/
|
||||||
|
|
||||||
if (input_url.includes("/story/")) {
|
if (input_url.includes("/story/")) {
|
||||||
// https://wattpad.com/story/237369078-wattpad-books-presents
|
// https://wattpad.com/story/237369078-wattpad-books-presents
|
||||||
@@ -61,6 +62,18 @@
|
|||||||
download_id = "";
|
download_id = "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
invalid_url = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
input_url = input_url.match(/\d+/g)?.join("") || "";
|
||||||
|
download_id = input_url;
|
||||||
|
|
||||||
|
// Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, I wasn't able to remove it.
|
||||||
|
// In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url).
|
||||||
|
} else {
|
||||||
|
invalid_url = false;
|
||||||
|
download_id = "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
Reference in New Issue
Block a user