Merge pull request #23 from TheOnlyWayUp/fix/#22-redis-cache
Concurrent requests fail Co-authored-by: AaronBenDaniel <144371000+AaronBenDaniel@users.noreply.github.com>
This commit is contained in:
@@ -5,3 +5,6 @@ data
|
||||
*ipynb
|
||||
build
|
||||
.vscode
|
||||
.venv
|
||||
.env
|
||||
*log
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
USE_CACHE=true
|
||||
CACHE_TYPE=file
|
||||
REDIS_CONNECTION_URL=
|
||||
@@ -0,0 +1 @@
|
||||
3.10
|
||||
@@ -0,0 +1,20 @@
|
||||
[project]
|
||||
name = "api"
|
||||
version = "0.1.0"
|
||||
description = "Wattpad Downloader API"
|
||||
readme = "../../README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"aiohttp>=3.9.1",
|
||||
"aiohttp-client-cache[all]>=0.10.0",
|
||||
"rich>=13.9.4",
|
||||
"fastapi>=0.115.5",
|
||||
"ebooklib>=0.18",
|
||||
"python-dotenv>=1.0.1",
|
||||
"pydantic-settings>=2.6.1",
|
||||
"eliot>=1.16.0",
|
||||
"type-extensions>=0.1.2",
|
||||
]
|
||||
|
||||
[tool.ruff.lint]
|
||||
ignore = ['E402']
|
||||
+37
-15
@@ -1,24 +1,32 @@
|
||||
aioboto3==12.4.0
|
||||
aiobotocore==2.12.3
|
||||
aiofiles==23.2.1
|
||||
aiohttp==3.9.1
|
||||
aiohttp-client-cache==0.10.0
|
||||
aioitertools==0.12.0
|
||||
aiosignal==1.3.1
|
||||
aiosqlite==0.19.0
|
||||
annotated-types==0.6.0
|
||||
anyio==4.2.0
|
||||
annotated-types==0.7.0
|
||||
anyio==4.6.2.post1
|
||||
asttokens==2.4.1
|
||||
async-timeout==4.0.3
|
||||
attrs==23.1.0
|
||||
backoff==2.2.1
|
||||
beautifulsoup4==4.12.3
|
||||
boltons==24.1.0
|
||||
boto3==1.34.69
|
||||
botocore==1.34.69
|
||||
bs4==0.0.2
|
||||
click==8.1.7
|
||||
comm==0.2.0
|
||||
debugpy==1.8.0
|
||||
decorator==5.1.1
|
||||
EbookLib==0.18
|
||||
exceptiongroup==1.2.0
|
||||
dnspython==2.7.0
|
||||
ebooklib==0.18
|
||||
eliot==1.16.0
|
||||
exceptiongroup==1.2.2
|
||||
executing==2.0.1
|
||||
fastapi==0.108.0
|
||||
fastapi==0.115.5
|
||||
frozenlist==1.4.1
|
||||
h11==0.14.0
|
||||
idna==3.6
|
||||
@@ -26,14 +34,17 @@ ipykernel==6.28.0
|
||||
ipython==8.19.0
|
||||
itsdangerous==2.1.2
|
||||
jedi==0.19.1
|
||||
jupyter_client==8.6.0
|
||||
jupyter_core==5.5.1
|
||||
lxml==4.9.4
|
||||
jmespath==1.0.1
|
||||
jupyter-client==8.6.0
|
||||
jupyter-core==5.5.1
|
||||
lxml==5.3.0
|
||||
markdown-it-py==3.0.0
|
||||
matplotlib-inline==0.1.6
|
||||
mdurl==0.1.2
|
||||
motor==3.6.0
|
||||
multidict==6.0.4
|
||||
nest-asyncio==1.5.8
|
||||
orjson==3.10.12
|
||||
packaging==23.2
|
||||
parso==0.8.3
|
||||
pexpect==4.9.0
|
||||
@@ -42,21 +53,32 @@ prompt-toolkit==3.0.43
|
||||
psutil==5.9.7
|
||||
ptyprocess==0.7.0
|
||||
pure-eval==0.2.2
|
||||
pydantic==2.5.3
|
||||
pydantic_core==2.14.6
|
||||
Pygments==2.17.2
|
||||
pydantic==2.10.2
|
||||
pydantic-core==2.27.1
|
||||
pydantic-settings==2.6.1
|
||||
pygments==2.18.0
|
||||
pymongo==4.9.2
|
||||
pyrsistent==0.20.0
|
||||
python-dateutil==2.8.2
|
||||
python-dotenv==1.0.1
|
||||
pyzmq==25.1.2
|
||||
rich==13.7.0
|
||||
redis==5.2.0
|
||||
rich==13.9.4
|
||||
s3transfer==0.10.4
|
||||
setuptools==75.6.0
|
||||
six==1.16.0
|
||||
sniffio==1.3.0
|
||||
sniffio==1.3.1
|
||||
soupsieve==2.5
|
||||
stack-data==0.6.3
|
||||
starlette==0.32.0.post1
|
||||
starlette==0.41.3
|
||||
tornado==6.4
|
||||
traitlets==5.14.0
|
||||
typing_extensions==4.9.0
|
||||
type-extensions==0.1.2
|
||||
typing-extensions==4.12.2
|
||||
url-normalize==1.4.3
|
||||
urllib3==2.2.3
|
||||
uvicorn==0.25.0
|
||||
wcwidth==0.2.12
|
||||
wrapt==1.17.0
|
||||
yarl==1.9.4
|
||||
zope-interface==7.2
|
||||
|
||||
+217
-102
@@ -1,61 +1,102 @@
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
from ebooklib import epub
|
||||
import unicodedata
|
||||
from typing import List, Optional, Tuple
|
||||
from typing_extensions import TypedDict
|
||||
import re
|
||||
import unicodedata
|
||||
import logging
|
||||
from os import environ
|
||||
from enum import Enum
|
||||
import backoff
|
||||
from aiohttp import ClientResponseError, ClientSession
|
||||
from aiohttp_client_cache.session import CachedSession
|
||||
from aiohttp_client_cache import FileBackend
|
||||
from eliot import to_file, start_action
|
||||
from eliot.stdlib import EliotHandler
|
||||
from dotenv import load_dotenv
|
||||
from ebooklib import epub
|
||||
from ebooklib.epub import EpubBook
|
||||
from bs4 import BeautifulSoup
|
||||
from pydantic import TypeAdapter, model_validator, field_validator
|
||||
from pydantic_settings import BaseSettings
|
||||
from aiohttp import ClientResponseError
|
||||
from aiohttp_client_cache.session import CachedSession
|
||||
from aiohttp_client_cache import FileBackend, RedisBackend
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# One shared handler forwards stdlib logging records to Eliot.
handler = EliotHandler()

_fastapi_logger = logging.getLogger("fastapi")
_fastapi_logger.setLevel(logging.INFO)
_fastapi_logger.addHandler(handler)

if environ.get("DEBUG"):
    # The file object is handed to Eliot as a log destination and is
    # intentionally left open for the lifetime of the process.
    to_file(open("eliot.log", "wb"))

# Obtain the logger through the logging manager instead of instantiating
# ``logging.Logger`` directly, so that ``logging.getLogger("wpd")`` anywhere in
# the process resolves to this same, configured object.
logger = logging.getLogger("wpd")
# A directly-instantiated Logger has no parent: nothing filters its records by
# level and nothing propagates. Keep that behaviour explicit here.
logger.setLevel(logging.DEBUG)
logger.propagate = False
logger.addHandler(handler)
|
||||
|
||||
# --- #
|
||||
|
||||
|
||||
class CacheTypes(Enum):
    """Cache backends that can be chosen with the ``CACHE_TYPE`` setting."""

    # Each member's value is the string accepted for that backend.
    file = "file"
    redis = "redis"
|
||||
|
||||
|
||||
class Config(BaseSettings):
    """Cache-related settings.

    Empty-string values for ``USE_CACHE`` and ``CACHE_TYPE`` are replaced by
    their defaults before validation, and the Redis URL must be consistent
    with the selected cache type.
    """

    USE_CACHE: bool = True
    CACHE_TYPE: CacheTypes = CacheTypes.file
    REDIS_CONNECTION_URL: str = ""

    @field_validator("USE_CACHE", mode="before")
    @classmethod
    def validate_use_cache(cls, value):
        """Map an empty string to the default (``True``); pass anything else through."""
        return True if value == "" else value

    @field_validator("CACHE_TYPE", mode="before")
    @classmethod
    def validate_cache_type(cls, value):
        """Map an empty string to the default (``"file"``); pass anything else through."""
        # Thanks https://stackoverflow.com/a/78157474
        return "file" if value == "" else value

    @model_validator(mode="after")
    def prevent_mismatched_redis_url(self):
        """Reject a Redis URL with the file cache, and a Redis cache without a URL.

        Raises:
            ValueError: ``REDIS_CONNECTION_URL`` and ``CACHE_TYPE`` disagree.
        """
        url_given = bool(self.REDIS_CONNECTION_URL)

        if self.CACHE_TYPE is CacheTypes.file and url_given:
            raise ValueError(
                "REDIS_CONNECTION_URL provided when File cache selected. To use Redis as a cache, set CACHE_TYPE=redis."
            )

        if self.CACHE_TYPE is CacheTypes.redis and not url_given:
            raise ValueError(
                "REDIS_CONNECTION_URL not provided when Redis cache selected. To use File cache, set CACHE_TYPE=file."
            )

        return self
|
||||
|
||||
|
||||
config = Config()
|
||||
|
||||
# --- #
|
||||
|
||||
headers = {
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
|
||||
}
|
||||
|
||||
cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours
|
||||
if config.USE_CACHE:
|
||||
match config.CACHE_TYPE:
|
||||
case CacheTypes.file:
|
||||
cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours
|
||||
case CacheTypes.redis:
|
||||
cache = RedisBackend(
|
||||
cache_name="wpd-aiohttp-cache", address=config.REDIS_CONNECTION_URL
|
||||
)
|
||||
else:
|
||||
cache = None
|
||||
|
||||
logger.info(f"Using {cache=}")
|
||||
|
||||
# --- Utilities --- #
|
||||
|
||||
|
||||
async def wp_get_cookies(username: str, password: str) -> dict:
|
||||
# source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
|
||||
"""Retrieves authorization cookies from Wattpad by logging in with user creds.
|
||||
|
||||
Args:
|
||||
username (str): Username.
|
||||
password (str): Password.
|
||||
|
||||
Raises:
|
||||
ValueError: Bad status code.
|
||||
ValueError: No cookies returned.
|
||||
|
||||
Returns:
|
||||
dict: Authorization cookies.
|
||||
"""
|
||||
async with ClientSession(headers=headers) as session:
|
||||
async with session.post(
|
||||
"https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login",
|
||||
data={
|
||||
"username": username.lower(),
|
||||
"password": password,
|
||||
}, # the username.lower() is for caching
|
||||
) as response:
|
||||
if response.status != 204:
|
||||
raise ValueError("Not a 204.")
|
||||
|
||||
cookies = {
|
||||
k: v.value
|
||||
for k, v in response.cookies.items() # Thanks https://stackoverflow.com/a/32281245
|
||||
}
|
||||
|
||||
if not cookies:
|
||||
raise ValueError("No cookies.")
|
||||
|
||||
return cookies
|
||||
|
||||
|
||||
def slugify(value, allow_unicode=False) -> str:
|
||||
"""
|
||||
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|
||||
@@ -79,83 +120,155 @@ def slugify(value, allow_unicode=False) -> str:
|
||||
return re.sub(r"[-\s]+", "-", value).strip("-_")
|
||||
|
||||
|
||||
async def wp_get_cookies(username: str, password: str) -> dict:
    # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
    """Log in to Wattpad with user credentials and return the response cookies.

    Args:
        username (str): Username; it is lower-cased before being sent.
        password (str): Password.

    Raises:
        ValueError: The login response status was not 204.
        ValueError: The login response carried no cookies.

    Returns:
        dict: Cookie names mapped to their string values.
    """
    with start_action(action_type="api_fetch_cookies"):
        login_url = "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login"
        credentials = {
            "username": username.lower(),  # the username.lower() is for caching
            "password": password,
        }

        async with CachedSession(headers=headers, cache=None) as session:
            async with session.post(login_url, data=credentials) as response:
                if response.status != 204:
                    raise ValueError("Not a 204.")

                # Flatten the cookie morsels into plain strings.
                # Thanks https://stackoverflow.com/a/32281245
                jar = {}
                for name, morsel in response.cookies.items():
                    jar[name] = morsel.value

                if not jar:
                    raise ValueError("No cookies.")

                return jar
|
||||
|
||||
|
||||
# --- Models --- #
|
||||
|
||||
|
||||
class Language(TypedDict):
    """Language object nested in a story (requested as ``language(name)``)."""

    name: str
|
||||
|
||||
|
||||
class User(TypedDict):
    """User object nested in a story (requested as ``user(username)``)."""

    username: str
|
||||
|
||||
|
||||
class Part(TypedDict):
    """One entry of a story's ``parts`` list (requested as ``parts(id,title)``)."""

    id: int
    title: str
|
||||
|
||||
|
||||
class Story(TypedDict):
    """Story metadata; the keys mirror the ``fields`` requested from the stories API."""

    # Identity and dates
    id: str
    title: str
    createDate: str
    modifyDate: str

    # Nested objects
    language: Language
    user: User

    # Descriptive details
    description: str
    cover: str  # passed to fetch_cover() as a URL by set_cover()
    completed: bool
    tags: List[str]
    mature: bool
    url: str

    # Contents and access
    parts: List[Part]
    isPaywalled: bool
|
||||
|
||||
|
||||
story_ta = TypeAdapter(Story)
|
||||
|
||||
# --- API Calls --- #
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||
async def fetch_story_id(part_id: int, cookies: Optional[dict] = None) -> int:
|
||||
async def fetch_story_from_partId(
|
||||
part_id: int, cookies: Optional[dict] = None
|
||||
) -> Tuple[str, Story]:
|
||||
"""Return a Story ID from a Part ID."""
|
||||
async with (
|
||||
CachedSession(headers=headers, cache=cache)
|
||||
if not cookies
|
||||
else ClientSession(headers=headers, cookies=cookies)
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId"
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
with start_action(action_type="api_fetch_storyFromPartId"):
|
||||
async with CachedSession(
|
||||
headers=headers, cache=None if cookies else cache
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover)"
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
body = await response.json()
|
||||
body = await response.json()
|
||||
|
||||
return body["groupId"]
|
||||
return str(body["groupId"]), story_ta.validate_python(body["group"])
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||
async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> dict:
|
||||
async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> Story:
|
||||
"""Taking a story_id, return its information from the Wattpad API."""
|
||||
async with (
|
||||
CachedSession(headers=headers, cache=cache)
|
||||
if not cookies
|
||||
else ClientSession(headers=headers, cookies=cookies)
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
with start_action(action_type="api_fetch_story", story_id=story_id):
|
||||
async with CachedSession(
|
||||
headers=headers, cookies=cookies, cache=None if cookies else cache
|
||||
) as session:
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
body = await response.json()
|
||||
body = await response.json()
|
||||
|
||||
return body
|
||||
return story_ta.validate_python(body)
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||
async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str:
|
||||
"""Return the HTML Content of a Part."""
|
||||
async with (
|
||||
CachedSession(headers=headers, cache=cache)
|
||||
if not cookies
|
||||
else ClientSession(headers=headers, cookies=cookies)
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
with start_action(action_type="api_fetch_partContent", part_id=part_id):
|
||||
async with CachedSession(
|
||||
headers=headers, cookies=cookies, cache=None if cookies else cache
|
||||
) as session:
|
||||
async with session.get(
|
||||
f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
body = await response.text()
|
||||
body = await response.text()
|
||||
|
||||
return body
|
||||
return body
|
||||
|
||||
|
||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
||||
async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
|
||||
"""Fetch image bytes."""
|
||||
async with (
|
||||
CachedSession(headers=headers, cache=cache)
|
||||
if not cookies
|
||||
else ClientSession(headers=headers, cookies=cookies)
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
async with session.get(url) as response:
|
||||
response.raise_for_status()
|
||||
async def fetch_cover(url: str) -> bytes:
|
||||
"""Fetch cover image bytes."""
|
||||
with start_action(action_type="api_fetch_cover", url=url):
|
||||
async with CachedSession(
|
||||
headers=headers, cache=None
|
||||
) as session: # Don't cache images.
|
||||
async with session.get(url) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
body = await response.read()
|
||||
body = await response.read()
|
||||
|
||||
return body
|
||||
return body
|
||||
|
||||
|
||||
# --- EPUB Generation --- #
|
||||
|
||||
|
||||
def set_metadata(book, data):
|
||||
def set_metadata(book: EpubBook, data: Story) -> None:
|
||||
"""Set book metadata."""
|
||||
book.add_author(data["user"]["username"])
|
||||
|
||||
book.add_metadata("DC", "title", data["title"])
|
||||
@@ -175,16 +288,20 @@ def set_metadata(book, data):
|
||||
)
|
||||
|
||||
|
||||
async def set_cover(book, data, cookies: Optional[dict] = None):
|
||||
book.set_cover("cover.jpg", await fetch_cover(data["cover"], cookies=cookies))
|
||||
async def set_cover(book: EpubBook, data: Story) -> None:
|
||||
"""Set book cover."""
|
||||
book.set_cover("cover.jpg", await fetch_cover(data["cover"]))
|
||||
chapter = epub.EpubHtml(
|
||||
file_name=f"titlepage.xhtml", # Standard for cover page
|
||||
file_name="titlepage.xhtml", # Standard for cover page
|
||||
)
|
||||
chapter.set_content('<img src="cover.jpg">')
|
||||
|
||||
|
||||
async def add_chapters(
|
||||
book, data, download_images: bool = False, cookies: Optional[dict] = None
|
||||
book: EpubBook,
|
||||
data: Story,
|
||||
download_images: bool = False,
|
||||
cookies: Optional[dict] = None,
|
||||
):
|
||||
chapters = []
|
||||
|
||||
@@ -202,11 +319,9 @@ async def add_chapters(
|
||||
if download_images:
|
||||
soup = BeautifulSoup(content, "lxml")
|
||||
|
||||
async with (
|
||||
CachedSession(headers=headers, cache=cache)
|
||||
if not cookies
|
||||
else ClientSession(headers=headers, cookies=cookies)
|
||||
) as session: # Don't cache requests with Cookies.
|
||||
async with CachedSession(
|
||||
headers=headers, cache=None
|
||||
) as session: # Don't cache images.
|
||||
for idx, image in enumerate(soup.find_all("img")):
|
||||
if not image["src"]:
|
||||
continue
|
||||
@@ -234,7 +349,7 @@ async def add_chapters(
|
||||
for chapter in chapters:
|
||||
book.add_item(chapter)
|
||||
|
||||
book.toc = tuple(chapters)
|
||||
book.toc = chapters
|
||||
|
||||
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
|
||||
book.add_item(epub.EpubNcx())
|
||||
|
||||
+102
-52
@@ -1,10 +1,12 @@
|
||||
"""WattpadDownloader API Server."""
|
||||
|
||||
from typing import Optional
|
||||
import asyncio
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from io import BytesIO
|
||||
from enum import Enum
|
||||
from eliot import start_action
|
||||
from aiohttp import ClientResponseError
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
|
||||
@@ -17,9 +19,11 @@ from create_book import (
|
||||
add_chapters,
|
||||
slugify,
|
||||
wp_get_cookies,
|
||||
fetch_story_id,
|
||||
fetch_story_from_partId,
|
||||
logger,
|
||||
)
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
BUILD_PATH = Path(__file__).parent / "build"
|
||||
|
||||
@@ -28,10 +32,46 @@ headers = {
|
||||
}
|
||||
|
||||
|
||||
class RequestCancelledMiddleware:
    """ASGI middleware that cancels the request handler when the client disconnects.

    For HTTP scopes the wrapped app runs in its own task and reads incoming
    messages from a queue. A second task polls the real ``receive`` callable,
    forwards messages to that queue, and cancels the handler task when an
    ``http.disconnect`` message arrives. Other scope types are passed straight
    through.

    Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
    """

    def __init__(self, app):
        self.app = app

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        # Shared queue: the poller fills it, the wrapped app drains it.
        queue = asyncio.Queue()

        async def message_poller(handler_task):
            while True:
                message = await receive()
                if message["type"] == "http.disconnect":
                    handler_task.cancel()
                    return  # Stop polling once the client is gone

                await queue.put(message)

        handler_task = asyncio.create_task(self.app(scope, queue.get, send))
        # Keep a reference: the event loop only holds weak references to tasks,
        # so an unreferenced task may be garbage-collected mid-execution.
        poller_task = asyncio.create_task(message_poller(handler_task))

        try:
            return await handler_task
        except asyncio.CancelledError:
            logger.info("Cancelling task as connection closed")
        finally:
            # The poller has no purpose once the handler is finished; cancel it
            # so it does not linger waiting on receive(). Cancelling a task
            # that already returned is a no-op.
            poller_task.cancel()
|
||||
|
||||
|
||||
app.add_middleware(RequestCancelledMiddleware)
|
||||
|
||||
|
||||
class DownloadMode(Enum):
    """Kinds of identifier a download request can refer to."""

    # Each member's value is its string form.
    story = "story"
    part = "part"
    collection = "collection"
|
||||
|
||||
|
||||
@app.get("/")
|
||||
@@ -69,60 +109,70 @@ async def handle_download(
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
):
|
||||
if username and not password or password and not username:
|
||||
return HTMLResponse(
|
||||
status_code=422,
|
||||
content='Include both the username <u>and</u> password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||
)
|
||||
|
||||
if username and password:
|
||||
# username and password are URL-Encoded by the frontend. FastAPI automatically decodes them.
|
||||
try:
|
||||
cookies = await wp_get_cookies(username=username, password=password)
|
||||
except ValueError:
|
||||
return HTMLResponse(
|
||||
status_code=403,
|
||||
content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||
)
|
||||
else:
|
||||
cookies = None
|
||||
|
||||
match mode:
|
||||
case DownloadMode.story:
|
||||
story_id = download_id
|
||||
case DownloadMode.part:
|
||||
story_id = await fetch_story_id(download_id, cookies)
|
||||
|
||||
book = epub.EpubBook()
|
||||
|
||||
metadata = await retrieve_story(story_id, cookies)
|
||||
set_metadata(book, metadata)
|
||||
|
||||
await set_cover(book, metadata, cookies=cookies)
|
||||
|
||||
async for title in add_chapters(
|
||||
book, metadata, download_images=download_images, cookies=cookies
|
||||
with start_action(
|
||||
action_type="download",
|
||||
download_id=download_id,
|
||||
download_images=download_images,
|
||||
mode=mode,
|
||||
):
|
||||
...
|
||||
if username and not password or password and not username:
|
||||
logger.error(
|
||||
"Username with no Password or Password with no Username provided."
|
||||
)
|
||||
return HTMLResponse(
|
||||
status_code=422,
|
||||
content='Include both the username <u>and</u> password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||
)
|
||||
|
||||
# Book is compiled
|
||||
temp_file = tempfile.NamedTemporaryFile(
|
||||
suffix=".epub", delete=True
|
||||
) # Thanks https://stackoverflow.com/a/75398222
|
||||
if username and password:
|
||||
# username and password are URL-Encoded by the frontend. FastAPI automatically decodes them.
|
||||
try:
|
||||
cookies = await wp_get_cookies(username=username, password=password)
|
||||
except ValueError:
|
||||
logger.error("Invalid username or password.")
|
||||
return HTMLResponse(
|
||||
status_code=403,
|
||||
content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||
)
|
||||
else:
|
||||
cookies = None
|
||||
|
||||
# create epub file
|
||||
epub.write_epub(temp_file, book, {})
|
||||
match mode:
|
||||
case DownloadMode.story:
|
||||
story_id = download_id
|
||||
metadata = await retrieve_story(story_id, cookies)
|
||||
case DownloadMode.part:
|
||||
story_id, metadata = await fetch_story_from_partId(download_id, cookies)
|
||||
|
||||
temp_file.file.seek(0)
|
||||
book_data = temp_file.file.read()
|
||||
logger.info(f"Retrieved story id ({story_id=})")
|
||||
|
||||
return StreamingResponse(
|
||||
BytesIO(book_data),
|
||||
media_type="application/epub+zip",
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}_{"images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058
|
||||
},
|
||||
)
|
||||
book = epub.EpubBook()
|
||||
set_metadata(book, metadata)
|
||||
await set_cover(book, metadata)
|
||||
|
||||
async for title in add_chapters(
|
||||
book, metadata, download_images=download_images, cookies=cookies
|
||||
):
|
||||
...
|
||||
|
||||
# Book is compiled
|
||||
temp_file = tempfile.NamedTemporaryFile(
|
||||
suffix=".epub", delete=True
|
||||
) # Thanks https://stackoverflow.com/a/75398222
|
||||
|
||||
# create epub file
|
||||
epub.write_epub(temp_file, book, {})
|
||||
|
||||
temp_file.file.seek(0)
|
||||
book_data = temp_file.file.read()
|
||||
|
||||
return StreamingResponse(
|
||||
BytesIO(book_data),
|
||||
media_type="application/epub+zip",
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
app.mount("/", StaticFiles(directory=BUILD_PATH), "static")
|
||||
@@ -131,4 +181,4 @@ app.mount("/", StaticFiles(directory=BUILD_PATH), "static")
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(app, host="0.0.0.0", port=80)
|
||||
uvicorn.run("main:app", host="0.0.0.0", port=80, workers=16)
|
||||
|
||||
Generated
+1083
File diff suppressed because it is too large
Load Diff
@@ -31,36 +31,49 @@
|
||||
input_url = input_url.toLowerCase();
|
||||
|
||||
invalid_url = false;
|
||||
if (!input_url.includes("wattpad.com/")) {
|
||||
|
||||
if (/^\d+$/.test(input_url)) {
|
||||
// All numbers
|
||||
download_id = input_url;
|
||||
mode = "story";
|
||||
} else if (input_url.includes("wattpad.com/")) {
|
||||
// Is a string and contains wattpad.com/
|
||||
|
||||
if (input_url.includes("/story/")) {
|
||||
// https://wattpad.com/story/237369078-wattpad-books-presents
|
||||
input_url = input_url.split("-")[0].split("/story/")[1]; // removes tracking fields and title
|
||||
download_id = input_url;
|
||||
mode = "story";
|
||||
} else if (input_url.includes("/stories/")) {
|
||||
// https://www.wattpad.com/api/v3/stories/237369078?fields=...
|
||||
input_url = input_url.split("?")[0].split("/stories/")[1]; // removes params
|
||||
download_id = input_url;
|
||||
mode = "story";
|
||||
} else {
|
||||
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
|
||||
input_url = input_url.split("-")[0].split("wattpad.com/")[1]; // removes tracking fields and title
|
||||
download_id = input_url;
|
||||
if (/^\d+$/.test(download_id)) {
|
||||
// If "wattpad.com/{download_id}" contains only numbers
|
||||
mode = "part";
|
||||
} else {
|
||||
invalid_url = true;
|
||||
input_url = "";
|
||||
download_id = "";
|
||||
}
|
||||
}
|
||||
} else {
|
||||
invalid_url = true;
|
||||
}
|
||||
|
||||
input_url = input_url.match(/\d+/g)?.join("") || "";
|
||||
download_id = input_url;
|
||||
|
||||
// Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, I wasn't able to remove it.
|
||||
// In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url).
|
||||
|
||||
if (input_url.includes("/story/")) {
|
||||
// https://wattpad.com/story/237369078-wattpad-books-presents
|
||||
input_url = input_url.split("-")[0].split("/story/")[1]; // removes tracking fields and title
|
||||
download_id = input_url;
|
||||
mode = "story";
|
||||
} else if (input_url.includes("/stories/")) {
|
||||
// https://www.wattpad.com/api/v3/stories/237369078?fields=...
|
||||
input_url = input_url.split("?")[0].split("/stories/")[1]; // removes params
|
||||
download_id = input_url;
|
||||
mode = "story";
|
||||
} else {
|
||||
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
|
||||
input_url = input_url.split("-")[0].split("wattpad.com/")[1]; // removes tracking fields and title
|
||||
download_id = input_url;
|
||||
if (/^\d+$/.test(download_id)) {
|
||||
// If "wattpad.com/{download_id}" contains only numbers
|
||||
mode = "part";
|
||||
} else {
|
||||
invalid_url = true;
|
||||
input_url = "";
|
||||
download_id = "";
|
||||
}
|
||||
}
|
||||
} else {
|
||||
invalid_url = false;
|
||||
download_id = "";
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
Reference in New Issue
Block a user