fix(api): Improve readability

This commit is contained in:
TheOnlyWayUp
2024-11-30 21:25:07 +00:00
parent 8b00d0b109
commit a31c26f8c5
2 changed files with 43 additions and 33 deletions
+41 -31
View File
@@ -9,6 +9,7 @@ from eliot import to_file, start_action
from eliot.stdlib import EliotHandler from eliot.stdlib import EliotHandler
from dotenv import load_dotenv from dotenv import load_dotenv
from ebooklib import epub from ebooklib import epub
from ebooklib.epub import EpubBook
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from pydantic import model_validator, field_validator from pydantic import model_validator, field_validator
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings
@@ -28,6 +29,8 @@ if environ.get("DEBUG"):
logger = logging.Logger("wpd") logger = logging.Logger("wpd")
logger.addHandler(handler) logger.addHandler(handler)
# --- #
class CacheTypes(Enum): class CacheTypes(Enum):
file = "file" file = "file"
@@ -71,6 +74,8 @@ class Config(BaseSettings):
config = Config() config = Config()
# --- #
headers = { headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
} }
@@ -91,6 +96,29 @@ logger.info(f"Using {cache=}")
# --- Utilities --- # # --- Utilities --- #
def slugify(value: object, allow_unicode: bool = False) -> str:
    """Convert *value* into a lowercase, dash-separated slug.

    Taken from
    https://github.com/django/django/blob/master/django/utils/text.py
    (thanks https://stackoverflow.com/a/295466).

    Characters that aren't alphanumerics, underscores, hyphens, or
    whitespace are removed; runs of whitespace and/or dashes collapse
    into a single dash; leading and trailing dashes and underscores are
    stripped.

    Args:
        value: Object to slugify; it is passed through ``str()`` first.
        allow_unicode: When False (default), the text is NFKD-normalized
            and any character with no ASCII representation is dropped.
            When True, the text is NFKC-normalized and non-ASCII word
            characters are kept.

    Returns:
        The slug. This is an empty string when nothing survives the
        filtering (for example, all-non-ASCII input with
        ``allow_unicode=False``).
    """
    text = str(value)
    if allow_unicode:
        text = unicodedata.normalize("NFKC", text)
    else:
        # NFKD splits accented letters into base letter + combining mark,
        # so the ASCII round-trip keeps the base letter ("é" -> "e").
        text = (
            unicodedata.normalize("NFKD", text)
            .encode("ascii", "ignore")
            .decode("ascii")
        )
    # Drop everything except word characters, whitespace, and hyphens.
    text = re.sub(r"[^\w\s-]", "", text.lower())
    # Collapse separator runs to one dash, then trim edge dashes/underscores.
    return re.sub(r"[-\s]+", "-", text).strip("-_")
async def wp_get_cookies(username: str, password: str) -> dict: async def wp_get_cookies(username: str, password: str) -> dict:
# source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58 # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
"""Retrieves authorization cookies from Wattpad by logging in with user creds. """Retrieves authorization cookies from Wattpad by logging in with user creds.
@@ -129,38 +157,15 @@ async def wp_get_cookies(username: str, password: str) -> dict:
return cookies return cookies
def slugify(value: object, allow_unicode: bool = False) -> str:
    """Convert *value* into a lowercase, dash-separated slug.

    Taken from
    https://github.com/django/django/blob/master/django/utils/text.py
    (thanks https://stackoverflow.com/a/295466).

    Characters that aren't alphanumerics, underscores, hyphens, or
    whitespace are removed; runs of whitespace and/or dashes collapse
    into a single dash; leading and trailing dashes and underscores are
    stripped.

    Args:
        value: Object to slugify; it is passed through ``str()`` first.
        allow_unicode: When False (default), the text is NFKD-normalized
            and any character with no ASCII representation is dropped.
            When True, the text is NFKC-normalized and non-ASCII word
            characters are kept.

    Returns:
        The slug. This is an empty string when nothing survives the
        filtering (for example, all-non-ASCII input with
        ``allow_unicode=False``).
    """
    text = str(value)
    if allow_unicode:
        text = unicodedata.normalize("NFKC", text)
    else:
        # NFKD splits accented letters into base letter + combining mark,
        # so the ASCII round-trip keeps the base letter ("é" -> "e").
        text = (
            unicodedata.normalize("NFKD", text)
            .encode("ascii", "ignore")
            .decode("ascii")
        )
    # Drop everything except word characters, whitespace, and hyphens.
    text = re.sub(r"[^\w\s-]", "", text.lower())
    # Collapse separator runs to one dash, then trim edge dashes/underscores.
    return re.sub(r"[-\s]+", "-", text).strip("-_")
# --- API Calls --- # # --- API Calls --- #
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) @backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_story_id( async def fetch_story_from_partId(
part_id: int, cookies: Optional[dict] = None part_id: int, cookies: Optional[dict] = None
) -> Tuple[int, dict]: ) -> Tuple[int, dict]:
"""Return a Story ID from a Part ID.""" """Return a Story ID from a Part ID."""
with start_action(action_type="api_fetch_storyFromPart"): with start_action(action_type="api_fetch_storyFromPartId"):
async with CachedSession( async with CachedSession(
headers=headers, cache=None if cookies else cache headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies. ) as session: # Don't cache requests with Cookies.
@@ -214,7 +219,7 @@ async def fetch_cover(url: str) -> bytes:
with start_action(action_type="api_fetch_cover", url=url): with start_action(action_type="api_fetch_cover", url=url):
async with CachedSession( async with CachedSession(
headers=headers, cache=None headers=headers, cache=None
) as session: # Don't cache cover requests. ) as session: # Don't cache images.
async with session.get(url) as response: async with session.get(url) as response:
response.raise_for_status() response.raise_for_status()
@@ -226,7 +231,8 @@ async def fetch_cover(url: str) -> bytes:
# --- EPUB Generation --- # # --- EPUB Generation --- #
def set_metadata(book, data): def set_metadata(book: EpubBook, data: dict) -> None:
"""Set book metadata."""
book.add_author(data["user"]["username"]) book.add_author(data["user"]["username"])
book.add_metadata("DC", "title", data["title"]) book.add_metadata("DC", "title", data["title"])
@@ -246,7 +252,8 @@ def set_metadata(book, data):
) )
async def set_cover(book, data): async def set_cover(book: EpubBook, data: dict) -> None:
"""Set book cover."""
book.set_cover("cover.jpg", await fetch_cover(data["cover"])) book.set_cover("cover.jpg", await fetch_cover(data["cover"]))
chapter = epub.EpubHtml( chapter = epub.EpubHtml(
file_name="titlepage.xhtml", # Standard for cover page file_name="titlepage.xhtml", # Standard for cover page
@@ -255,7 +262,10 @@ async def set_cover(book, data):
async def add_chapters( async def add_chapters(
book, data, download_images: bool = False, cookies: Optional[dict] = None book: EpubBook,
data: dict,
download_images: bool = False,
cookies: Optional[dict] = None,
): ):
chapters = [] chapters = []
@@ -275,7 +285,7 @@ async def add_chapters(
async with CachedSession( async with CachedSession(
headers=headers, cache=None headers=headers, cache=None
) as session: # Don't cache requests for images. ) as session: # Don't cache images.
for idx, image in enumerate(soup.find_all("img")): for idx, image in enumerate(soup.find_all("img")):
if not image["src"]: if not image["src"]:
continue continue
@@ -303,7 +313,7 @@ async def add_chapters(
for chapter in chapters: for chapter in chapters:
book.add_item(chapter) book.add_item(chapter)
book.toc = tuple(chapters) book.toc = chapters
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
book.add_item(epub.EpubNcx()) book.add_item(epub.EpubNcx())
+2 -2
View File
@@ -19,7 +19,7 @@ from create_book import (
add_chapters, add_chapters,
slugify, slugify,
wp_get_cookies, wp_get_cookies,
fetch_story_id, fetch_story_from_partId,
logger, logger,
) )
@@ -143,7 +143,7 @@ async def handle_download(
story_id = download_id story_id = download_id
metadata = await retrieve_story(story_id, cookies) metadata = await retrieve_story(story_id, cookies)
case DownloadMode.part: case DownloadMode.part:
story_id, metadata = await fetch_story_id(download_id, cookies) story_id, metadata = await fetch_story_from_partId(download_id, cookies)
logger.error(f"Retrieved story id ({story_id=})") logger.error(f"Retrieved story id ({story_id=})")