From 7c91149e7a5f9c6e592990f28c7d8595258ef411 Mon Sep 17 00:00:00 2001 From: TheOnlyWayUp Date: Sun, 30 Jun 2024 07:46:21 +0000 Subject: [PATCH] feat(api): Support authentication --- src/api/src/create_book.py | 166 +++++++++++++++++++++++++------------ 1 file changed, 112 insertions(+), 54 deletions(-) diff --git a/src/api/src/create_book.py b/src/api/src/create_book.py index 85abf08..96dc8a5 100644 --- a/src/api/src/create_book.py +++ b/src/api/src/create_book.py @@ -1,9 +1,10 @@ import asyncio +from typing import Optional from ebooklib import epub import unicodedata import re import backoff -from aiohttp import ClientResponseError +from aiohttp import ClientResponseError, ClientSession from aiohttp_client_cache.session import CachedSession from aiohttp_client_cache import FileBackend from bs4 import BeautifulSoup @@ -13,59 +14,46 @@ headers = { "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" } -cache = FileBackend( - use_temp=True, - expire_after=43200, # 12 hours -) +cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours + +# --- Utilities --- # -@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) -async def retrieve_story(story_id: int, retry=True) -> dict: - """Taking a story_id, return its information from the Wattpad API.""" - async with CachedSession(headers=headers, cache=cache) as session: - async with session.get( - f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover" +async def wp_get_cookies(username: str, password: str) -> dict: + # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58 + """Retrieves authorization cookies from Wattpad by logging in with user creds. + + Args: + username (str): Username. + password (str): Password. + + Raises: + ValueError: Bad status code. + ValueError: No cookies returned. + + Returns: + dict: Authorization cookies. + """ + async with ClientSession(headers=headers) as session: + async with session.post( + "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth%2Flogin", + data={ + "username": username.lower(), + "password": password, + }, # the username.lower() is for caching ) as response: - if not response.ok: - if response.status in [404, 400]: - return {} - response.raise_for_status() + if response.status != 204: + raise ValueError("Not a 204.") - body = await response.json() + cookies = { + k: v.value + for k, v in response.cookies.items() # Thanks https://stackoverflow.com/a/32281245 + } - return body + if not cookies: + raise ValueError("No cookies.") - -@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) -async def fetch_part_content(part_id: int) -> str: - """Return the HTML Content of a Part.""" - async with CachedSession(headers=headers, cache=cache) as session: - async with session.get( - f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}" - ) as response: - if not response.ok: - if response.status in [404, 400]: - return "" - response.raise_for_status() - - body = await response.text() - - return body - - -@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) -async def fetch_cover(url: str) -> bytes: - """Fetch image bytes.""" - async with CachedSession(headers=headers, cache=cache) as session: - async with session.get(url) as response: - if not response.ok: - if response.status in [404, 400]: - return bytes() - response.raise_for_status() - - body = await response.read() - - return body + return cookies def slugify(value, allow_unicode=False) -> str: @@ -91,7 +79,71 @@ def slugify(value, allow_unicode=False) -> str: return re.sub(r"[-\s]+", "-", value).strip("-_") -# --- # +# --- API Calls --- # + + +@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) +async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> dict: + """Taking a story_id, return its information from the Wattpad API.""" + async with ( + CachedSession(headers=headers, cache=cache) + if not cookies + else ClientSession(headers=headers, cookies=cookies) + ) as session: # Don't cache requests with Cookies. + async with session.get( + f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover" + ) as response: + if not response.ok: + if response.status in [404, 400]: + return {} + response.raise_for_status() + + body = await response.json() + + return body + + +@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) +async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str: + """Return the HTML Content of a Part.""" + async with ( + CachedSession(headers=headers, cache=cache) + if not cookies + else ClientSession(headers=headers, cookies=cookies) + ) as session: # Don't cache requests with Cookies. + async with session.get( + f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}" + ) as response: + if not response.ok: + if response.status in [404, 400]: + return "" + response.raise_for_status() + + body = await response.text() + + return body + + +@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) +async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes: + """Fetch image bytes.""" + async with ( + CachedSession(headers=headers, cache=cache) + if not cookies + else ClientSession(headers=headers, cookies=cookies) + ) as session: # Don't cache requests with Cookies. + async with session.get(url) as response: + if not response.ok: + if response.status in [404, 400]: + return bytes() + response.raise_for_status() + + body = await response.read() + + return body + + +# --- EPUB Generation --- # def set_metadata(book, data): @@ -113,15 +165,17 @@ def set_metadata(book, data): ) -async def set_cover(book, data): - book.set_cover("cover.jpg", await fetch_cover(data["cover"])) +async def set_cover(book, data, cookies: Optional[dict] = None): + book.set_cover("cover.jpg", await fetch_cover(data["cover"], cookies=cookies)) -async def add_chapters(book, data, download_images: bool = False): +async def add_chapters( + book, data, download_images: bool = False, cookies: Optional[dict] = None +): chapters = [] for part in data["parts"]: - content = await fetch_part_content(part["id"]) + content = await fetch_part_content(part["id"], cookies=cookies) title = part["title"] clean_title = slugify(title) @@ -134,7 +188,11 @@ async def add_chapters(book, data, download_images: bool = False): if download_images: soup = BeautifulSoup(content, "lxml") - async with CachedSession(cache=cache, headers=headers) as session: + async with ( + CachedSession(headers=headers, cache=cache) + if not cookies + else ClientSession(headers=headers, cookies=cookies) + ) as session: # Don't cache requests with Cookies. for idx, image in enumerate(soup.find_all("img")): if not image["src"]: continue