From 40624b61d061e6d0d5a81ec1b08606eb0c4c7b77 Mon Sep 17 00:00:00 2001
From: TheOnlyWayUp
Date: Fri, 29 Dec 2023 02:21:45 +0000
Subject: [PATCH] feat(api): API Functional

---
Review notes (ignored by git am, not part of the commit message):
- fetch_part_content() and fetch_cover() read the undefined names `retry`
  and `story_id` in their retry path (NameError); both now take
  `retry=True` and retry themselves with their own argument.
- The heading markup passed to chapter.set_content() was missing from the
  received patch; it is reconstructed as an <h1> element. Confirm against
  the original commit.
- Chapter file names are prefixed with the part index so that repeated or
  non-ASCII titles cannot collide.
- download_book() answers 404 when retrieve_story() returns {} and closes
  its temporary file.
- The "From:" header carries no e-mail address and the index hashes below
  predate these edits.

 .gitignore             |   6 ++
 .vscode/settings.json  |   3 +
 src/api/create_book.py | 186 +++++++++++++++++++++++++++++++++++++++++
 src/api/main.py        |  60 +++++++++++++
 4 files changed, 255 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .vscode/settings.json
 create mode 100644 src/api/create_book.py
 create mode 100644 src/api/main.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4ac23a0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+__pycache__
+venv
+*epub
+*html
+data
+*ipynb
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..b881eff
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.analysis.autoImportCompletions": true
+}
\ No newline at end of file
diff --git a/src/api/create_book.py b/src/api/create_book.py
new file mode 100644
index 0000000..e2354dd
--- /dev/null
+++ b/src/api/create_book.py
@@ -0,0 +1,186 @@
+"""Helpers that fetch a Wattpad story and assemble it into an EPUB book."""
+
+import asyncio
+import html
+import re
+import unicodedata
+
+import aiohttp
+from ebooklib import epub
+
+headers = {
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
+}
+
+
+async def retrieve_story(story_id: int, retry=True) -> dict:
+    """Taking a story_id, return its information from the Wattpad API.
+
+    Returns an empty dict when the API answers 400 or 404. A ValueError
+    inside the request (raised here for any other non-OK status) triggers
+    one retry after 15 seconds; if the retry fails too,
+    asyncio.TimeoutError is raised.
+    """
+    async with aiohttp.ClientSession(headers=headers) as session:
+        try:
+            async with session.get(
+                f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
+            ) as response:
+                if not response.ok:
+                    if response.status in [404, 400]:
+                        return {}
+                    raise ValueError("Status Code:", response.status)
+                body = await response.json()
+        except ValueError as exc:
+            if not retry:
+                raise asyncio.TimeoutError() from exc
+            await asyncio.sleep(15)
+            return await retrieve_story(story_id, retry=False)
+
+    return body
+
+
+async def fetch_part_content(part_id: int, retry=True) -> str:
+    """Return the HTML Content of a Part.
+
+    Returns an empty string when the API answers 400 or 404. A ValueError
+    inside the request (raised here for any other non-OK status) triggers
+    one retry after 15 seconds; if the retry fails too,
+    asyncio.TimeoutError is raised.
+    """
+    async with aiohttp.ClientSession(headers=headers) as session:
+        try:
+            async with session.get(
+                f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"
+            ) as response:
+                if not response.ok:
+                    if response.status in [404, 400]:
+                        return ""
+                    raise ValueError("Status Code:", response.status)
+                body = await response.text()
+        except ValueError as exc:
+            if not retry:
+                raise asyncio.TimeoutError() from exc
+            await asyncio.sleep(15)
+            return await fetch_part_content(part_id, retry=False)
+
+    return body
+
+
+async def fetch_cover(url: str, retry=True) -> bytes:
+    """Fetch image bytes.
+
+    Returns empty bytes when the server answers 400 or 404. A ValueError
+    inside the request (raised here for any other non-OK status) triggers
+    one retry after 15 seconds; if the retry fails too,
+    asyncio.TimeoutError is raised.
+    """
+    async with aiohttp.ClientSession(headers=headers) as session:
+        try:
+            async with session.get(url) as response:
+                if not response.ok:
+                    if response.status in [404, 400]:
+                        return bytes()
+                    raise ValueError("Status Code:", response.status)
+                body = await response.read()
+        except ValueError as exc:
+            if not retry:
+                raise asyncio.TimeoutError() from exc
+            await asyncio.sleep(15)
+            return await fetch_cover(url, retry=False)
+
+    return body
+
+
+def slugify(value, allow_unicode=False) -> str:
+    """
+    Taken from https://github.com/django/django/blob/master/django/utils/text.py
+    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
+    dashes to single dashes. Remove characters that aren't alphanumerics,
+    underscores, or hyphens. Convert to lowercase. Also strip leading and
+    trailing whitespace, dashes, and underscores.
+
+    Thanks https://stackoverflow.com/a/295466.
+    """
+    value = str(value)
+    if allow_unicode:
+        value = unicodedata.normalize("NFKC", value)
+    else:
+        value = (
+            unicodedata.normalize("NFKD", value)
+            .encode("ascii", "ignore")
+            .decode("ascii")
+        )
+    value = re.sub(r"[^\w\s-]", "", value.lower())
+    return re.sub(r"[-\s]+", "-", value).strip("-_")
+
+
+# --- #
+
+
+def set_metadata(book, data):
+    """Copy author, description, dates, language and tags from data to book."""
+    book.add_author(data["user"]["username"])
+
+    book.add_metadata("DC", "description", data["description"])
+    book.add_metadata("DC", "created", data["createDate"])
+    book.add_metadata("DC", "modified", data["modifyDate"])
+    book.add_metadata("DC", "language", data["language"]["name"])
+
+    book.add_metadata(
+        None, "meta", "", {"name": "tags", "content": ", ".join(data["tags"])}
+    )
+    book.add_metadata(
+        None, "meta", "", {"name": "mature", "content": str(int(data["mature"]))}
+    )
+    book.add_metadata(
+        None, "meta", "", {"name": "completed", "content": str(int(data["completed"]))}
+    )
+
+
+async def set_cover(book, data):
+    """Download the image at data["cover"] and set it as the book's cover."""
+    book.set_cover("cover.jpg", await fetch_cover(data["cover"]))
+
+
+async def add_chapters(book, data):
+    """Fetch every part in data["parts"] and add it to book as a chapter.
+
+    This is an async generator that yields each part's title once the part
+    has been fetched. Chapters, table of contents, navigation items and the
+    spine are attached to book only after the last part, so the generator
+    has to be consumed to the end.
+    """
+    chapters = []
+
+    for index, part in enumerate(data["parts"], start=1):
+        content = await fetch_part_content(part["id"])
+        title = part["title"]
+
+        # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
+        chapter = epub.EpubHtml(
+            title=title,
+            # The index keeps file names unique when titles repeat or when
+            # slugify() returns an empty string (e.g. a non-ASCII title).
+            file_name=f"{index}-{slugify(title)}.xhtml",
+            lang=data["language"]["name"],
+        )
+        # Escape the title so "&", "<" and ">" cannot break the markup.
+        heading = f"<h1>{html.escape(title, quote=False)}</h1>"
+        chapter.set_content(heading + content)
+
+        chapters.append(chapter)
+
+        yield title  # Yield the chapter's title upon insertion preceded by retrieval.
+
+    for chapter in chapters:
+        book.add_item(chapter)
+
+    book.toc = tuple(chapters)
+
+    # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    # create spine
+    book.spine = ["nav"] + chapters
diff --git a/src/api/main.py b/src/api/main.py
new file mode 100644
index 0000000..b9b7fa9
--- /dev/null
+++ b/src/api/main.py
@@ -0,0 +1,60 @@
+"""FastAPI application that serves Wattpad stories as EPUB downloads."""
+
+import tempfile
+from io import BytesIO
+
+from ebooklib import epub
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
+
+from create_book import retrieve_story, set_cover, set_metadata, add_chapters
+
+app = FastAPI()
+
+
+@app.get("/download/{story_id}")
+async def download_book(story_id: int):
+    """Build the EPUB for story_id and return it as a file download.
+
+    Responds with HTTP 404 when retrieve_story() returns no data.
+    """
+    data = await retrieve_story(story_id)
+    if not data:
+        # retrieve_story() returns {} when the upstream API answers 400/404.
+        raise HTTPException(status_code=404, detail="Story not found")
+
+    book = epub.EpubBook()
+
+    # Metadata and Cover are updated
+    set_metadata(book, data)
+    await set_cover(book, data)
+    # print("Metadata Downloaded")
+
+    # Chapters are downloaded
+    async for title in add_chapters(book, data):
+        # print(f"Part ({title}) downloaded")
+        ...
+
+    # Book is compiled
+    # The with-block closes, and thereby deletes, the temporary file as
+    # soon as its bytes have been read back.
+    with tempfile.NamedTemporaryFile(
+        dir=".", suffix=".epub", delete=True
+    ) as temp_file:  # Thanks https://stackoverflow.com/a/75398222
+        # create epub file
+        epub.write_epub(temp_file, book, {})
+
+        temp_file.file.seek(0)
+        book_data = temp_file.file.read()
+
+    return StreamingResponse(
+        BytesIO(book_data),
+        media_type="application/epub+zip",
+        headers={"Content-Disposition": f'attachment; filename="book_{story_id}.epub"'},
+    )
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=80)