fix(api): Type errors in parser

This commit is contained in:
TheOnlyWayUp
2025-06-09 14:30:28 +00:00
parent 1fc8c59992
commit a790021057
2 changed files with 12 additions and 11 deletions
+9 -10
View File
@@ -1,6 +1,6 @@
import asyncio import asyncio
from itertools import batched, chain from itertools import batched, chain
from typing import List, Tuple from typing import Generator, List, Tuple, cast
from aiohttp import ClientSession from aiohttp import ClientSession
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup, Tag
@@ -18,15 +18,16 @@ def clean_tree(title: str, id: int, body: str) -> BeautifulSoup:
""" """
) )
insert_at = new_soup.find("section") insert_at = cast(Tag, new_soup.find("section"))
for tag in list(original_soup.find("body").children): children = cast(Tag, original_soup.find("body")).children
for tag in cast(list[Tag], list(children)):
if tag.name != "p": # Casted to lower if tag.name != "p": # Casted to lower
print(tag.name) print(tag.name)
continue continue
style = tag.attrs.get("style") style = tag.attrs.get("style")
for child in tag.children: for child in cast(list[Tag], tag.children):
# tag is a <p> enclosing either text, media, or a break # tag is a <p> enclosing either text, media, or a break
if child.name in [None, "b", "i", "u"]: if child.name in [None, "b", "i", "u"]:
@@ -73,12 +74,10 @@ async def fetch_image(url: str) -> bytes | None:
return body return body
async def download_tree_images(tree: BeautifulSoup) -> Tuple[bytes]: async def download_tree_images(tree: BeautifulSoup) -> Generator[bytes]:
image_urls = [img["src"] for img in tree.find_all("img")] image_urls = [img["src"] for img in tree.find_all("img")]
downloaded_images: List[bytes] = list( downloaded_images: Generator[bytes] = chain(
chain( await asyncio.gather(*[fetch_image(url) for url in chunk])
await asyncio.gather(*[fetch_image(url) for url in chunk]) for chunk in batched(image_urls, 3)
for chunk in batched(image_urls, 3)
)
) )
return downloaded_images return downloaded_images
+3 -1
View File
@@ -8,7 +8,7 @@ from zipfile import ZipFile
from aiohttp import ClientResponseError from aiohttp import ClientResponseError
from eliot import start_action from eliot import start_action
from fastapi import FastAPI, Request from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import ( from fastapi.responses import (
FileResponse, FileResponse,
HTMLResponse, HTMLResponse,
@@ -166,6 +166,8 @@ async def handle_download(
cover_data = await fetch_image( cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-") metadata["cover"].replace("-256-", "-512-")
) # Increase resolution ) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
match format: match format:
case DownloadFormat.epub: case DownloadFormat.epub: