This commit is contained in:
parent
ee00265a9b
commit
3f94553e7c
@ -6,11 +6,10 @@ from random import randint
|
||||
from tempfile import NamedTemporaryFile
|
||||
from time import sleep
|
||||
from typing import cast
|
||||
from urllib import error, parse
|
||||
from uuid import uuid4
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from curl_cffi import requests
|
||||
from PIL import Image
|
||||
from pypub import ( # type: ignore
|
||||
Chapter,
|
||||
Epub,
|
||||
@ -44,68 +43,24 @@ class MyChapterFactory(SimpleChapterFactory): # type: ignore
|
||||
# return new element-tree
|
||||
return etree
|
||||
|
||||
def render_images(self, ctx: factory.RenderCtx) -> None:
    """
    replace global image references w/ local downloaded ones

    Every ``<img src=...>`` found in ``ctx.etree`` is fetched with
    curl_cffi (impersonating firefox), written into ``ctx.imagedir`` under
    a fresh ``image-<uuid>.<mime>`` name and has its ``src`` rewritten to
    ``images/<name>``. An image that cannot be resolved, downloaded or
    identified is logged and left untouched.

    :param ctx: render context supplying ``etree``, ``chapter``,
                ``imagedir`` and ``logger``
    """
    # resolved-url -> epub-path, so a repeated image is fetched only once
    downloads: dict[str, str] = {}
    for image in ctx.etree.xpath(".//img[@src]"):
        # cleanup link (drop query-string) and resolve relative paths
        url = image.attrib["src"].rsplit("?", 1)[0]
        if "://" not in url:
            if not ctx.chapter.url:
                ctx.logger.warning(
                    "chapter[{}] cannot render image {!r} w/o chapter-url".format(
                        ctx.chapter.title, url
                    )
                )
                continue
            url = parse.urljoin(ctx.chapter.url, url)
        fmt = (ctx.chapter.title, url)
        # skip if url has already been downloaded
        if url in downloads:
            image.attrib["src"] = downloads[url]
            continue
        # download url into local image folder for epub
        ctx.logger.debug("chapter[{}] downloading image: {!r}".format(*fmt))
        try:
            res = requests.get(
                url,
                impersonate=requests.BrowserType.firefox133.value,
                thread=None,
                curl_options=None,
                debug=False,
            )
            # curl_cffi exposes the http status as `status_code`; reject
            # anything but a plain 200 so error pages are never saved
            status = res.status_code
            if status != 200:
                raise error.URLError(f"status: {status}")
            content = res.content
        except (requests.RequestsError, OSError):
            # RequestsError: transport failure reported by curl_cffi
            # OSError: covers the URLError raised above (its subclass)
            ctx.logger.error("chapter[{}] failed to download {!r}".format(*fmt))
            continue
        # the whole body is already in memory; use it to sniff the type
        mime = factory.mime_type(url, content)
        if not mime:
            ctx.logger.warning(
                "chapter[{}] cannot identify {!r} mime".format(*fmt)
            )
            continue
        fname = f"image-{uuid4()}.{mime}"
        fpath = path.join(ctx.imagedir, fname)
        with open(fpath, "wb") as f:
            f.write(content)
        # save epub-path in downloads cache and update image attribs
        epub_path = path.join("images/", fname)
        downloads[url] = epub_path
        image.attrib["src"] = epub_path
|
||||
|
||||
def hydrate(self, ctx: factory.RenderCtx) -> None:
    """
    modify chapter element-tree to render images

    Delegates image rendering to ``factory.render_images``, then walks
    every ``<img src=...>`` left in ``ctx.etree``: an image whose ``src``
    is still an absolute url is removed from the tree, any other image is
    opened from ``ctx.imagedir`` and shrunk in place to fit within
    1000x2000 pixels. Finally, when ``ctx.extern_links`` is set and the
    chapter has a url, the tree is passed to ``factory.externalize_links``.

    :param ctx: render context supplying ``etree``, ``chapter``,
                ``imagedir``, ``extern_links`` and ``logger``
    """
    # NOTE(review): presumably the download timeout honoured by
    # factory.render_images -- confirm unit and semantics against pypub
    ctx.timeout = 100
    # images are rendered exactly once, by the library implementation
    factory.render_images(ctx)
    for image in ctx.etree.xpath(".//img[@src]"):
        # cleanup link (drop query-string)
        url = image.attrib["src"].rsplit("?", 1)[0]
        if "://" in url:
            # src is still an absolute url (presumably it could not be
            # downloaded -- verify): drop the element from the chapter
            parent = cast(HtmlElement, image.getparent())
            parent.remove(image)
            continue
        fpath = path.join(ctx.imagedir, url.replace("images/", ""))
        try:
            # context manager releases the file handle; a distinct name
            # keeps the loop variable bound to the <img> element
            with Image.open(fpath) as picture:
                picture.thumbnail((1000, 2000))
                picture.save(fpath)
        except OSError:
            # missing or unreadable file: keep the image as-is instead of
            # aborting the whole chapter render
            ctx.logger.warning(
                "chapter[{}] cannot resize image {!r}".format(
                    ctx.chapter.title, fpath
                )
            )
    if ctx.extern_links and ctx.chapter.url:
        factory.externalize_links(ctx.chapter.url, ctx.etree)
|
||||
|
||||
|
2
poetry.lock
generated
2
poetry.lock
generated
@ -556,4 +556,4 @@ files = [
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "84a41aac6241a5e1eec3635df8c2e51d27506f1098f0e3c45dbed90aa26e5c60"
|
||||
content-hash = "f6160acfe46ff419f788eda7f0d7ff8bd5ab69b2a6bc188331e2501cb6bd41a8"
|
||||
|
@ -9,6 +9,7 @@ license = "WTFPL"
|
||||
python = "^3.9"
|
||||
beautifulsoup4 = "^4.13.3"
|
||||
curl_cffi = "^0.9.0b2"
|
||||
pillow = "^11.1.0"
|
||||
pypub3 = "^2.0.7"
|
||||
pyxml3 = "^0.0.4"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user