fix: reduce image size
All checks were successful
books / lint (push) Successful in 1m22s

This commit is contained in:
Michel Roux 2025-02-14 08:59:37 +00:00
parent ee00265a9b
commit 3f94553e7c
3 changed files with 17 additions and 61 deletions

View File

@ -6,11 +6,10 @@ from random import randint
from tempfile import NamedTemporaryFile
from time import sleep
from typing import cast
from urllib import error, parse
from uuid import uuid4
from bs4 import BeautifulSoup
from curl_cffi import requests
from PIL import Image
from pypub import ( # type: ignore
Chapter,
Epub,
@ -44,68 +43,24 @@ class MyChapterFactory(SimpleChapterFactory): # type: ignore
# return new element-tree
return etree
def render_images(self, ctx: factory.RenderCtx) -> None:
    """
    replace global image references w/ local downloaded ones

    Each ``<img src>`` found in ``ctx.etree`` is fetched with ``curl_cffi``
    (impersonating Firefox), written into ``ctx.imagedir`` and its ``src``
    attribute is rewritten to ``images/<file>``. Images that cannot be
    resolved, fetched or identified are logged through ``ctx.logger`` and
    left untouched.

    :param ctx: render context providing the element-tree, the chapter,
                the logger and the image directory
    """
    # maps an absolute image url to the local path it was saved under
    downloads: dict[str, str] = {}
    for image in ctx.etree.xpath(".//img[@src]"):
        # cleanup link and resolve relative paths
        url = image.attrib["src"].rsplit("?", 1)[0]
        fmt = (ctx.chapter.title, url)
        if "://" not in url:
            if not ctx.chapter.url:
                ctx.logger.warning(
                    "chapter[{}] cannot render image {!r} w/o chapter-url".format(
                        *fmt
                    )
                )
                continue
            url = parse.urljoin(ctx.chapter.url, url)
            fmt = (ctx.chapter.title, url)
        # skip if url has already been downloaded
        if url in downloads:
            image.attrib["src"] = downloads[url]
            continue
        # download url into local image folder for epub
        ctx.logger.debug("chapter[{}] downloading image: {!r}".format(*fmt))
        try:
            res = requests.get(
                url,
                impersonate=requests.BrowserType.firefox133.value,
                thread=None,
                curl_options=None,
                debug=False,
            )
            # ensure status of response is valid; curl_cffi responses expose
            # `status_code` (looking up `status` always produced None, so
            # non-200 responses were never rejected)
            status = getattr(res, "status_code", None)
            if status and status != 200:
                raise error.URLError(f"status: {status}")
            # the whole body is already in memory; use it to determine
            # the content-type
            chunk = res.content
            mime = factory.mime_type(url, chunk)
            if not mime:
                ctx.logger.warning(
                    "chapter[{}] cannot identify {!r} mime".format(*fmt)
                )
                continue
            fname = f"image-{uuid4()}.{mime}"
            fpath = path.join(ctx.imagedir, fname)
            # write the content into the associated file
            with open(fpath, "wb") as f:
                f.write(chunk)
            # save epub-path in downloads cache and update image attribs
            epub_path = path.join("images/", fname)
            downloads[url] = epub_path
            image.attrib["src"] = epub_path
        except (error.URLError, requests.RequestsError):
            # transport failures raised by curl_cffi are not URLError, so
            # they are caught explicitly to keep the download best-effort
            ctx.logger.error("chapter[{}] failed to download {!r}".format(*fmt))
def hydrate(self, ctx: factory.RenderCtx) -> None:
    """
    modify chapter element-tree to render images

    After the images have been rendered, every remaining ``<img>`` whose
    ``src`` is still a remote url is removed from the tree, and every local
    image is shrunk in place so it fits within 1000x2000 pixels. Local
    images that cannot be opened or re-saved are logged and removed as well.
    Links are externalized last when ``ctx.extern_links`` and the chapter
    url are both set.

    :param ctx: render context providing the element-tree, the chapter,
                the logger and the image directory
    """
    # NOTE(review): both the class's own render_images and pypub's
    # factory.render_images run here -- confirm that both are intended
    self.render_images(ctx)
    # presumably the download timeout read by factory.render_images -- TODO confirm
    ctx.timeout = 100
    factory.render_images(ctx)
    for element in ctx.etree.xpath(".//img[@src]"):
        # cleanup link and resolve relative paths
        url = element.attrib["src"].rsplit("?", 1)[0]
        if "://" in url:
            # still remote, so it was never saved locally: drop it
            parent = cast(HtmlElement, element.getparent())
            parent.remove(element)
            continue
        fpath = path.join(ctx.imagedir, url.replace("images/", ""))
        try:
            # context manager releases the file handle once the image is saved
            with Image.open(fpath) as picture:
                # thumbnail() only ever shrinks and keeps the aspect ratio
                picture.thumbnail((1000, 2000))
                picture.save(fpath)
        except OSError:
            # missing or unreadable file (e.g. a relative link whose download
            # failed): drop the image instead of aborting the whole chapter
            ctx.logger.warning(
                "chapter[{}] cannot resize image {!r}".format(ctx.chapter.title, url)
            )
            parent = cast(HtmlElement, element.getparent())
            parent.remove(element)
    if ctx.extern_links and ctx.chapter.url:
        factory.externalize_links(ctx.chapter.url, ctx.etree)

2
poetry.lock generated
View File

@ -556,4 +556,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = "^3.9"
content-hash = "84a41aac6241a5e1eec3635df8c2e51d27506f1098f0e3c45dbed90aa26e5c60"
content-hash = "f6160acfe46ff419f788eda7f0d7ff8bd5ab69b2a6bc188331e2501cb6bd41a8"

View File

@ -9,6 +9,7 @@ license = "WTFPL"
python = "^3.9"
beautifulsoup4 = "^4.13.3"
curl_cffi = "^0.9.0b2"
pillow = "^11.1.0"
pypub3 = "^2.0.7"
pyxml3 = "^0.0.4"