fix: ⚡ reduce image size

2025-02-14 08:59:37 +00:00 · 2025-02-14 08:59:37 +00:00 · 3f94553e7c
commit 3f94553e7c
parent ee00265a9b
3 changed files with 17 additions and 61 deletions
--- a/auberge_vagabonde/main.py
+++ b/auberge_vagabonde/main.py
@@ -6,11 +6,10 @@ from random import randint
 from tempfile import NamedTemporaryFile
 from time import sleep
 from typing import cast
-from urllib import error, parse
-from uuid import uuid4

 from bs4 import BeautifulSoup
 from curl_cffi import requests
+from PIL import Image
 from pypub import (  # type: ignore
    Chapter,
    Epub,
@@ -44,68 +43,24 @@ class MyChapterFactory(SimpleChapterFactory):  # type: ignore
        # return new element-tree
        return etree

-    def render_images(self, ctx: factory.RenderCtx) -> None:
-        """
-        replace global image references w/ local downloaded ones
-        """
-        downloads: dict[str, str] = {}
-        for image in ctx.etree.xpath(".//img[@src]"):
-            # cleanup link and resolve relative paths
-            url = image.attrib["src"].rsplit("?", 1)[0]
-            fmt = (ctx.chapter.title, url)
-            if "://" not in url:
-                if not ctx.chapter.url:
-                    ctx.logger.warning(
-                        "chapter[{}] cannot render image {!r} w/o chapter-url".format(
-                            *fmt
-                        )
-                    )
-                    continue
-                url = parse.urljoin(ctx.chapter.url, url)
-                fmt = (ctx.chapter.title, url)
-            # skip if url has already been downloaded
-            if url in downloads:
-                image.attrib["src"] = downloads[url]
-                continue
-            # download url into local image folder for epub
-            ctx.logger.debug("chapter[{}] downloading image: {!r}".format(*fmt))
-            try:
-                res = requests.get(
-                    url,
-                    impersonate=requests.BrowserType.firefox133.value,
-                    thread=None,
-                    curl_options=None,
-                    debug=False,
-                )
-                # ensure status of response is valid
-                status = getattr(res, "status", None)
-                if status and status != 200:
-                    raise error.URLError(f"status: {status}")
-                # read first chunk to determine content-type
-                chunk = res.content
-                mime = factory.mime_type(url, chunk)
-                if not mime:
-                    ctx.logger.warning(
-                        "chapter[{}] cannot identify {!r} mime".format(*fmt)
-                    )
-                    continue
-                fname = f"image-{uuid4()}.{mime}"
-                fpath = path.join(ctx.imagedir, fname)
-                # read rest of the content into associated file
-                with open(fpath, "wb") as f:
-                    f.write(chunk)
-                # save epub-path in downloads cache and update image attribs
-                epub_path = path.join("images/", fname)
-                downloads[url] = epub_path
-                image.attrib["src"] = epub_path
-            except error.URLError:
-                ctx.logger.error("chapter[{}] failed to download {!r}".format(*fmt))
-
    def hydrate(self, ctx: factory.RenderCtx) -> None:
        """
        modify chapter element-tree to render images
        """
-        self.render_images(ctx)
+        ctx.timeout = 100
+        factory.render_images(ctx)
+        for image in ctx.etree.xpath(".//img[@src]"):
+            # strip any query string from the image source
+            url = image.attrib["src"].rsplit("?", 1)[0]
+            if "://" in url:
+                # src is still an absolute URL (presumably never downloaded): detach the tag from its parent
+                parent = cast(HtmlElement, image.getparent())
+                parent.remove(image)
+            else:
+                fpath = path.join(ctx.imagedir, url.replace("images/", ""))
+                image = Image.open(fpath)
+                image.thumbnail((1000, 2000))
+                image.save(fpath)
        if ctx.extern_links and ctx.chapter.url:
            factory.externalize_links(ctx.chapter.url, ctx.etree)

--- a/poetry.lock
+++ b/poetry.lock
@@ -556,4 +556,4 @@ files = [
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.9"
-content-hash = "84a41aac6241a5e1eec3635df8c2e51d27506f1098f0e3c45dbed90aa26e5c60"
+content-hash = "f6160acfe46ff419f788eda7f0d7ff8bd5ab69b2a6bc188331e2501cb6bd41a8"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,6 +9,7 @@ license = "WTFPL"
 python = "^3.9"
 beautifulsoup4 = "^4.13.3"
 curl_cffi = "^0.9.0b2"
+pillow = "^11.1.0"
 pypub3 = "^2.0.7"
 pyxml3 = "^0.0.4"