From 3f94553e7cd2a94d88bd3f2f46c7fd49957766c5 Mon Sep 17 00:00:00 2001
From: Michel Roux <xefir@crystalyx.net>
Date: Fri, 14 Feb 2025 08:59:37 +0000
Subject: [PATCH] fix: :zap: reduce image size

---
 auberge_vagabonde/__main__.py | 94 ++++++++++++-----------------------
 poetry.lock                   |  2 +-
 pyproject.toml                |  1 +
 3 files changed, 35 insertions(+), 62 deletions(-)

diff --git a/auberge_vagabonde/__main__.py b/auberge_vagabonde/__main__.py
index 4c501d1..1693e82 100644
--- a/auberge_vagabonde/__main__.py
+++ b/auberge_vagabonde/__main__.py
@@ -6,11 +6,10 @@ from random import randint
 from tempfile import NamedTemporaryFile
 from time import sleep
 from typing import cast
-from urllib import error, parse
-from uuid import uuid4
 
 from bs4 import BeautifulSoup
 from curl_cffi import requests
+from PIL import Image
 from pypub import (  # type: ignore
     Chapter,
     Epub,
@@ -44,68 +43,41 @@ class MyChapterFactory(SimpleChapterFactory):  # type: ignore
         # return new element-tree
         return etree
 
-    def render_images(self, ctx: factory.RenderCtx) -> None:
-        """
-        replace global image references w/ local downloaded ones
-        """
-        downloads: dict[str, str] = {}
-        for image in ctx.etree.xpath(".//img[@src]"):
-            # cleanup link and resolve relative paths
-            url = image.attrib["src"].rsplit("?", 1)[0]
-            fmt = (ctx.chapter.title, url)
-            if "://" not in url:
-                if not ctx.chapter.url:
-                    ctx.logger.warning(
-                        "chapter[{}] cannot render image {!r} w/o chapter-url".format(
-                            *fmt
-                        )
-                    )
-                    continue
-                url = parse.urljoin(ctx.chapter.url, url)
-                fmt = (ctx.chapter.title, url)
-            # skip if url has already been downloaded
-            if url in downloads:
-                image.attrib["src"] = downloads[url]
-                continue
-            # download url into local image folder for epub
-            ctx.logger.debug("chapter[{}] downloading image: {!r}".format(*fmt))
-            try:
-                res = requests.get(
-                    url,
-                    impersonate=requests.BrowserType.firefox133.value,
-                    thread=None,
-                    curl_options=None,
-                    debug=False,
-                )
-                # ensure status of response is valid
-                status = getattr(res, "status", None)
-                if status and status != 200:
-                    raise error.URLError(f"status: {status}")
-                # read first chunk to determine content-type
-                chunk = res.content
-                mime = factory.mime_type(url, chunk)
-                if not mime:
-                    ctx.logger.warning(
-                        "chapter[{}] cannot identify {!r} mime".format(*fmt)
-                    )
-                    continue
-                fname = f"image-{uuid4()}.{mime}"
-                fpath = path.join(ctx.imagedir, fname)
-                # read rest of the content into associated file
-                with open(fpath, "wb") as f:
-                    f.write(chunk)
-                # save epub-path in downloads cache and update image attribs
-                epub_path = path.join("images/", fname)
-                downloads[url] = epub_path
-                image.attrib["src"] = epub_path
-            except error.URLError:
-                ctx.logger.error("chapter[{}] failed to download {!r}".format(*fmt))
-
     def hydrate(self, ctx: factory.RenderCtx) -> None:
         """
-        modify chapter element-tree to render images
+        render images with the pypub factory, then shrink the local files
+
+        tags still pointing to a remote url after rendering are removed,
+        every local image is resized in place to fit within 1000x2000 pixels
         """
-        self.render_images(ctx)
+        ctx.timeout = 100
+        factory.render_images(ctx)
+        # the same file can be referenced by several tags: resize it once
+        resized: set[str] = set()
+        for image in ctx.etree.xpath(".//img[@src]"):
+            # drop the query string before inspecting the link
+            src = image.attrib["src"].rsplit("?", 1)[0]
+            if "://" in src:
+                # still a remote link instead of a local file: drop the tag
+                parent = cast(HtmlElement, image.getparent())
+                parent.remove(image)
+                continue
+            fpath = path.join(ctx.imagedir, src.replace("images/", ""))
+            if fpath in resized:
+                continue
+            resized.add(fpath)
+            try:
+                # the context manager releases the file even on failure
+                with Image.open(fpath) as picture:
+                    picture.thumbnail((1000, 2000))
+                    picture.save(fpath)
+            except (OSError, ValueError):
+                # missing or unreadable image: log it and keep going
+                ctx.logger.warning(
+                    "chapter[{}] cannot resize image {!r}".format(
+                        ctx.chapter.title, src
+                    )
+                )
         if ctx.extern_links and ctx.chapter.url:
             factory.externalize_links(ctx.chapter.url, ctx.etree)
 
diff --git a/poetry.lock b/poetry.lock
index c9d1ddd..1f361f1 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -556,4 +556,4 @@ files = [
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.9"
-content-hash = "84a41aac6241a5e1eec3635df8c2e51d27506f1098f0e3c45dbed90aa26e5c60"
+content-hash = "f6160acfe46ff419f788eda7f0d7ff8bd5ab69b2a6bc188331e2501cb6bd41a8"
diff --git a/pyproject.toml b/pyproject.toml
index dd294f2..2d1ab4b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,6 +9,7 @@ license = "WTFPL"
 python = "^3.9"
 beautifulsoup4 = "^4.13.3"
 curl_cffi = "^0.9.0b2"
+pillow = "^11.1.0"
 pypub3 = "^2.0.7"
 pyxml3 = "^0.0.4"