This commit is contained in:
parent
ee00265a9b
commit
3f94553e7c
@ -6,11 +6,10 @@ from random import randint
|
|||||||
from tempfile import NamedTemporaryFile
|
from tempfile import NamedTemporaryFile
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from typing import cast
|
from typing import cast
|
||||||
from urllib import error, parse
|
|
||||||
from uuid import uuid4
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from curl_cffi import requests
|
from curl_cffi import requests
|
||||||
|
from PIL import Image
|
||||||
from pypub import ( # type: ignore
|
from pypub import ( # type: ignore
|
||||||
Chapter,
|
Chapter,
|
||||||
Epub,
|
Epub,
|
||||||
@ -44,68 +43,24 @@ class MyChapterFactory(SimpleChapterFactory): # type: ignore
|
|||||||
# return new element-tree
|
# return new element-tree
|
||||||
return etree
|
return etree
|
||||||
|
|
||||||
def render_images(self, ctx: factory.RenderCtx) -> None:
|
|
||||||
"""
|
|
||||||
replace global image references w/ local downloaded ones
|
|
||||||
"""
|
|
||||||
downloads: dict[str, str] = {}
|
|
||||||
for image in ctx.etree.xpath(".//img[@src]"):
|
|
||||||
# cleanup link and resolve relative paths
|
|
||||||
url = image.attrib["src"].rsplit("?", 1)[0]
|
|
||||||
fmt = (ctx.chapter.title, url)
|
|
||||||
if "://" not in url:
|
|
||||||
if not ctx.chapter.url:
|
|
||||||
ctx.logger.warning(
|
|
||||||
"chapter[{}] cannot render image {!r} w/o chapter-url".format(
|
|
||||||
*fmt
|
|
||||||
)
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
url = parse.urljoin(ctx.chapter.url, url)
|
|
||||||
fmt = (ctx.chapter.title, url)
|
|
||||||
# skip if url has already been downloaded
|
|
||||||
if url in downloads:
|
|
||||||
image.attrib["src"] = downloads[url]
|
|
||||||
continue
|
|
||||||
# download url into local image folder for epub
|
|
||||||
ctx.logger.debug("chapter[{}] downloading image: {!r}".format(*fmt))
|
|
||||||
try:
|
|
||||||
res = requests.get(
|
|
||||||
url,
|
|
||||||
impersonate=requests.BrowserType.firefox133.value,
|
|
||||||
thread=None,
|
|
||||||
curl_options=None,
|
|
||||||
debug=False,
|
|
||||||
)
|
|
||||||
# ensure status of response is valid
|
|
||||||
status = getattr(res, "status", None)
|
|
||||||
if status and status != 200:
|
|
||||||
raise error.URLError(f"status: {status}")
|
|
||||||
# read first chunk to determine content-type
|
|
||||||
chunk = res.content
|
|
||||||
mime = factory.mime_type(url, chunk)
|
|
||||||
if not mime:
|
|
||||||
ctx.logger.warning(
|
|
||||||
"chapter[{}] cannot identify {!r} mime".format(*fmt)
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
fname = f"image-{uuid4()}.{mime}"
|
|
||||||
fpath = path.join(ctx.imagedir, fname)
|
|
||||||
# read rest of the content into associated file
|
|
||||||
with open(fpath, "wb") as f:
|
|
||||||
f.write(chunk)
|
|
||||||
# save epub-path in downloads cache and update image attribs
|
|
||||||
epub_path = path.join("images/", fname)
|
|
||||||
downloads[url] = epub_path
|
|
||||||
image.attrib["src"] = epub_path
|
|
||||||
except error.URLError:
|
|
||||||
ctx.logger.error("chapter[{}] failed to download {!r}".format(*fmt))
|
|
||||||
|
|
||||||
def hydrate(self, ctx: factory.RenderCtx) -> None:
|
def hydrate(self, ctx: factory.RenderCtx) -> None:
|
||||||
"""
|
"""
|
||||||
modify chapter element-tree to render images
|
modify chapter element-tree to render images
|
||||||
"""
|
"""
|
||||||
self.render_images(ctx)
|
ctx.timeout = 100
|
||||||
|
factory.render_images(ctx)
|
||||||
|
for image in ctx.etree.xpath(".//img[@src]"):
|
||||||
|
# cleanup link and resolve relative paths
|
||||||
|
url = image.attrib["src"].rsplit("?", 1)[0]
|
||||||
|
if "://" in url:
|
||||||
|
# retrieve parent
|
||||||
|
parent = cast(HtmlElement, image.getparent())
|
||||||
|
parent.remove(image)
|
||||||
|
else:
|
||||||
|
fpath = path.join(ctx.imagedir, url.replace("images/", ""))
|
||||||
|
image = Image.open(fpath)
|
||||||
|
image.thumbnail((1000, 2000))
|
||||||
|
image.save(fpath)
|
||||||
if ctx.extern_links and ctx.chapter.url:
|
if ctx.extern_links and ctx.chapter.url:
|
||||||
factory.externalize_links(ctx.chapter.url, ctx.etree)
|
factory.externalize_links(ctx.chapter.url, ctx.etree)
|
||||||
|
|
||||||
|
2
poetry.lock
generated
2
poetry.lock
generated
@ -556,4 +556,4 @@ files = [
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.1"
|
lock-version = "2.1"
|
||||||
python-versions = "^3.9"
|
python-versions = "^3.9"
|
||||||
content-hash = "84a41aac6241a5e1eec3635df8c2e51d27506f1098f0e3c45dbed90aa26e5c60"
|
content-hash = "f6160acfe46ff419f788eda7f0d7ff8bd5ab69b2a6bc188331e2501cb6bd41a8"
|
||||||
|
@ -9,6 +9,7 @@ license = "WTFPL"
|
|||||||
python = "^3.9"
|
python = "^3.9"
|
||||||
beautifulsoup4 = "^4.13.3"
|
beautifulsoup4 = "^4.13.3"
|
||||||
curl_cffi = "^0.9.0b2"
|
curl_cffi = "^0.9.0b2"
|
||||||
|
pillow = "^11.1.0"
|
||||||
pypub3 = "^2.0.7"
|
pypub3 = "^2.0.7"
|
||||||
pyxml3 = "^0.0.4"
|
pyxml3 = "^0.0.4"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user