fix: 💄 add image support behind cloudflare
This commit is contained in:
parent
14b12e3c28
commit
9127bdd8ae
@ -6,10 +6,17 @@ from random import randint
|
|||||||
from tempfile import NamedTemporaryFile
|
from tempfile import NamedTemporaryFile
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
from urllib import error, parse
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from curl_cffi import requests
|
from curl_cffi import requests
|
||||||
from pypub import Chapter, Epub, SimpleChapterFactory # type: ignore
|
from pypub import ( # type: ignore
|
||||||
|
Chapter,
|
||||||
|
Epub,
|
||||||
|
SimpleChapterFactory,
|
||||||
|
factory,
|
||||||
|
)
|
||||||
from pyxml.html import HtmlElement, fromstring # type: ignore
|
from pyxml.html import HtmlElement, fromstring # type: ignore
|
||||||
|
|
||||||
parser = ArgumentParser()
|
parser = ArgumentParser()
|
||||||
@ -37,6 +44,66 @@ class MyChapterFactory(SimpleChapterFactory): # type: ignore
|
|||||||
# return new element-tree
|
# return new element-tree
|
||||||
return etree
|
return etree
|
||||||
|
|
||||||
|
def hydrate(self, ctx: factory.RenderCtx) -> None:
    """
    modify chapter element-tree to render images

    Every ``<img src=...>`` found in ``ctx.etree`` is fetched with
    ``curl_cffi`` using a Firefox impersonation profile, written into
    ``ctx.imagedir`` and its ``src`` attribute is rewritten to the
    epub-relative ``images/<file>`` path. Images that cannot be resolved,
    downloaded or identified are logged and left untouched.

    :param ctx: render context; this method reads its ``etree``,
                ``chapter``, ``logger``, ``timeout``, ``imagedir`` and
                ``extern_links`` attributes
    """
    # maps resolved image url -> epub path, so each url is fetched only once
    downloads: dict[str, str] = {}
    for image in ctx.etree.xpath(".//img[@src]"):
        # cleanup link (drop query-string) and resolve relative paths
        url = image.attrib["src"].rsplit("?", 1)[0]
        fmt = (ctx.chapter.title, url)
        if "://" not in url:
            if not ctx.chapter.url:
                ctx.logger.warning(
                    "chapter[{}] cannot render image {!r} w/o chapter-url".format(
                        *fmt
                    )
                )
                continue
            url = parse.urljoin(ctx.chapter.url, url)
            fmt = (ctx.chapter.title, url)
        # skip if url has already been downloaded
        if url in downloads:
            image.attrib["src"] = downloads[url]
            continue
        # download url into local image folder for epub
        ctx.logger.debug("chapter[{}] downloading image: {!r}".format(*fmt))
        try:
            res = requests.get(
                url,
                timeout=ctx.timeout,
                impersonate=requests.BrowserType.firefox133.value,
                thread=None,
                curl_options=None,
                debug=False,
            )
            # ensure status of response is valid; curl_cffi exposes the
            # HTTP code as `status_code` (there is no `status` attribute),
            # so a blocked/challenge response is rejected here instead of
            # being treated as image data
            status = res.status_code
            if status != 200:
                raise error.URLError(f"status: {status}")
            # the whole body is already in memory; sniff its content-type
            content = res.content
            mime = factory.mime_type(url, content)
            if not mime:
                ctx.logger.warning(
                    "chapter[{}] cannot identify {!r} mime".format(*fmt)
                )
                continue
            fname = f"image-{uuid4()}.{mime}"
            fpath = path.join(ctx.imagedir, fname)
            # write the image content into its associated file
            with open(fpath, "wb") as f:
                f.write(content)
            # save epub-path in downloads cache and update image attribs
            epub_path = path.join("images/", fname)
            downloads[url] = epub_path
            image.attrib["src"] = epub_path
        except (error.URLError, requests.RequestsError):
            # URLError: bad status raised above; RequestsError: transport
            # failures (timeout, DNS, TLS, ...) raised by curl_cffi itself.
            # Either way the image is skipped so the chapter still renders.
            ctx.logger.error("chapter[{}] failed to download {!r}".format(*fmt))
    if ctx.extern_links and ctx.chapter.url:
        self.externalize_links(ctx.chapter.url, ctx.etree)
|
||||||
|
|
||||||
|
|
||||||
with open(f"{path.dirname(__file__)}/{args.lang}.json") as f:
|
with open(f"{path.dirname(__file__)}/{args.lang}.json") as f:
|
||||||
manifest = load(f)
|
manifest = load(f)
|
||||||
@ -68,7 +135,7 @@ for url in book["chapters"]:
|
|||||||
continue
|
continue
|
||||||
chapter.content += elem.prettify()
|
chapter.content += elem.prettify()
|
||||||
epub.add_chapter(chapter)
|
epub.add_chapter(chapter)
|
||||||
timeout = randint(10, 90)
|
timeout = randint(1, 29)
|
||||||
print(f"Wait {timeout} seconds...")
|
print(f"Wait {timeout} seconds...")
|
||||||
sleep(timeout)
|
sleep(timeout)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user