"""Scrape novel chapters from the web and bundle them into an EPUB.

Reads a per-language manifest (``fr.json`` / ``en.json`` located next to
this script), downloads every chapter page of the requested volume,
strips each page down to its paragraph content, fetches the cover image,
and writes the finished EPUB to OUTPUT.
"""
from argparse import ArgumentParser
from json import load
from os import path
from pathlib import Path
from random import randint
from tempfile import NamedTemporaryFile
from time import sleep
from typing import cast

from bs4 import BeautifulSoup
from curl_cffi import requests
from PIL import Image
from pypub import (  # type: ignore
    Chapter,
    Epub,
    SimpleChapterFactory,
    factory,
)
from pyxml.html import HtmlElement, fromstring  # type: ignore

parser = ArgumentParser()
parser.add_argument("-l", "--lang", choices=["fr", "en"], required=True)
parser.add_argument("-v", "--volume", type=int, choices=range(1, 11), required=True)
parser.add_argument("output", type=Path)
args = parser.parse_args()


class MyChapterFactory(SimpleChapterFactory):  # type: ignore
    """Chapter factory that sanitizes ``<img>`` tags and shrinks images."""

    def cleanup_html(self, content: bytes) -> HtmlElement:
        """
        cleanup html content to only include supported tags
        """
        etree = fromstring(content)
        # fix and remove invalid images
        for img in etree.xpath(".//img"):
            if "src" not in img.attrib:
                # an image with no source cannot be rendered; drop it
                cast(HtmlElement, img.getparent()).remove(img)
            elif "alt" not in img.attrib:
                # ensure every remaining image carries an alt attribute
                img.attrib["alt"] = img.attrib["src"]
        return etree

    def hydrate(self, ctx: factory.RenderCtx) -> None:
        """
        modify chapter element-tree to render images
        """
        ctx.timeout = 100
        factory.render_images(ctx)
        for img in ctx.etree.xpath(".//img[@src]"):
            # cleanup link and resolve relative paths
            url = img.attrib["src"].rsplit("?", 1)[0]
            if "://" in url:
                # still an absolute URL: the image was not downloaded
                # locally by render_images, so remove the element
                cast(HtmlElement, img.getparent()).remove(img)
            else:
                fpath = path.join(ctx.imagedir, url.replace("images/", ""))
                # BUGFIX: use a context manager so PIL's file handle is
                # closed, and stop shadowing the loop variable
                with Image.open(fpath) as picture:
                    picture.thumbnail((1000, 2000))
                    picture.save(fpath)
        if ctx.extern_links and ctx.chapter.url:
            factory.externalize_links(ctx.chapter.url, ctx.etree)


# load the per-language manifest stored next to this script
with open(Path(__file__).with_name(f"{args.lang}.json")) as f:
    manifest = load(f)
book = manifest[args.volume - 1]

epub = Epub(
    title=book["title"],
    creator=book["creator"],
    language=args.lang,
    publisher="Nanamazon+",
    factory=MyChapterFactory(),
)

for url in book["chapters"]:
    markup = requests.get(
        url,
        impersonate=requests.BrowserType.firefox133.value,
        thread=None,
        curl_options=None,
        debug=False,
    )
    soup = BeautifulSoup(markup.text, "html.parser")
    heading = soup.select_one("h1.entry-title")
    if heading is None:
        # fail with a clear message instead of an opaque AttributeError
        raise ValueError(f"no chapter title found at {url}")
    title = heading.text
    content = soup.select("div.entry-content p")
    print(f"Chapter {title}...")
    chapter = Chapter(title, "")
    for elem in content:
        # paragraphs containing a link are assumed to be navigation
        # (next/previous chapter), not story text — skip them
        if elem.a:
            continue
        chapter.content += elem.prettify()
    epub.add_chapter(chapter)
    # random pause between requests to avoid hammering the server
    delay = randint(1, 29)
    print(f"Wait {delay} seconds...")
    sleep(delay)

with NamedTemporaryFile() as cover:
    response = requests.get(book["cover"], thread=None, curl_options=None, debug=False)
    cover.write(response.content)
    # BUGFIX: flush buffered bytes to disk so epub.create() reads the
    # complete cover image rather than a truncated/empty file
    cover.flush()
    epub.cover = cover.name
    epub.create(args.output)