New way of generating epubs

2022-08-18 12:59:02 +02:00 · 2022-08-18 12:59:02 +02:00 · 28b2091182
commit 28b2091182
parent 4d550b3806
7 changed files with 90 additions and 30 deletions
--- a/.flake8
+++ b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+exclude = .venv/
+max-line-length = 100
--- a/convert.sh
+++ b/convert.sh
@@ -2,6 +2,6 @@

 for FILE in output/epubs/*
 do
-  ebook-polish -fpuiU "${FILE}" "${FILE}"
+  ebook-polish -fjpuiHU "${FILE}" "${FILE}"
  ebook-convert "${FILE}" "${FILE%.*}.pdf"
 done
--- a/gravesong.py
+++ b/gravesong.py
@@ -1,13 +1,23 @@
 import bs4
-import executor
 import logging
-import pypub
 import requests
+import utils
+
+from ebooklib import epub


-@executor.executor
-def process(metadata, output_dir):
-    book = pypub.Epub("Gravesong", **metadata)
+@utils.executor
+def process(metadatas, output_dir):
+    book = epub.EpubBook()
+    book_items = []
+    book.set_identifier("gravesong")
+    book.set_title("Gravesong")
+    book.set_language(metadatas["language"])
+    book.set_cover("cover.jpg", open(metadatas["cover"], "rb").read())
+    book.add_metadata("DC", "description", "Advance Patreon Edition")
+
+    for author in metadatas["authors"]:
+        book.add_author(author)

    response = requests.post(
        "https://wanderinginn.com/wp-pass.php",
@@ -28,15 +38,34 @@ def process(metadata, output_dir):
    for paragraph in entry_content.children:
        if paragraph.name == "h1":
            logging.log(logging.INFO, f"Gravesong - {chapter_title}")
-            book.add_chapter(
-                pypub.create_chapter_from_string(chapter_content, chapter_title)
+            book_items.append(
+                epub.EpubHtml(
+                    title=chapter_title,
+                    content=utils.generate_title_html(chapter_title, chapter_content),
+                    file_name=f"{chapter_title}.xhtml",
+                    lang=metadatas["language"],
+                )
            )
            chapter_title = paragraph.get_text().strip()
        elif paragraph.name == "p":
            chapter_content += paragraph.prettify()

    logging.log(logging.INFO, f"Gravesong - {chapter_title}")
-    book.add_chapter(pypub.create_chapter_from_string(chapter_content, chapter_title))
+    book_items.append(
+        epub.EpubHtml(
+            title=chapter_title,
+            content=utils.generate_title_html(chapter_title, chapter_content),
+            file_name=f"{chapter_title}.xhtml",
+            lang=metadatas["language"],
+        )
+    )

    logging.log(logging.INFO, "Gravesong - Book")
-    book.create_epub(output_dir)
+    for book_item in book_items:
+        book.add_item(book_item)
+
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+    book.toc = book_items
+    book.spine = ["cover", "nav", *book_items]
+    epub.write_epub(f"{output_dir}/Gravesong.epub", book)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 requests==2.28.1
 beautifulsoup4==4.11.1
-git+https://git.crystalyx.net/Xefir/pypub@fix/py3#pypub
+EbookLib==0.17.1
--- a/run.py
+++ b/run.py
@@ -36,18 +36,16 @@ async def main():
    await asyncio.gather(
        gravesong.process(
            {
-                "creator": "Pirateaba",
+                "authors": ["Pirateaba"],
                "language": "en",
-                "publisher": "Xefir",
                "cover": f"{output_imgs}/gravesong-by-boboplushie.jpg",
            },
            output_epubs,
        ),
        twi.process(
            {
-                "creator": "Pirateaba",
+                "authors": ["Pirateaba"],
                "language": "en",
-                "publisher": "Xefir",
                "cover": f"{output_imgs}/twi.jpg",
            },
            output_epubs,
@@ -55,9 +53,8 @@ async def main():
        ),
        twi.process(
            {
-                "creator": "Pirateaba",
+                "authors": ["Pirateaba", "ElliVia"],
                "language": "fr",
-                "publisher": "Maroti, ElliVia",
                "cover": f"{output_imgs}/twi.jpg",
            },
            output_epubs,
--- a/twi.py
+++ b/twi.py
@@ -1,12 +1,22 @@
 import bs4
-import executor
 import logging
-import pypub
 import requests
+import utils
+
+from ebooklib import epub


-def fetchVolume(title, metadata, volume_title, output_dir, links):
-    book = pypub.Epub(f"{title} - {volume_title}", **metadata)
+def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
+    book = epub.EpubBook()
+    book_items = []
+    book.set_identifier(title.replace(" ", "").lower())
+    book.set_title(title)
+    book.set_language(metadatas["language"])
+    book.set_cover("cover.jpg", open(metadatas["cover"], "rb").read())
+    book.add_metadata("DC", "description", description)
+
+    for author in metadatas["authors"]:
+        book.add_author(author)

    for link in links:
        chapter_response = requests.get(link["href"])
@@ -18,9 +28,6 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
            chapter_content.find_all("a"),
            chapter_content.find_all("h3"),
            chapter_content.find_all("hr"),
-            chapter_content.find_all("img"),
-            chapter_content.find_all("iframe"),
-            chapter_content.find_all("script"),
            chapter_content.select("div.tiled-gallery"),
        ]

@@ -29,21 +36,36 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
                removed.decompose()

        logging.log(logging.INFO, f"{title} - {chapter_title}")
-        book.add_chapter(
-            pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title)
+        book_items.append(
+            epub.EpubHtml(
+                title=chapter_title,
+                content=utils.generate_title_html(
+                    chapter_title, chapter_content.prettify()
+                ),
+                file_name=f"{chapter_title}.xhtml",
+                lang=metadatas["language"],
+            )
        )

    logging.log(logging.INFO, f"{title} - {volume_title}")
-    book.create_epub(output_dir)
+    for book_item in book_items:
+        book.add_item(book_item)
+
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+    book.toc = book_items
+    book.spine = ["cover", "nav", *book_items]
+    epub.write_epub(f"{output_dir}/{title} - {volume_title}.epub", book)


-@executor.executor
-def process(metadata, output_dir, url):
+@utils.executor
+def process(metadatas, output_dir, url):
    response = requests.get(url)

    html = bs4.BeautifulSoup(response.content, "html.parser")
    content = html.select("div.entry-content > p")
    title = html.select_one("#site-title > span > a").get_text().strip()
+    description = html.select_one("#site-description").get_text().strip()

    volume_title = None

@@ -52,5 +74,10 @@ def process(metadata, output_dir, url):
            volume_title = paragraph.strong.get_text().strip()
        else:
            fetchVolume(
-                title, metadata, volume_title, output_dir, paragraph.find_all("a")
+                title,
+                description,
+                metadatas,
+                volume_title,
+                output_dir,
+                paragraph.find_all("a"),
            )
--- a/executor.py
+++ b/executor.py
@@ -10,3 +10,7 @@ def executor(f):
        )

    return wrapped
+
+
+def generate_title_html(title, content):
+    return f"<h1 style='text-align:center;margin:4rem'>{title}</h1>{content}"