Remove iframes and chapter index

2022-08-18 13:43:30 +02:00 · 2022-08-18 13:43:30 +02:00 · 59c9f8710c
commit 59c9f8710c
parent 28b2091182
2 changed files with 9 additions and 3 deletions
--- a/gravesong.py
+++ b/gravesong.py
@@ -34,6 +34,7 @@ def process(metadatas, output_dir):
    chapter_title = "Introduction"
    chapter_content = ""
+    chapter_index = 1
    for paragraph in entry_content.children:
        if paragraph.name == "h1":
@@ -42,11 +43,12 @@ def process(metadatas, output_dir):
                epub.EpubHtml(
                    title=chapter_title,
                    content=utils.generate_title_html(chapter_title, chapter_content),
-                    file_name=f"{chapter_title}.xhtml",
+                    file_name=f"{chapter_index}.xhtml",
                    lang=metadatas["language"],
                )
            )
            chapter_title = paragraph.get_text().strip()
+            chapter_index += 1
        elif paragraph.name == "p":
            chapter_content += paragraph.prettify()
@@ -55,7 +57,7 @@ def process(metadatas, output_dir):
        epub.EpubHtml(
            title=chapter_title,
            content=utils.generate_title_html(chapter_title, chapter_content),
-            file_name=f"{chapter_title}.xhtml",
+            file_name=f"{chapter_index}.xhtml",
            lang=metadatas["language"],
        )
    )
--- a/twi.py
+++ b/twi.py
@@ -18,6 +18,7 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
    for author in metadatas["authors"]:
        book.add_author(author)
+    chapter_index = 1
    for link in links:
        chapter_response = requests.get(link["href"])
        chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
@@ -28,6 +29,8 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
            chapter_content.find_all("a"),
            chapter_content.find_all("h3"),
            chapter_content.find_all("hr"),
            chapter_content.find_all("iframe"),
            chapter_content.find_all("script"),
            chapter_content.select("div.tiled-gallery"),
        ]
@@ -42,10 +45,11 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
                content=utils.generate_title_html(
                    chapter_title, chapter_content.prettify()
                ),
-                file_name=f"{chapter_title}.xhtml",
+                file_name=f"{chapter_index}.xhtml",
                lang=metadatas["language"],
            )
        )
+        chapter_index += 1
    logging.log(logging.INFO, f"{title} - {volume_title}")
    for book_item in book_items: