Remove iframes and use chapter index for file names
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Michel Roux 2022-08-18 13:43:30 +02:00
parent 28b2091182
commit 59c9f8710c
2 changed files with 9 additions and 3 deletions

View File

@@ -34,6 +34,7 @@ def process(metadatas, output_dir):
chapter_title = "Introduction" chapter_title = "Introduction"
chapter_content = "" chapter_content = ""
chapter_index = 1
for paragraph in entry_content.children: for paragraph in entry_content.children:
if paragraph.name == "h1": if paragraph.name == "h1":
@@ -42,11 +43,12 @@ def process(metadatas, output_dir):
epub.EpubHtml( epub.EpubHtml(
title=chapter_title, title=chapter_title,
content=utils.generate_title_html(chapter_title, chapter_content), content=utils.generate_title_html(chapter_title, chapter_content),
file_name=f"{chapter_title}.xhtml", file_name=f"{chapter_index}.xhtml",
lang=metadatas["language"], lang=metadatas["language"],
) )
) )
chapter_title = paragraph.get_text().strip() chapter_title = paragraph.get_text().strip()
chapter_index += 1
elif paragraph.name == "p": elif paragraph.name == "p":
chapter_content += paragraph.prettify() chapter_content += paragraph.prettify()
@@ -55,7 +57,7 @@ def process(metadatas, output_dir):
epub.EpubHtml( epub.EpubHtml(
title=chapter_title, title=chapter_title,
content=utils.generate_title_html(chapter_title, chapter_content), content=utils.generate_title_html(chapter_title, chapter_content),
file_name=f"{chapter_title}.xhtml", file_name=f"{chapter_index}.xhtml",
lang=metadatas["language"], lang=metadatas["language"],
) )
) )

6
twi.py
View File

@@ -18,6 +18,7 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
for author in metadatas["authors"]: for author in metadatas["authors"]:
book.add_author(author) book.add_author(author)
chapter_index = 1
for link in links: for link in links:
chapter_response = requests.get(link["href"]) chapter_response = requests.get(link["href"])
chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser") chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
@@ -28,6 +29,8 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
chapter_content.find_all("a"), chapter_content.find_all("a"),
chapter_content.find_all("h3"), chapter_content.find_all("h3"),
chapter_content.find_all("hr"), chapter_content.find_all("hr"),
chapter_content.find_all("iframe"),
chapter_content.find_all("script"),
chapter_content.select("div.tiled-gallery"), chapter_content.select("div.tiled-gallery"),
] ]
@@ -42,10 +45,11 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
content=utils.generate_title_html( content=utils.generate_title_html(
chapter_title, chapter_content.prettify() chapter_title, chapter_content.prettify()
), ),
file_name=f"{chapter_title}.xhtml", file_name=f"{chapter_index}.xhtml",
lang=metadatas["language"], lang=metadatas["language"],
) )
) )
chapter_index += 1
logging.log(logging.INFO, f"{title} - {volume_title}") logging.log(logging.INFO, f"{title} - {volume_title}")
for book_item in book_items: for book_item in book_items: