Remove malicious script and frames
Some checks reported errors
continuous-integration/drone/push Build was killed

This commit is contained in:
Michel Roux 2022-01-19 15:00:03 +00:00
parent 2700e0302d
commit 23f9ce0f07

23
twi.py
View File

@@ -14,19 +14,18 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
chapter_content = chapter_html.select_one("div.entry-content") chapter_content = chapter_html.select_one("div.entry-content")
chapter_title = chapter_html.select_one("h1.entry-title").get_text().strip() chapter_title = chapter_html.select_one("h1.entry-title").get_text().strip()
a_remove = chapter_content.find_all("a") to_remove = [
hr_remove = chapter_content.find_all("hr") chapter_content.find_all("a"),
img_remove = chapter_content.find_all("img") chapter_content.find_all("hr"),
div_remove = chapter_content.select("div.tiled-gallery") chapter_content.find_all("img"),
chapter_content.find_all("iframe"),
chapter_content.find_all("script"),
chapter_content.select("div.tiled-gallery"),
]
for removed in a_remove: for dataset in to_remove:
removed.decompose() for removed in dataset:
for removed in hr_remove: removed.decompose()
removed.decompose()
for removed in img_remove:
removed.decompose()
for removed in div_remove:
removed.decompose()
logging.log(logging.INFO, f"{title} - {chapter_title}") logging.log(logging.INFO, f"{title} - {chapter_title}")
book.add_chapter( book.add_chapter(