Remove malicious script and frames
continuous-integration/drone/push Build was killed Details

This commit is contained in:
Michel Roux 2022-01-19 15:00:03 +00:00
parent 2700e0302d
commit 23f9ce0f07
1 changed file with 11 additions and 12 deletions

23
twi.py
View File

@@ -14,19 +14,18 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
chapter_content = chapter_html.select_one("div.entry-content")
chapter_title = chapter_html.select_one("h1.entry-title").get_text().strip()
a_remove = chapter_content.find_all("a")
hr_remove = chapter_content.find_all("hr")
img_remove = chapter_content.find_all("img")
div_remove = chapter_content.select("div.tiled-gallery")
to_remove = [
chapter_content.find_all("a"),
chapter_content.find_all("hr"),
chapter_content.find_all("img"),
chapter_content.find_all("iframe"),
chapter_content.find_all("script"),
chapter_content.select("div.tiled-gallery"),
]
for removed in a_remove:
removed.decompose()
for removed in hr_remove:
removed.decompose()
for removed in img_remove:
removed.decompose()
for removed in div_remove:
removed.decompose()
for dataset in to_remove:
for removed in dataset:
removed.decompose()
logging.log(logging.INFO, f"{title} - {chapter_title}")
book.add_chapter(