This repository has been archived on 2024-02-23. You can view files and clone it, but cannot push or open issues or pull requests.
Auberge_Vagabonde/livres_en.py

60 lines
1.7 KiB
Python

import os
import requests
from bs4 import BeautifulSoup
from pypub import Epub, create_chapter_from_string
# Scrape the table of contents of "The Wandering Inn" and build one EPUB per
# volume, starting at FIRST_VOLUME. Every chapter linked from a volume's
# paragraph(s) is downloaded, stripped of links, images and rules, and appended
# to that volume's book. The finished books are written to OUTPUT_DIR.

OUTPUT_DIR = 'output'
TOC_URL = 'https://wanderinginn.com/table-of-contents/'
FIRST_VOLUME = 3  # volumes numbered below this are not downloaded
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get may block forever
UNWANTED_TAGS = ('a', 'img', 'hr')  # removed from chapter bodies, in this order


def _fetch_soup(url):
    """Download *url* and return the parsed page.

    Raises requests.HTTPError on a 4xx/5xx answer so that an error page is
    never mistaken for real content.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _volume_number(heading):
    """Return N when *heading* reads 'Volume N ...', otherwise None."""
    words = heading.split()
    if len(words) < 2 or words[0] != 'Volume':
        return None
    try:
        return int(words[1])
    except ValueError:
        return None


def _build_chapter(url):
    """Fetch the chapter at *url* and return it as a pypub chapter.

    Returns None (after printing a notice) when the page has no
    'h1.entry-title' or no 'div.entry-content' element.
    """
    page = _fetch_soup(url)
    heading = page.select_one('h1.entry-title')
    body = page.select_one('div.entry-content')
    if heading is None or body is None:
        print('Skipping %s: chapter title or content not found' % url)
        return None
    title = heading.get_text()
    # One pass per tag name: find_all runs again after the previous kind has
    # been removed, so an element nested in an already-decomposed one is never
    # touched a second time.
    for tag_name in UNWANTED_TAGS:
        for node in body.find_all(tag_name):
            node.decompose()
    print(title)
    return create_chapter_from_string(str(body), title)


# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(OUTPUT_DIR, exist_ok=True)

toc = _fetch_soup(TOC_URL)

volumes = {}    # volume number -> Epub, in table-of-contents order
current = None  # book receiving chapters; None while inside a skipped volume

for paragraph in toc.select('div.entry-content > p'):
    number = _volume_number(paragraph.get_text())
    if number is not None:
        if number < FIRST_VOLUME or number in volumes:
            # Volume not wanted (or heading seen before): ignore every
            # paragraph up to the next volume heading.
            current = None
        else:
            current = volumes[number] = Epub(
                "The Wandering Inn - Volume %d" % number,
                creator='Pirateaba', rights='Pirateaba',
                language='en', cover='cover.png',
            )
        continue
    if current is None:
        continue
    # Only anchors that are direct children of the paragraph and carry an
    # href are chapter links.
    for link in paragraph.find_all('a', href=True, recursive=False):
        chapter = _build_chapter(link['href'])
        if chapter is not None:
            current.add_chapter(chapter)

for ebook in volumes.values():
    ebook.create_epub(OUTPUT_DIR)