This commit is contained in:
parent
09581016c3
commit
5174a7a1e7
20
.drone.yml
20
.drone.yml
@ -3,19 +3,15 @@ name: default
|
||||
type: docker
|
||||
|
||||
steps:
|
||||
- name: flake8
|
||||
image: python:slim
|
||||
commands:
|
||||
- pip install flake8
|
||||
- flake8 --max-line-length 111
|
||||
- name: epub
|
||||
image: python
|
||||
image: node:lts
|
||||
commands:
|
||||
- pip install -r requirements.txt
|
||||
- python livres_fr.py
|
||||
- python livres_en.py
|
||||
- npm i
|
||||
- eslint *.js
|
||||
- node livres_fr
|
||||
- node livres_en
|
||||
- name: pdf
|
||||
image: debian:10-slim
|
||||
image: debian:stable-slim
|
||||
commands:
|
||||
- apt-get update
|
||||
- apt-get install -y --no-install-recommends calibre
|
||||
@ -34,7 +30,7 @@ name: dependabot
|
||||
type: docker
|
||||
|
||||
steps:
|
||||
- name: dependabot-pip
|
||||
- name: dependabot-node
|
||||
image: xefir/docker-dependabot-gitea
|
||||
environment:
|
||||
GITHUB_ACCESS_TOKEN:
|
||||
@ -44,7 +40,7 @@ steps:
|
||||
PROJECT_PATH: Xefir/Auberge_Vagabonde
|
||||
GITEA_HOSTNAME: git.crystalyx.net
|
||||
GITEA_SCHEME: https
|
||||
PACKAGE_MANAGER: pip
|
||||
PACKAGE_MANAGER: npm_and_yarn
|
||||
|
||||
trigger:
|
||||
event:
|
||||
|
9
.eslintrc.yml
Normal file
9
.eslintrc.yml
Normal file
@ -0,0 +1,9 @@
|
||||
env:
|
||||
commonjs: true
|
||||
es2021: true
|
||||
node: true
|
||||
extends: 'eslint:recommended'
|
||||
parserOptions:
|
||||
ecmaVersion: 12
|
||||
rules:
|
||||
no-constant-condition: off
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,3 +1,3 @@
|
||||
.idea
|
||||
.venv
|
||||
output
|
||||
node_modules
|
||||
|
69
livres_en.js
Normal file
69
livres_en.js
Normal file
@ -0,0 +1,69 @@
|
||||
const cheerio = require('cheerio')
|
||||
const fetch = require('node-fetch')
|
||||
const epub = require('epub-gen')
|
||||
|
||||
const START_VOLUME = 3
|
||||
const books = []
|
||||
|
||||
/**
 * Build the epub-gen option object for one English volume.
 *
 * Each call returns a brand-new object, so the `content` and `links` arrays
 * are never shared between two volumes.
 *
 * @param {number|string} volume - Volume number interpolated into the title and the output path.
 * @returns {object} Book options, plus a `links` array that the script fills with chapter URLs.
 */
const metadata = (volume) => {
  const name = `The Wandering Inn - Volume ${volume}`;

  return {
    title: name,
    author: ['Pirateaba'],
    cover: 'https://i.pinimg.com/originals/0b/fd/cf/0bfdcfb42ba3ff0a22f4a7bc52928af4.png',
    output: `output/${name}.epub`,
    version: 3,
    lang: 'en',
    appendChapterTitles: true,
    content: [],
    links: [],
    verbose: true,
    description: 'A tale of a girl, an inn, and a world full of levels',
  };
};
|
||||
|
||||
/**
 * Download one chapter page and extract its title and body HTML.
 *
 * All anchors and tiled image galleries are removed from the body before it
 * is serialised. The chapter title is logged to the console as a progress
 * indicator.
 *
 * @param {string} url - Address of the chapter page.
 * @returns {Promise<{title: string, data: (string|null)}>} Chapter title and
 *   inner HTML of `div.entry-content` (`null` when the page has no such element).
 * @throws {Error} When the server answers with a non-2xx status, so that an
 *   error page is never embedded in the book as if it were a chapter.
 */
const fetchPage = async (url) => {
  const response = await fetch(url);

  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
  }

  const html = cheerio.load(await response.text());

  const title = html('h1.entry-title').text();
  const content = html('div.entry-content');

  // Strip links and image galleries from the chapter body.
  content.find('a').remove();
  content.find('div.tiled-gallery').remove();

  const data = content.html();

  console.log(title);

  return { title, data };
};
|
||||
|
||||
/**
 * Entry point: read the table of contents, collect the chapter links of every
 * volume from START_VOLUME onwards, download the chapters and generate one
 * EPUB per volume.
 */
(async () => {
  const response = await fetch('https://wanderinginn.com/table-of-contents/');

  if (!response.ok) {
    throw new Error(`Failed to fetch the table of contents: HTTP ${response.status}`);
  }

  const html = cheerio.load(await response.text());

  // The selected paragraphs are read as alternating pairs: an even index is
  // treated as a "Volume N" heading, the odd index after it as the paragraph
  // holding that volume's chapter links.
  let currentBook = null;

  html('div.entry-content > p').each((index, paragraph) => {
    if (index % 2 === 0) {
      // Parse the number explicitly instead of relying on string coercion;
      // a heading that is not "Volume N" yields NaN and is skipped below.
      const volume = Number.parseInt(html(paragraph).text().replace(/Volume /, ''), 10);

      // The book is labelled with the volume number, not the paragraph index.
      currentBook = volume >= START_VOLUME ? metadata(volume) : null;

      if (currentBook !== null) {
        books.push(currentBook);
      }

      return;
    }

    if (currentBook === null) {
      return;
    }

    html('a', paragraph).each((_, anchor) => {
      const href = html(anchor).attr('href');

      // Anchors without a target cannot be fetched.
      if (href) {
        currentBook.links.push(href);
      }
    });
  });

  // Volumes are processed concurrently; chapters inside one volume are
  // fetched one after another so they stay in reading order.
  await Promise.all(books.map(async (book) => {
    for (const link of book.links) {
      book.content.push(await fetchPage(link));
    }

    // Wait for the generator so that a failure surfaces here.
    await new epub(book).promise;
  }));
})().catch((error) => {
  // Report the failure and make the process exit with a non-zero status.
  console.error(error);
  process.exitCode = 1;
});
|
61
livres_en.py
61
livres_en.py
@ -1,61 +0,0 @@
|
||||
import os
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from pypub import Epub, create_chapter_from_string
|
||||
|
||||
if not os.path.exists('output'):
|
||||
os.makedirs('output')
|
||||
|
||||
html = requests.get('https://wanderinginn.com/table-of-contents/')
|
||||
soup = BeautifulSoup(html.text, 'html.parser')
|
||||
|
||||
tags = soup.select('div.entry-content > p')
|
||||
proceed = True
|
||||
volume = 0
|
||||
volumes = [None, None, None]
|
||||
|
||||
for volume_tag in tags:
|
||||
if not proceed:
|
||||
proceed = True
|
||||
continue
|
||||
|
||||
if 'Volume' in volume_tag.get_text() and int(volume_tag.get_text().replace('Volume ', '')) < len(volumes):
|
||||
proceed = False
|
||||
continue
|
||||
|
||||
if 'Volume' in volume_tag.get_text():
|
||||
volume = int(volume_tag.get_text().replace('Volume ', ''))
|
||||
volumes.append(
|
||||
Epub("The Wandering Inn - Volume %d" % volume, creator='Pirateaba', rights='Pirateaba',
|
||||
language='en', cover='cover.png')
|
||||
)
|
||||
continue
|
||||
|
||||
for link_tag in volume_tag:
|
||||
if link_tag.name == 'a':
|
||||
chapter = requests.get(link_tag['href'])
|
||||
chapter_soup = soup = BeautifulSoup(chapter.text, 'html.parser')
|
||||
|
||||
title = chapter_soup.select_one('h1.entry-title').get_text()
|
||||
text = chapter_soup.select_one('div.entry-content')
|
||||
|
||||
for pagination in text.find_all('a'):
|
||||
pagination.decompose()
|
||||
for cut in text.find_all('hr'):
|
||||
cut.decompose()
|
||||
|
||||
if volume >= 7:
|
||||
for image in text.find_all('img'):
|
||||
image.decompose()
|
||||
|
||||
print(title)
|
||||
|
||||
chapter = create_chapter_from_string(str(text), title)
|
||||
volumes[volume].add_chapter(chapter)
|
||||
|
||||
for ebook in volumes:
|
||||
if ebook is None:
|
||||
continue
|
||||
|
||||
ebook.create_epub('output')
|
66
livres_fr.js
Normal file
66
livres_fr.js
Normal file
@ -0,0 +1,66 @@
|
||||
const cheerio = require('cheerio')
|
||||
const fetch = require('node-fetch')
|
||||
const epub = require('epub-gen')
|
||||
|
||||
let post = 0
|
||||
let page = 0
|
||||
const MAX_VOLUME = 3
|
||||
const books = []
|
||||
|
||||
/**
 * Map a forum post number to the volume it belongs to.
 *
 * Posts numbered 69 or lower belong to volume 1, posts above 69 and up to 168
 * to volume 2, and everything else to MAX_VOLUME.
 *
 * @param {number} post - Running post counter.
 * @returns {number} Volume number.
 */
const volume = (post) => {
  if (post <= 69) return 1;
  if (post <= 168) return 2;
  return MAX_VOLUME;
};
|
||||
|
||||
/**
 * Build the epub-gen option object for one French volume.
 *
 * Each call returns a brand-new object, so the `content` array is never
 * shared between two volumes.
 *
 * @param {number|string} volume - Volume number interpolated into the title and the output path.
 * @returns {object} Book options with an empty `content` array to be filled with chapters.
 */
const metadata = (volume) => {
  const name = `L'auberge Vagabonde - Volume ${volume}`;

  return {
    title: name,
    author: ['Maroti', 'ElliVia', 'Pirateaba'],
    cover: 'https://i.pinimg.com/originals/0b/fd/cf/0bfdcfb42ba3ff0a22f4a7bc52928af4.png',
    output: `output/${name}.epub`,
    version: 3,
    lang: 'fr',
    tocTitle: 'Table des matières',
    appendChapterTitles: false,
    content: [],
    verbose: true,
    description: "L'histoire d'une fille, d'une auberge et d'un monde plein de niveaux",
  };
};
|
||||
|
||||
// One book per volume, numbered from 1: books[n - 1] holds volume n, which is
// how the chapter loop indexes the array (books[volume(post) - 1]). Starting
// at 0 would label every volume one too low and leave an empty extra book.
for (let number = 1; number <= MAX_VOLUME; number++) {
  books.push(metadata(number));
}
|
||||
|
||||
/**
 * Entry point: walk the forum thread page by page, turn every post that has a
 * centred title block into a chapter of the matching volume, then generate
 * one EPUB per book.
 */
(async () => {
  while (true) {
    // The thread is addressed by post offset, 15 posts per page.
    const response = await fetch(`https://www.jeunesecrivains.com/t53075p${page * 15}-the-wandering-inn-fan-traduction-fantastique-aventure`);
    const html = cheerio.load(await response.text());

    const postBody = html('div.postbody');

    // A page without any post body ends the crawl.
    // NOTE(review): this assumes the forum serves a page without posts past
    // the end of the thread — confirm.
    if (postBody.html() === null) break;

    postBody.each((i, el) => {
      // The very first post of the thread is skipped.
      if (++post === 1) return;

      const title = html('div[align=center]', el);

      // Posts without a centred title block are not treated as chapters.
      if (title.html() === null) return;

      let text = html('div > div', el);

      // Blank out the inline style on the selected body element(s).
      text = html(text).attr('style', '');

      // Flatten the title markup into a single line of plain text.
      const titleText = title.first().html()
        .replace(/<br>/g, ' ')
        .replace(/<(\/|)strong>/g, '')
        .replace(/\*/g, '');

      console.log(`${post} - ${titleText}`);

      books[volume(post) - 1].content.push({
        title: titleText,
        data: text.html(),
      });
    });

    // Advance to the next page; without this the same page is fetched forever.
    page++;
  }

  // Generate all books and wait for them so that a failure surfaces here.
  await Promise.all(books.map((book) => new epub(book).promise));
})().catch((error) => {
  // Report the failure and make the process exit with a non-zero status.
  console.error(error);
  process.exitCode = 1;
});
|
64
livres_fr.py
64
livres_fr.py
@ -1,64 +0,0 @@
|
||||
import os
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from pypub import Epub, create_chapter_from_string
|
||||
|
||||
if not os.path.exists('output'):
|
||||
os.makedirs('output')
|
||||
|
||||
post = 0
|
||||
url = 'https://www.jeunesecrivains.com/t53075p%s-the-wandering-inn-fan-traduction-fantastique-aventure'
|
||||
volumes = [
|
||||
None,
|
||||
Epub("L'auberge Vagabonde - Volume 1", creator='Maroti, ElliVia', rights='Pirateaba', language='fr',
|
||||
cover='cover.png'),
|
||||
Epub("L'auberge Vagabonde - Volume 2", creator='Maroti, ElliVia', rights='Pirateaba', language='fr',
|
||||
cover='cover.png'),
|
||||
Epub("L'auberge Vagabonde - Volume 3", creator='Maroti, ElliVia', rights='Pirateaba', language='fr',
|
||||
cover='cover.png')
|
||||
]
|
||||
|
||||
for page in range(12):
|
||||
html = requests.get(url % (page * 15))
|
||||
soup = BeautifulSoup(html.text, 'html.parser')
|
||||
tags = soup.select('div.postbody')
|
||||
|
||||
for tag in tags:
|
||||
post = post + 1
|
||||
|
||||
if post <= 69:
|
||||
volume = 1
|
||||
elif post <= 168:
|
||||
volume = 2
|
||||
else:
|
||||
volume = 3
|
||||
|
||||
if post == 1:
|
||||
continue
|
||||
|
||||
title_test = tag.div.select_one('div[align=center]')
|
||||
|
||||
if not title_test:
|
||||
continue
|
||||
|
||||
title = title_test.contents[0]
|
||||
title_test.decompose()
|
||||
|
||||
text = tag.div.div
|
||||
|
||||
if text.get_text() == '':
|
||||
text = tag.div
|
||||
|
||||
del text['style']
|
||||
|
||||
print(str(post) + ' - ' + title)
|
||||
|
||||
chapter = create_chapter_from_string(str(text), title)
|
||||
volumes[volume].add_chapter(chapter)
|
||||
|
||||
for ebook in volumes:
|
||||
if ebook is None:
|
||||
continue
|
||||
|
||||
ebook.create_epub('output')
|
@ -1,3 +0,0 @@
|
||||
requests==2.25.1
|
||||
beautifulsoup4==4.9.3
|
||||
git+git://github.com/Xefir/pypub@fix/py3#egg=pypub
|
12
utils.py
12
utils.py
@ -1,12 +0,0 @@
|
||||
from bs4.element import NavigableString
|
||||
|
||||
|
||||
def strip_content(tag):
    """Remove every text node from ``tag`` and, recursively, from its descendants.

    Only child nodes that are not ``NavigableString`` instances are kept; they
    are re-attached in their original order once the tag has been emptied.
    The tag is modified in place and also returned.
    """
    kept = []
    for node in tag.children:
        # Text nodes are dropped; every other child is stripped recursively.
        if isinstance(node, NavigableString):
            continue
        kept.append(strip_content(node))

    # Empty the tag, then put the stripped children back.
    tag.clear()
    for node in kept:
        tag.append(node)

    return tag
|
Reference in New Issue
Block a user