Back to python + Gravesong
Some checks reported errors
continuous-integration/drone/push Build encountered an error
Some checks reported errors
continuous-integration/drone/push Build encountered an error
This commit is contained in:
parent
a3e84310ff
commit
d3a7ac0fc2
10
.drone.yml
10
.drone.yml
@ -4,11 +4,13 @@ type: docker
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: epub
|
- name: epub
|
||||||
image: node
|
# The official Docker Hub image is `python`; the slim variant is a tag of it.
image: python:slim
|
||||||
commands:
|
commands:
|
||||||
- yarn install
|
- pip install flake8 black
|
||||||
- yarn lint
|
# flake8 codes carry a letter prefix; the line-length check is E501.
- flake8 --ignore=E501
|
||||||
- yarn start
|
# black needs at least one source path to check.
- black --check .
|
||||||
|
- pip install -r requirements.txt
|
||||||
|
- python run.py
|
||||||
- name: pdf
|
- name: pdf
|
||||||
image: linuxserver/calibre
|
image: linuxserver/calibre
|
||||||
commands:
|
commands:
|
||||||
|
@ -1,8 +0,0 @@
|
|||||||
env:
|
|
||||||
es2021: true
|
|
||||||
node: true
|
|
||||||
extends: 'eslint:recommended'
|
|
||||||
parserOptions:
|
|
||||||
ecmaVersion: 12
|
|
||||||
sourceType: module
|
|
||||||
rules: {}
|
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,2 +1,4 @@
|
|||||||
|
__pycache__
|
||||||
|
.venv
|
||||||
|
.vscode
|
||||||
output
|
output
|
||||||
node_modules
|
|
||||||
|
10
executor.py
Normal file
10
executor.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
|
def executor(func):
    """Turn a blocking callable into a coroutine function.

    The returned coroutine function runs ``func`` in the running event loop's
    default executor, so several decorated functions can be awaited together
    (for example with ``asyncio.gather``) without blocking the loop.

    Args:
        func: Blocking callable to wrap.

    Returns:
        A coroutine function that accepts the same arguments as ``func`` and
        resolves to the value ``func`` returns.
    """
    import functools  # local import: this module only imports asyncio

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        loop = asyncio.get_running_loop()
        # run_in_executor forwards positional arguments only, so bind the
        # keyword arguments up front.
        call = functools.partial(func, *args, **kwargs)
        # Hand the result back to the awaiting caller instead of dropping it.
        return await loop.run_in_executor(None, call)

    return wrapper
|
41
gravesong.py
Normal file
41
gravesong.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
import bs4
|
||||||
|
import executor
|
||||||
|
import pypub
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
@executor.executor
def process(metadata, output_dir):
    """Build the "Gravesong" EPUB from the password-protected post.

    Submits the post password, parses the response, removes every image from
    ``div.entry-content`` and splits its ``<p>`` children into chapters at
    each ``<h1>``. Paragraphs before the first heading form the
    "Introduction" chapter.

    Args:
        metadata: Mapping of keyword arguments forwarded to ``pypub.Epub``.
        output_dir: Directory passed to ``book.create_epub``.

    Raises:
        RuntimeError: If the response contains no ``div.entry-content``.
    """
    # Unpack as keywords: ``*metadata`` would pass the dict's keys positionally.
    # NOTE(review): assumes the pinned pypub fork accepts every key the caller
    # supplies (e.g. "cover") - confirm.
    book = pypub.Epub("Gravesong", **metadata)

    response = requests.post(
        "https://wanderinginn.com/wp-pass.php",
        data={"post_password": "Iwalkedameadowweary", "Submit": "Enter"},
        headers={"Referer": "https://wanderinginn.com/2022/01/11/gravesong/"},
    )

    html = bs4.BeautifulSoup(response.content, "html.parser")
    entry_content = html.select_one("div.entry-content")
    if entry_content is None:
        # Without this guard the failure is an opaque AttributeError on None.
        raise RuntimeError("Gravesong: div.entry-content not found in response")

    for content_img in entry_content.find_all("img"):
        content_img.decompose()

    chapter_title = "Introduction"
    chapter_parts = []

    for paragraph in entry_content.children:
        if paragraph.name == "h1":
            # A heading closes the chapter collected so far.
            print(f"Gravesong - {chapter_title}")
            book.add_chapter(
                pypub.create_chapter_from_string("".join(chapter_parts), chapter_title)
            )
            chapter_title = paragraph.get_text()
            # Start the next chapter empty so it does not repeat earlier text.
            chapter_parts = []
        elif paragraph.name == "p":
            chapter_parts.append(paragraph.prettify())

    # The last chapter has no heading after it, so add it explicitly.
    print(f"Gravesong - {chapter_title}")
    book.add_chapter(
        pypub.create_chapter_from_string("".join(chapter_parts), chapter_title)
    )

    print("Gravesong - Book")
    book.create_epub(output_dir)
|
86
index.js
86
index.js
@ -1,86 +0,0 @@
|
|||||||
import fs from 'fs'
|
|
||||||
import cheerio from 'cheerio'
|
|
||||||
import fetch from 'node-fetch'
|
|
||||||
import Epub from 'epub-gen-funstory'
|
|
||||||
|
|
||||||
// Make sure the output directory exists. With `recursive: true`, mkdirSync
// does not throw when the directory is already there, so no prior
// existsSync check (and no check-then-create race) is needed.
fs.mkdirSync('output', { recursive: true })
|
|
||||||
|
|
||||||
/**
 * Build the option object handed to `Epub` for a single volume.
 *
 * @param {number} volume - Volume number, used in the title and output path.
 * @param {string} title - Series title.
 * @param {string[]} author - Author names.
 * @param {string} tocTitle - Heading used for the table of contents.
 * @param {string} description - Book description.
 * @returns {object} Options with fresh, empty `content` and `links` arrays.
 */
const metadata = (volume, title, author, tocTitle, description) => {
  const volumeTitle = `${title} - Volume ${volume}`

  return {
    title: volumeTitle,
    author: author,
    cover: 'https://i.pinimg.com/originals/0b/fd/cf/0bfdcfb42ba3ff0a22f4a7bc52928af4.png',
    output: `output/${volumeTitle}.epub`,
    version: 3,
    lang: 'fr',
    tocTitle: tocTitle,
    appendChapterTitles: true,
    content: [],
    links: [],
    verbose: true,
    description: description,
  }
}
|
|
||||||
|
|
||||||
/**
 * Download one chapter page and reduce it to its title and body markup.
 *
 * Links, `h3` headings, horizontal rules and tiled galleries are stripped
 * from `div.entry-content` before its HTML is returned.
 *
 * @param {string} url - Address of the chapter page.
 * @returns {Promise<{title: string, data: string}>} Chapter title and HTML.
 */
const fetchPage = async (url) => {
  const page = await fetch(url)
  const $ = cheerio.load(await page.text())

  const title = $('h1.entry-title').text()
  const body = $('div.entry-content')

  for (const selector of ['a', 'h3', 'hr', 'div.tiled-gallery']) {
    body.find(selector).remove()
  }

  console.log(title)

  return { title, data: body.html() }
}
|
|
||||||
|
|
||||||
/**
 * Read a table-of-contents page and generate one EPUB per volume.
 *
 * The `div.entry-content > p` paragraphs are read in pairs: an even-indexed
 * paragraph holds the "Volume N" heading and the odd-indexed paragraph after
 * it holds that volume's chapter links.
 *
 * @param {string} url - Address of the table-of-contents page.
 * @param {string[]} authors - Author names stored in every book.
 * @returns {Promise<void>} Resolves once every book's chapters are fetched
 *   and its `Epub` has been constructed.
 */
const run = async (url, authors) => {
  const books = []
  const response = await fetch(url)
  const html = cheerio.load(await response.text())

  const content = html('div.entry-content > p')
  const title = html('#site-title > span > a').text()
  const summary = html('h1.entry-title').text()
  const description = html('#site-description').text()

  // Book that the next link paragraph belongs to. It is null while the last
  // heading was not a volume, so links are never attached to the wrong book
  // and a non-numeric heading no longer crashes on `books[NaN - 1]`.
  let current = null

  content.each((i, el) => {
    if (i % 2 === 0) {
      const volume = parseInt(html(el).text().replace(/Volume /, '').trim(), 10)
      if (isNaN(volume)) {
        current = null
        return
      }
      current = metadata(volume, title, authors, summary, description)
      books.push(current)
    } else if (current !== null) {
      html('a', el).each((_, link) => {
        current.links.push(html(link).attr('href'))
      })
    }
  })

  // Await the per-book work so failures reject `run` instead of becoming
  // unhandled rejections, and so callers can tell when the work is done.
  await Promise.all(
    books.map(async (book) => {
      // Chapters are fetched one after another to keep them in reading order.
      for (const link of book.links) {
        book.content.push(await fetchPage(link))
      }

      new Epub(book)
    })
  )
}
|
|
||||||
|
|
||||||
// Start one build per table-of-contents URL. The calls are issued back to
// back and are not awaited here.
const sources = [
  ['https://aubergevagabonde.wordpress.com/sommaire/', ['Maroti', 'ElliVia', 'Pirateaba']],
  ['https://wanderinginn.com/table-of-contents/', ['Pirateaba']],
]

for (const [tocUrl, authors] of sources) {
  run(tocUrl, authors)
}
|
|
26
package.json
26
package.json
@ -1,26 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "auberge_vagabonde_js",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"description": "Convert The Wandering Inn Into Ebooks",
|
|
||||||
"repository": {
|
|
||||||
"type": "git",
|
|
||||||
"url": "https://git.crystalyx.net/Xefir/Auberge_Vagabonde_JS"
|
|
||||||
},
|
|
||||||
"author": "Xéfir Destiny",
|
|
||||||
"license": "ISC",
|
|
||||||
"main": "index.js",
|
|
||||||
"scripts": {
|
|
||||||
"start": "node .",
|
|
||||||
"lint": "eslint ."
|
|
||||||
},
|
|
||||||
"dependencies": {
|
|
||||||
"cheerio": "1.0.0-rc.10",
|
|
||||||
"epub-gen-funstory": "0.1.3",
|
|
||||||
"node-fetch": "2"
|
|
||||||
},
|
|
||||||
"devDependencies": {
|
|
||||||
"@types/node-fetch": "2",
|
|
||||||
"eslint": "8.7.0"
|
|
||||||
},
|
|
||||||
"type": "module"
|
|
||||||
}
|
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
requests==2.27.1
|
||||||
|
beautifulsoup4==4.10.0
|
||||||
|
git+https://git.crystalyx.net/Xefir/pypub@fix/py3#pypub
|
69
run.py
Normal file
69
run.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
import asyncio
|
||||||
|
import gravesong
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import twi
|
||||||
|
|
||||||
|
|
||||||
|
# Set the root logger and every logger registered so far to INFO.
loggers = [logging.getLogger()] + [
    logging.getLogger(name) for name in logging.root.manager.loggerDict
]
for logger in loggers:
    logger.setLevel(logging.INFO)

output_epubs = "output/epubs"
output_imgs = "output/imgs"
# exist_ok=True replaces the separate isdir() test and its check-then-create race.
os.makedirs(output_epubs, exist_ok=True)
os.makedirs(output_imgs, exist_ok=True)

# Cover image for the Gravesong book.
gravesong_img = requests.get(
    "https://wanderinginn.files.wordpress.com/2021/12/gravesong-by-boboplushie.jpg"
)
# Fail early rather than saving an HTTP error page as a cover image.
gravesong_img.raise_for_status()
with open(f"{output_imgs}/gravesong-by-boboplushie.jpg", "wb") as f:
    f.write(gravesong_img.content)

# Cover image shared by the Wandering Inn volumes.
twi_img = requests.get(
    "https://i0.wp.com/thefantasyinn.com/wp-content/uploads/2018/08/twi.jpg"
)
twi_img.raise_for_status()
with open(f"{output_imgs}/twi.jpg", "wb") as f:
    f.write(twi_img.content)
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Build the Gravesong book and both Wandering Inn editions concurrently.

    Each ``process`` call is awaited through ``asyncio.gather``; all three
    write their EPUB files into ``output_epubs``.
    """
    twi_cover = f"{output_imgs}/twi.jpg"

    await asyncio.gather(
        gravesong.process(
            {
                "creator": "Pirateaba",
                "language": "en",
                "publisher": "Xefir",
                "cover": f"{output_imgs}/gravesong-by-boboplushie.jpg",
            },
            output_epubs,
        ),
        twi.process(
            {
                "creator": "Pirateaba",
                "language": "en",
                "publisher": "Xefir",
                "cover": twi_cover,
            },
            output_epubs,
            "https://wanderinginn.com/table-of-contents/",
        ),
        twi.process(
            {
                "creator": "Pirateaba",
                "language": "fr",
                "publisher": "Maroti, ElliVia",
                "cover": twi_cover,
            },
            output_epubs,
            "https://aubergevagabonde.wordpress.com/sommaire/",
        ),
    )


# Only start the build when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
|
58
twi.py
Normal file
58
twi.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
import asyncio
|
||||||
|
import executor
|
||||||
|
import bs4
|
||||||
|
import pypub
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def fetchVolume(title, metadata, volume_title, output_dir, links):
    """Download every chapter of one volume and write it out as an EPUB.

    Each link's page is fetched, its ``div.entry-content`` is stripped of
    links, horizontal rules and tiled galleries, and the result is added as a
    chapter named after the page's ``h1.entry-title``.

    Args:
        title: Series title, used as the prefix of the book title.
        metadata: Mapping of keyword arguments forwarded to ``pypub.Epub``.
        volume_title: Volume name, used as the suffix of the book title.
        output_dir: Directory passed to ``book.create_epub``.
        links: Iterable of ``<a>`` tags whose ``href`` is a chapter URL.
    """
    # Unpack as keywords: ``*metadata`` would pass the dict's keys positionally.
    # NOTE(review): assumes the pinned pypub fork accepts every key the caller
    # supplies (e.g. "cover") - confirm.
    book = pypub.Epub(f"{title} - {volume_title}", **metadata)

    for link in links:
        chapter_response = requests.get(link["href"])
        chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
        chapter_content = chapter_html.select_one("div.entry-content")
        chapter_title = chapter_html.select_one("h1.entry-title").get_text()

        # select() understands CSS selectors. find_all("div.tiled-gallery")
        # looks for a tag literally named "div.tiled-gallery" and never
        # matches, so galleries were left in the chapter.
        for selector in ("a", "hr", "div.tiled-gallery"):
            for removed in chapter_content.select(selector):
                removed.decompose()

        print(f"{title} - {chapter_title}")
        book.add_chapter(
            pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title)
        )

    print(f"{title} - {volume_title}")
    book.create_epub(output_dir)
|
||||||
|
|
||||||
|
|
||||||
|
@executor.executor
def process(metadata, output_dir, url):
    """Build one EPUB per volume listed on a table-of-contents page.

    The ``div.entry-content > p`` paragraphs are scanned in order. A
    paragraph containing ``<strong>`` names the current volume; any other
    paragraph that contains links is treated as that volume's chapter list.

    Args:
        metadata: Mapping of keyword arguments forwarded to ``pypub.Epub``.
        output_dir: Directory the EPUB files are written to.
        url: Address of the table-of-contents page.
    """
    response = requests.get(url)

    html = bs4.BeautifulSoup(response.content, "html.parser")
    content = html.select("div.entry-content > p")
    title = html.select_one("#site-title > span > a").get_text()

    volume_title = None
    volumes = []

    for paragraph in content:
        if paragraph.strong is not None:
            volume_title = paragraph.strong.get_text()
            continue
        links = paragraph.find_all("a")
        # Paragraphs without links would only produce an empty book.
        if links:
            volumes.append((volume_title, links))

    async def fetch_volumes():
        # fetchVolume is blocking, so each volume runs in a worker thread and
        # gather() waits for all of them. Calling fetchVolume directly while
        # building the task list would hand gather() a list of None values.
        loop = asyncio.get_running_loop()
        await asyncio.gather(
            *(
                loop.run_in_executor(
                    None, fetchVolume, title, metadata, name, output_dir, volume_links
                )
                for name, volume_links in volumes
            )
        )

    # asyncio.run() needs a coroutine, not the future returned by gather().
    # NOTE(review): assumes the decorator runs this function in a thread with
    # no running event loop, which asyncio.run() requires - confirm.
    asyncio.run(fetch_volumes())
|
Reference in New Issue
Block a user