Back to python + Gravesong
Some checks reported errors
continuous-integration/drone/push Build encountered an error
Some checks reported errors
continuous-integration/drone/push Build encountered an error
This commit is contained in:
parent
a3e84310ff
commit
d3a7ac0fc2
10
.drone.yml
10
.drone.yml
@ -4,11 +4,13 @@ type: docker
|
||||
|
||||
steps:
|
||||
- name: epub
|
||||
image: node
|
||||
image: python-slim
|
||||
commands:
|
||||
- yarn install
|
||||
- yarn lint
|
||||
- yarn start
|
||||
- pip install flake8 black
|
||||
- flake8 --ignore=E501
|
||||
- black --check .
|
||||
- pip install -r requirements.txt
|
||||
- python run.py
|
||||
- name: pdf
|
||||
image: linuxserver/calibre
|
||||
commands:
|
||||
|
@ -1,8 +0,0 @@
|
||||
env:
|
||||
es2021: true
|
||||
node: true
|
||||
extends: 'eslint:recommended'
|
||||
parserOptions:
|
||||
ecmaVersion: 12
|
||||
sourceType: module
|
||||
rules: {}
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
__pycache__
|
||||
.venv
|
||||
.vscode
|
||||
output
|
||||
node_modules
|
||||
|
10
executor.py
Normal file
10
executor.py
Normal file
@ -0,0 +1,10 @@
|
||||
import asyncio
|
||||
|
||||
|
||||
def executor(func):
    """Decorate a blocking callable so it can be awaited.

    The returned coroutine function runs ``func`` in the event loop's default
    executor and resolves to whatever ``func`` returns; an exception raised by
    ``func`` is re-raised at the ``await`` site.

    Args:
        func: Any synchronous callable.

    Returns:
        A coroutine function accepting the same arguments as ``func``.
    """
    # Imported locally so this block does not depend on a file-level import.
    import functools

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        # We are inside a coroutine, so a running loop is guaranteed to exist.
        loop = asyncio.get_running_loop()
        # run_in_executor only forwards positional arguments, hence the partial.
        call = functools.partial(func, *args, **kwargs)
        # Return the result: previously it was awaited and then discarded.
        return await loop.run_in_executor(None, call)

    return wrapper
|
41
gravesong.py
Normal file
41
gravesong.py
Normal file
@ -0,0 +1,41 @@
|
||||
import bs4
|
||||
import executor
|
||||
import pypub
|
||||
import requests
|
||||
|
||||
|
||||
@executor.executor
def process(metadata, output_dir):
    """Download the password-protected Gravesong post and build one epub.

    The post body is split into chapters at every ``<h1>``: the paragraphs
    seen before a heading belong to the previous chapter (the first one is
    titled "Introduction").

    Args:
        metadata: Mapping of keyword arguments forwarded to ``pypub.Epub``.
        output_dir: Directory passed to ``book.create_epub``.

    Raises:
        requests.HTTPError: If the site answers with an error status.
        RuntimeError: If the page has no ``div.entry-content`` element.
    """
    # The caller passes a dict; ``*metadata`` would hand the *keys* to Epub as
    # positional values. NOTE(review): assumes Epub accepts these keyword
    # names (creator/language/publisher/cover) - confirm against the fork.
    book = pypub.Epub("Gravesong", **metadata)

    response = requests.post(
        "https://wanderinginn.com/wp-pass.php",
        data={"post_password": "Iwalkedameadowweary", "Submit": "Enter"},
        headers={"Referer": "https://wanderinginn.com/2022/01/11/gravesong/"},
    )
    response.raise_for_status()

    html = bs4.BeautifulSoup(response.content, "html.parser")
    entry_content = html.select_one("div.entry-content")
    if entry_content is None:
        raise RuntimeError("Gravesong: div.entry-content not found in response")

    for content_img in entry_content.find_all("img"):
        content_img.decompose()

    chapter_title = "Introduction"
    chapter_parts = []

    for paragraph in entry_content.children:
        if paragraph.name == "h1":
            print(f"Gravesong - {chapter_title}")
            book.add_chapter(
                pypub.create_chapter_from_string(
                    "".join(chapter_parts), chapter_title
                )
            )
            chapter_title = paragraph.get_text()
            # Start the next chapter empty; without this every chapter would
            # also contain the text of all chapters before it.
            chapter_parts = []
        elif paragraph.name == "p":
            chapter_parts.append(paragraph.prettify())

    # Flush the chapter that was still open when the content ended.
    print(f"Gravesong - {chapter_title}")
    book.add_chapter(
        pypub.create_chapter_from_string("".join(chapter_parts), chapter_title)
    )

    print("Gravesong - Book")
    book.create_epub(output_dir)
|
86
index.js
86
index.js
@ -1,86 +0,0 @@
|
||||
import fs from 'fs'
|
||||
import cheerio from 'cheerio'
|
||||
import fetch from 'node-fetch'
|
||||
import Epub from 'epub-gen-funstory'
|
||||
|
||||
// Create the output directory on first run; leave it alone if it is already there.
const outputDirExists = fs.existsSync('output')
if (outputDirExists === false) {
  fs.mkdirSync('output')
}
|
||||
|
||||
/**
 * Build the epub option object for one volume.
 *
 * `content` and `links` start as fresh empty arrays on every call so each
 * book can be filled independently.
 */
const metadata = (volume, title, author, tocTitle, description) => {
  const bookName = `${title} - Volume ${volume}`

  return {
    title: bookName,
    author: author,
    cover: 'https://i.pinimg.com/originals/0b/fd/cf/0bfdcfb42ba3ff0a22f4a7bc52928af4.png',
    output: `output/${bookName}.epub`,
    version: 3,
    lang: 'fr',
    tocTitle: tocTitle,
    appendChapterTitles: true,
    content: [],
    links: [],
    verbose: true,
    description: description,
  }
}
|
||||
|
||||
/**
 * Download one chapter page and return `{ title, data }`, where `data` is the
 * HTML of `div.entry-content` with links, h3 headings, rules and tiled
 * galleries stripped out. The chapter title is logged to the console.
 */
const fetchPage = async (url) => {
  const pageText = await fetch(url).then((res) => res.text())
  const $ = cheerio.load(pageText)

  const title = $('h1.entry-title').text()
  const body = $('div.entry-content')

  // Elements that must not end up in the ebook.
  for (const selector of ['a', 'h3', 'hr', 'div.tiled-gallery']) {
    body.find(selector).remove()
  }

  const data = body.html()
  console.log(title)

  return { title, data }
}
|
||||
|
||||
/**
 * Read a table-of-contents page and generate one epub per volume.
 *
 * The `div.entry-content > p` paragraphs are read as alternating pairs:
 * even positions hold a "Volume N" heading, odd positions hold that volume's
 * chapter links. The returned promise settles once every book's chapters have
 * been fetched and its Epub object has been created.
 *
 * @param {string} url - table-of-contents page to scrape
 * @param {string[]} authors - author list stored in each book's metadata
 */
const run = async (url, authors) => {
  const books = []
  const response = await fetch(url)
  const html = cheerio.load(await response.text())

  const content = html('div.entry-content > p')
  const title = html('#site-title > span > a').text()
  const summary = html('h1.entry-title').text()
  const description = html('#site-description').text()

  // Book that the next link paragraph belongs to; null while the last heading
  // could not be parsed. Tracking the object instead of indexing with
  // books[volume - 1] avoids a TypeError on NaN or non-sequential volumes.
  let current = null

  content.each((i, el) => {
    if (i % 2 === 0) {
      const volume = parseInt(html(el).text().replace(/Volume /, '').trim(), 10)
      if (isNaN(volume)) {
        current = null
        return
      }
      current = metadata(volume, title, authors, summary, description)
      books.push(current)
    } else if (current !== null) {
      html('a', el).each((_, anchor) => {
        current.links.push(html(anchor).attr('href'))
      })
    }
  })

  // Await the per-book work so failures reject run() instead of being lost.
  await Promise.all(books.map(async (book) => {
    // Chapters are fetched one at a time to keep them in reading order.
    for (const link of book.links) {
      book.content.push(await fetchPage(link))
    }

    new Epub(book)
  }))
}
|
||||
|
||||
// Table-of-contents pages to convert, each paired with its author list.
// French translation first, then the English original.
const sources = [
  ['https://aubergevagabonde.wordpress.com/sommaire/', ['Maroti', 'ElliVia', 'Pirateaba']],
  ['https://wanderinginn.com/table-of-contents/', ['Pirateaba']],
]

for (const [tocUrl, tocAuthors] of sources) {
  run(tocUrl, tocAuthors)
}
|
26
package.json
26
package.json
@ -1,26 +0,0 @@
|
||||
{
|
||||
"name": "auberge_vagabonde_js",
|
||||
"version": "1.0.0",
|
||||
"description": "Convert The Wandering Inn Into Ebooks",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://git.crystalyx.net/Xefir/Auberge_Vagabonde_JS"
|
||||
},
|
||||
"author": "Xéfir Destiny",
|
||||
"license": "ISC",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"start": "node .",
|
||||
"lint": "eslint ."
|
||||
},
|
||||
"dependencies": {
|
||||
"cheerio": "1.0.0-rc.10",
|
||||
"epub-gen-funstory": "0.1.3",
|
||||
"node-fetch": "2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node-fetch": "2",
|
||||
"eslint": "8.7.0"
|
||||
},
|
||||
"type": "module"
|
||||
}
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
requests==2.27.1
|
||||
beautifulsoup4==4.10.0
|
||||
git+https://git.crystalyx.net/Xefir/pypub@fix/py3#pypub
|
69
run.py
Normal file
69
run.py
Normal file
@ -0,0 +1,69 @@
|
||||
import asyncio
|
||||
import gravesong
|
||||
import logging
|
||||
import os
|
||||
import requests
|
||||
import twi
|
||||
|
||||
|
||||
# Set INFO level on the root logger and on every logger that the modules
# imported above have already registered.
loggers = [logging.getLogger()] + [
    logging.getLogger(name) for name in logging.root.manager.loggerDict
]
for logger in loggers:
    logger.setLevel(logging.INFO)

# Output locations: generated books and the cover images they reference.
output_epubs = "output/epubs"
output_imgs = "output/imgs"
for directory in (output_epubs, output_imgs):
    os.makedirs(directory, exist_ok=True)

# Cover images, keyed by the file name they are stored under in output_imgs.
cover_urls = {
    "gravesong-by-boboplushie.jpg": "https://wanderinginn.files.wordpress.com/2021/12/gravesong-by-boboplushie.jpg",
    "twi.jpg": "https://i0.wp.com/thefantasyinn.com/wp-content/uploads/2018/08/twi.jpg",
}
for cover_name, cover_url in cover_urls.items():
    cover_response = requests.get(cover_url)
    # Fail loudly instead of saving an error page under a .jpg name.
    cover_response.raise_for_status()
    with open(f"{output_imgs}/{cover_name}", "wb") as f:
        f.write(cover_response.content)
|
||||
|
||||
|
||||
async def main():
    """Run the three conversions concurrently.

    Jobs: the Gravesong post, the English table of contents and the French
    table of contents. Each job receives its own metadata dict with the keys
    in the order creator, language, publisher, cover.
    """
    english = {"creator": "Pirateaba", "language": "en", "publisher": "Xefir"}
    french = {
        "creator": "Pirateaba",
        "language": "fr",
        "publisher": "Maroti, ElliVia",
    }
    twi_cover = f"{output_imgs}/twi.jpg"

    jobs = [
        gravesong.process(
            {**english, "cover": f"{output_imgs}/gravesong-by-boboplushie.jpg"},
            output_epubs,
        ),
        twi.process(
            {**english, "cover": twi_cover},
            output_epubs,
            "https://wanderinginn.com/table-of-contents/",
        ),
        twi.process(
            {**french, "cover": twi_cover},
            output_epubs,
            "https://aubergevagabonde.wordpress.com/sommaire/",
        ),
    ]
    await asyncio.gather(*jobs)


asyncio.run(main())
|
58
twi.py
Normal file
58
twi.py
Normal file
@ -0,0 +1,58 @@
|
||||
import asyncio
|
||||
import executor
|
||||
import bs4
|
||||
import pypub
|
||||
import requests
|
||||
|
||||
|
||||
def fetchVolume(title, metadata, volume_title, output_dir, links):
    """Download every chapter of one volume and write it out as an epub.

    Args:
        title: Site title, used as the book title prefix and in log lines.
        metadata: Mapping of keyword arguments forwarded to ``pypub.Epub``.
        volume_title: Volume name appended to ``title``.
        output_dir: Directory passed to ``book.create_epub``.
        links: Iterable of ``<a>`` tags whose ``href`` points at a chapter.

    Raises:
        requests.HTTPError: If a chapter request returns an error status.
    """
    # The caller passes a dict; ``*metadata`` would hand the *keys* to Epub as
    # positional values. NOTE(review): assumes Epub accepts these keyword
    # names - confirm against the pypub fork in requirements.txt.
    book = pypub.Epub(f"{title} - {volume_title}", **metadata)

    for link in links:
        href = link.get("href")
        if not href:
            # Anchor without a target: nothing to download.
            continue

        chapter_response = requests.get(href)
        chapter_response.raise_for_status()
        chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
        chapter_content = chapter_html.select_one("div.entry-content")
        chapter_heading = chapter_html.select_one("h1.entry-title")
        if chapter_content is None or chapter_heading is None:
            print(f"{title} - skipped {href}: unexpected page layout")
            continue
        chapter_title = chapter_heading.get_text()

        # select() understands CSS selectors; find_all("div.tiled-gallery")
        # looked for a tag literally named "div.tiled-gallery" and matched
        # nothing. Re-query per selector so a node that was already removed
        # together with its parent is never touched again.
        for selector in ("div.tiled-gallery", "a", "hr"):
            for removed in chapter_content.select(selector):
                removed.decompose()

        print(f"{title} - {chapter_title}")
        book.add_chapter(
            pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title)
        )

    print(f"{title} - {volume_title}")
    book.create_epub(output_dir)
|
||||
|
||||
|
||||
@executor.executor
def process(metadata, output_dir, url):
    """Build one epub per volume listed on a table-of-contents page.

    Paragraphs of ``div.entry-content`` that contain a ``<strong>`` element
    set the current volume title; any other paragraph that holds links is
    treated as the chapter list of that volume.

    Args:
        metadata: Mapping forwarded to ``fetchVolume`` for every volume.
        output_dir: Directory the epubs are written to.
        url: Address of the table-of-contents page.

    Raises:
        requests.HTTPError: If the table-of-contents request fails.
    """
    response = requests.get(url)
    response.raise_for_status()

    html = bs4.BeautifulSoup(response.content, "html.parser")
    content = html.select("div.entry-content > p")
    title = html.select_one("#site-title > span > a").get_text()

    volume_title = None

    for paragraph in content:
        if paragraph.strong is not None:
            volume_title = paragraph.strong.get_text()
            continue

        links = paragraph.find_all("a")
        if not links:
            # Plain text paragraph: it would only produce an empty book.
            continue

        # fetchVolume is an ordinary blocking function, so it is simply
        # called here. Collecting its None results and handing them to
        # asyncio.gather()/asyncio.run() raised once all volumes were done.
        fetchVolume(title, metadata, volume_title, output_dir, links)
|
Reference in New Issue
Block a user