Browse Source

Back to python + Gravesong

pull/19/head
Michel Roux 4 months ago
parent
commit
d3a7ac0fc2
  1. 10
      .drone.yml
  2. 8
      .eslintrc.yml
  3. 4
      .gitignore
  4. 10
      executor.py
  5. 41
      gravesong.py
  6. 86
      index.js
  7. 26
      package.json
  8. 3
      requirements.txt
  9. 69
      run.py
  10. 58
      twi.py
  11. 1736
      yarn.lock

10
.drone.yml

@ -4,11 +4,13 @@ type: docker
steps:
- name: epub
image: node
image: python-slim
commands:
- yarn install
- yarn lint
- yarn start
- pip install flake8 black
- flake8 --ignore=501
- black --check
- pip install -r requirements.txt
- python run.py
- name: pdf
image: linuxserver/calibre
commands:

8
.eslintrc.yml

@ -1,8 +0,0 @@
env:
es2021: true
node: true
extends: 'eslint:recommended'
parserOptions:
ecmaVersion: 12
sourceType: module
rules: {}

4
.gitignore vendored

@ -1,2 +1,4 @@
__pycache__
.venv
.vscode
output
node_modules

10
executor.py

@ -0,0 +1,10 @@
import asyncio
def executor(func):
    """Wrap a blocking callable so it can be awaited from asyncio code.

    Args:
        func: Synchronous callable to run off the event-loop thread.

    Returns:
        A coroutine function accepting the same arguments as ``func``.
        Awaiting it runs ``func`` in the running loop's default executor and
        evaluates to whatever ``func`` returned.
    """
    import functools  # local import: the module header only imports asyncio

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        # Inside a coroutine a loop is always running, so ask for that one
        # explicitly instead of relying on get_event_loop().
        loop = asyncio.get_running_loop()
        # run_in_executor only forwards positional arguments; partial binds
        # the keyword arguments as well.
        return await loop.run_in_executor(
            None, functools.partial(func, *args, **kwargs)
        )

    return wrapper

41
gravesong.py

@ -0,0 +1,41 @@
import bs4
import executor
import pypub
import requests
@executor.executor
def process(metadata, output_dir):
    """Download the password-protected Gravesong post and turn it into an EPUB.

    The post body is split into chapters at every ``<h1>``; paragraphs found
    before the first heading end up in a chapter titled "Introduction".

    Args:
        metadata: Mapping of keyword arguments forwarded to ``pypub.Epub``.
        output_dir: Directory handed to ``book.create_epub``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # ``**`` forwards the mapping as keyword arguments; a single ``*`` would
    # pass the key names themselves as positional values.
    book = pypub.Epub("Gravesong", **metadata)

    response = requests.post(
        "https://wanderinginn.com/wp-pass.php",
        data={"post_password": "Iwalkedameadowweary", "Submit": "Enter"},
        headers={"Referer": "https://wanderinginn.com/2022/01/11/gravesong/"},
    )
    # Fail loudly instead of parsing an error page as if it were the book.
    response.raise_for_status()

    html = bs4.BeautifulSoup(response.content, "html.parser")
    entry_content = html.select_one("div.entry-content")

    # Images are stripped from the post before its text is collected.
    for content_img in entry_content.find_all("img"):
        content_img.decompose()

    chapter_title = "Introduction"
    chapter_parts = []

    for paragraph in entry_content.children:
        if paragraph.name == "h1":
            # A heading closes the chapter collected so far...
            print(f"Gravesong - {chapter_title}")
            book.add_chapter(
                pypub.create_chapter_from_string(
                    "".join(chapter_parts), chapter_title
                )
            )
            # ...and opens a new, empty one so earlier text is not repeated.
            chapter_title = paragraph.get_text()
            chapter_parts = []
        elif paragraph.name == "p":
            chapter_parts.append(paragraph.prettify())

    # Flush the chapter that was still open when the post ended.
    print(f"Gravesong - {chapter_title}")
    book.add_chapter(
        pypub.create_chapter_from_string("".join(chapter_parts), chapter_title)
    )

    print("Gravesong - Book")
    book.create_epub(output_dir)

86
index.js

@ -1,86 +0,0 @@
import fs from 'fs'
import cheerio from 'cheerio'
import fetch from 'node-fetch'
import Epub from 'epub-gen-funstory'
// Create the 'output' directory on first run; later runs find it in place.
const outputDirExists = fs.existsSync('output')
if (outputDirExists === false) {
  fs.mkdirSync('output')
}
/**
 * Build the option object describing one volume of a book.
 *
 * @param {number} volume      Volume number, used in the title and file name.
 * @param {string} title       Series title.
 * @param {string[]} author    Author names.
 * @param {string} tocTitle    Heading used for the table of contents.
 * @param {string} description Book description.
 * @returns {object} Options with empty `content` and `links` arrays to fill in.
 */
const metadata = (volume, title, author, tocTitle, description) => {
  const volumeTitle = `${title} - Volume ${volume}`

  return {
    title: volumeTitle,
    author: author,
    cover: 'https://i.pinimg.com/originals/0b/fd/cf/0bfdcfb42ba3ff0a22f4a7bc52928af4.png',
    output: `output/${volumeTitle}.epub`,
    version: 3,
    lang: 'fr',
    tocTitle: tocTitle,
    appendChapterTitles: true,
    content: [],
    links: [],
    verbose: true,
    description: description,
  }
}
/**
 * Download one page and extract its title and cleaned-up body markup.
 *
 * @param {string} url Address of the page to download.
 * @returns {Promise<{title: string, data: *}>} The `h1.entry-title` text and
 *   the inner HTML of `div.entry-content` after stripping unwanted elements.
 */
const fetchPage = async (url) => {
  const response = await fetch(url)
  const $ = cheerio.load(await response.text())

  const title = $('h1.entry-title').text()
  const body = $('div.entry-content')

  // Links, sub-headings, rules and tiled galleries are dropped from the body.
  for (const selector of ['a', 'h3', 'hr', 'div.tiled-gallery']) {
    body.find(selector).remove()
  }

  const data = body.html()
  console.log(title)

  return { title, data }
}
/**
 * Scrape a table-of-contents page and generate one EPUB per listed volume.
 *
 * The `div.entry-content > p` paragraphs are read as alternating pairs: an
 * even-indexed paragraph holds the "Volume N" heading and the odd-indexed
 * paragraph after it holds that volume's chapter links.
 *
 * @param {string} url       Address of the table-of-contents page.
 * @param {string[]} authors Author names stored in every generated book.
 * @returns {Promise<void>} Settles once every volume has been handed to the
 *   EPUB generator; rejects if a download fails.
 */
const run = async (url, authors) => {
  const response = await fetch(url)
  const html = cheerio.load(await response.text())

  const title = html('#site-title > span > a').text()
  const summary = html('h1.entry-title').text()
  const description = html('#site-description').text()

  const books = []
  // Book opened by the most recent heading. Tracking it directly (rather than
  // indexing `books` by volume number) keeps working when a heading is not
  // numeric or when volumes are not numbered 1..n in page order.
  let current = null

  html('div.entry-content > p').each((index, paragraph) => {
    if (index % 2 === 0) {
      const heading = html(paragraph).text().replace(/Volume /, '').trim()
      const volume = parseInt(heading, 10)
      if (isNaN(volume)) {
        // Not a volume heading: links that follow it belong to no book.
        current = null
      } else {
        current = metadata(volume, title, authors, summary, description)
        books.push(current)
      }
    } else if (current !== null) {
      html('a', paragraph).each((_, anchor) => {
        current.links.push(html(anchor).attr('href'))
      })
    }
  })

  // Volumes are processed concurrently, chapters inside a volume in order.
  // Awaiting the batch lets a failed download reject the promise returned by
  // `run` instead of surfacing as an unhandled rejection.
  await Promise.all(books.map(async (book) => {
    for (const link of book.links) {
      book.content.push(await fetchPage(link))
    }
    new Epub(book)
  }))
}
// One run per table-of-contents page; each entry is [url, authors].
// The runs are started back to back without waiting for one another.
const sources = [
  [
    'https://aubergevagabonde.wordpress.com/sommaire/',
    ['Maroti', 'ElliVia', 'Pirateaba'],
  ],
  [
    'https://wanderinginn.com/table-of-contents/',
    ['Pirateaba'],
  ],
]

for (const [url, authors] of sources) {
  run(url, authors)
}

26
package.json

@ -1,26 +0,0 @@
{
"name": "auberge_vagabonde_js",
"version": "1.0.0",
"description": "Convert The Wandering Inn Into Ebooks",
"repository": {
"type": "git",
"url": "https://git.crystalyx.net/Xefir/Auberge_Vagabonde_JS"
},
"author": "Xéfir Destiny",
"license": "ISC",
"main": "index.js",
"scripts": {
"start": "node .",
"lint": "eslint ."
},
"dependencies": {
"cheerio": "1.0.0-rc.10",
"epub-gen-funstory": "0.1.3",
"node-fetch": "2"
},
"devDependencies": {
"@types/node-fetch": "2",
"eslint": "8.7.0"
},
"type": "module"
}

3
requirements.txt

@ -0,0 +1,3 @@
requests==2.27.1
beautifulsoup4==4.10.0
git+https://git.crystalyx.net/Xefir/[email protected]/py3#pypub

69
run.py

@ -0,0 +1,69 @@
import asyncio
import gravesong
import logging
import os
import requests
import twi
# Set the root logger and every logger registered so far to INFO.
loggers = [logging.getLogger()] + [
    logging.getLogger(name) for name in logging.root.manager.loggerDict
]
for logger in loggers:
    logger.setLevel(logging.INFO)

# Directories receiving the generated books and the downloaded cover images.
output_epubs = "output/epubs"
output_imgs = "output/imgs"
# exist_ok makes the call a no-op when the directory is already there, with
# no window between a separate "does it exist?" check and the creation.
os.makedirs(output_epubs, exist_ok=True)
os.makedirs(output_imgs, exist_ok=True)

# Download the cover images. raise_for_status() stops the script on an HTTP
# error instead of saving the error page under a .jpg name.
gravesong_img = requests.get(
    "https://wanderinginn.files.wordpress.com/2021/12/gravesong-by-boboplushie.jpg"
)
gravesong_img.raise_for_status()
with open(f"{output_imgs}/gravesong-by-boboplushie.jpg", "wb") as f:
    f.write(gravesong_img.content)

twi_img = requests.get(
    "https://i0.wp.com/thefantasyinn.com/wp-content/uploads/2018/08/twi.jpg"
)
twi_img.raise_for_status()
with open(f"{output_imgs}/twi.jpg", "wb") as f:
    f.write(twi_img.content)
async def main():
    """Run the Gravesong job and both table-of-contents jobs concurrently."""
    gravesong_metadata = {
        "creator": "Pirateaba",
        "language": "en",
        "publisher": "Xefir",
        "cover": f"{output_imgs}/gravesong-by-boboplushie.jpg",
    }
    english_metadata = {
        "creator": "Pirateaba",
        "language": "en",
        "publisher": "Xefir",
        "cover": f"{output_imgs}/twi.jpg",
    }
    french_metadata = {
        "creator": "Pirateaba",
        "language": "fr",
        "publisher": "Maroti, ElliVia",
        "cover": f"{output_imgs}/twi.jpg",
    }

    # Build the three awaitables first, then wait for all of them together.
    jobs = [
        gravesong.process(gravesong_metadata, output_epubs),
        twi.process(
            english_metadata,
            output_epubs,
            "https://wanderinginn.com/table-of-contents/",
        ),
        twi.process(
            french_metadata,
            output_epubs,
            "https://aubergevagabonde.wordpress.com/sommaire/",
        ),
    ]
    await asyncio.gather(*jobs)
# Start the build only when this file is executed as a script
# (e.g. ``python run.py``), not when it is merely imported.
if __name__ == "__main__":
    asyncio.run(main())

58
twi.py

@ -0,0 +1,58 @@
import asyncio
import executor
import bs4
import pypub
import requests
def fetchVolume(title, metadata, volume_title, output_dir, links):
    """Download every chapter of one volume and write the volume as an EPUB.

    Args:
        title: Series title; prefix of the book title and of progress lines.
        metadata: Mapping of keyword arguments forwarded to ``pypub.Epub``.
        volume_title: Volume heading; second half of the book title.
        output_dir: Directory handed to ``book.create_epub``.
        links: Iterable of ``<a>`` tags whose ``href`` is a chapter page.

    Raises:
        requests.HTTPError: If a chapter page answers with an error status.
    """
    # ``**`` forwards the mapping as keyword arguments; a single ``*`` would
    # pass the key names themselves as positional values.
    book = pypub.Epub(f"{title} - {volume_title}", **metadata)

    for link in links:
        chapter_response = requests.get(link["href"])
        # Fail loudly instead of storing an error page as a chapter.
        chapter_response.raise_for_status()

        chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
        chapter_content = chapter_html.select_one("div.entry-content")
        chapter_title = chapter_html.select_one("h1.entry-title").get_text()

        # ``find_all`` matches tag names, not CSS selectors, so the gallery
        # wrapper (a div with a class) has to be looked up with ``select``.
        unwanted_groups = (
            chapter_content.find_all("a"),
            chapter_content.find_all("hr"),
            chapter_content.select("div.tiled-gallery"),
        )
        for group in unwanted_groups:
            for removed in group:
                removed.decompose()

        print(f"{title} - {chapter_title}")
        book.add_chapter(
            pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title)
        )

    print(f"{title} - {volume_title}")
    book.create_epub(output_dir)
@executor.executor
def process(metadata, output_dir, url):
    """Read a table-of-contents page and build one EPUB per volume.

    Paragraphs of ``div.entry-content`` containing a ``<strong>`` are taken as
    volume headings; the links of the paragraphs that follow are the chapters
    of that volume.

    Args:
        metadata: Passed through unchanged to ``fetchVolume``.
        output_dir: Passed through unchanged to ``fetchVolume``.
        url: Address of the table-of-contents page.

    Raises:
        requests.HTTPError: If the table-of-contents page answers with an
            error status.
    """
    response = requests.get(url)
    # Fail loudly instead of parsing an error page as a table of contents.
    response.raise_for_status()

    html = bs4.BeautifulSoup(response.content, "html.parser")
    title = html.select_one("#site-title > span > a").get_text()

    volume_title = None
    for paragraph in html.select("div.entry-content > p"):
        if paragraph.strong is not None:
            volume_title = paragraph.strong.get_text()
            continue

        links = paragraph.find_all("a")
        if not links:
            # A paragraph without links would only yield a chapterless book
            # carrying the title of the volume before it.
            continue

        # fetchVolume is a plain blocking function that returns None, so it is
        # called directly; there is nothing awaitable to collect and hand to
        # asyncio afterwards.
        fetchVolume(title, metadata, volume_title, output_dir, links)

1736
yarn.lock

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save