11 changed files with 190 additions and 1861 deletions
@ -1,8 +0,0 @@
|
||||
env: |
||||
es2021: true |
||||
node: true |
||||
extends: 'eslint:recommended' |
||||
parserOptions: |
||||
ecmaVersion: 12 |
||||
sourceType: module |
||||
rules: {} |
@ -0,0 +1,10 @@
|
||||
import asyncio |
||||
|
||||
|
||||
def executor(func):
    """Decorate a blocking callable so it can be awaited.

    The returned coroutine function runs ``func`` through
    ``loop.run_in_executor(None, ...)`` on the running event loop and
    hands back whatever ``func`` returned.

    Args:
        func: Synchronous callable to wrap.

    Returns:
        A coroutine function accepting the same arguments as ``func``.
    """
    import functools

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()
        # run_in_executor only forwards positional arguments, so the keyword
        # arguments are bound up front with functools.partial.
        # The result is returned: previously it was awaited and discarded,
        # so every decorated function produced None.
        return await loop.run_in_executor(
            None, functools.partial(func, *args, **kwargs)
        )

    return wrapper
@ -0,0 +1,41 @@
|
||||
import bs4 |
||||
import executor |
||||
import pypub |
||||
import requests |
||||
|
||||
|
||||
@executor.executor
def process(metadata, output_dir):
    """Fetch the password-protected Gravesong page and write it out as an EPUB.

    The page body is split into chapters at every ``h1`` element; the ``p``
    elements that follow a heading form that chapter's content. Paragraphs
    found before the first heading go into an "Introduction" chapter.

    Args:
        metadata: Mapping of keyword arguments forwarded to ``pypub.Epub``
            (assumes the pypub build in use accepts these keys as keyword
            arguments - TODO confirm against the pinned fork).
        output_dir: Directory handed to ``book.create_epub``.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        ValueError: If the response has no ``div.entry-content`` element.
    """
    # ``*metadata`` would unpack the dict's *keys* as positional arguments;
    # the mapping has to be expanded as keyword arguments.
    book = pypub.Epub("Gravesong", **metadata)

    response = requests.post(
        "https://wanderinginn.com/wp-pass.php",
        data={"post_password": "Iwalkedameadowweary", "Submit": "Enter"},
        headers={"Referer": "https://wanderinginn.com/2022/01/11/gravesong/"},
    )
    # Fail loudly instead of building a book out of an error page.
    response.raise_for_status()

    html = bs4.BeautifulSoup(response.content, "html.parser")
    entry_content = html.select_one("div.entry-content")
    if entry_content is None:
        raise ValueError("Gravesong - no div.entry-content element in the response")

    for content_img in entry_content.find_all("img"):
        content_img.decompose()

    chapter_title = "Introduction"
    chapter_parts = []

    for paragraph in entry_content.children:
        if paragraph.name == "h1":
            # A heading closes the chapter collected so far. Nothing is
            # emitted when no paragraph preceded the heading.
            if chapter_parts:
                print(f"Gravesong - {chapter_title}")
                book.add_chapter(
                    pypub.create_chapter_from_string(
                        "".join(chapter_parts), chapter_title
                    )
                )
            chapter_title = paragraph.get_text()
            # Start the next chapter from scratch; without this reset every
            # chapter would also contain all the previous ones.
            chapter_parts = []
        elif paragraph.name == "p":
            chapter_parts.append(paragraph.prettify())

    # Flush the chapter that was still open when the content ended.
    if chapter_parts:
        print(f"Gravesong - {chapter_title}")
        book.add_chapter(
            pypub.create_chapter_from_string("".join(chapter_parts), chapter_title)
        )

    print("Gravesong - Book")
    book.create_epub(output_dir)
@ -1,86 +0,0 @@
|
||||
import fs from 'fs' |
||||
import cheerio from 'cheerio' |
||||
import fetch from 'node-fetch' |
||||
import Epub from 'epub-gen-funstory' |
||||
|
||||
// Make sure the output directory exists before any book is written.
// `recursive: true` makes the call a no-op when the directory is already
// there, which avoids the check-then-create race of existsSync + mkdirSync.
fs.mkdirSync('output', { recursive: true })
||||
|
||||
/**
 * Build the option object describing one volume of a book.
 *
 * @param {number} volume - Volume number, used in the title and file name.
 * @param {string} title - Title of the series.
 * @param {string|string[]} author - Author name(s), passed through unchanged.
 * @param {string} tocTitle - Heading for the table of contents.
 * @param {string} description - Description of the book.
 * @returns {object} Fresh option object with empty `content` and `links` arrays.
 */
const metadata = (volume, title, author, tocTitle, description) => {
  // Title and output file name share the same "<title> - Volume <n>" label.
  const volumeLabel = `${title} - Volume ${volume}`

  return {
    title: volumeLabel,
    author,
    cover: 'https://i.pinimg.com/originals/0b/fd/cf/0bfdcfb42ba3ff0a22f4a7bc52928af4.png',
    output: `output/${volumeLabel}.epub`,
    version: 3,
    lang: 'fr',
    tocTitle,
    appendChapterTitles: true,
    content: [],
    links: [],
    verbose: true,
    description,
  }
}
||||
|
||||
/**
 * Download one chapter page and reduce it to its title and cleaned-up body.
 *
 * Links, `h3` headings, horizontal rules and tiled galleries are stripped
 * from the `div.entry-content` element before its HTML is returned.
 *
 * @param {string} url - Address of the chapter page.
 * @returns {Promise<{title: string, data: string|null}>} Chapter title and body HTML.
 * @throws {Error} When the server answers with a non-2xx status.
 */
const fetchPage = async (url) => {
  const response = await fetch(url)
  // Without this check an error page would silently become a chapter.
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`)
  }

  const html = cheerio.load(await response.text())

  const title = html('h1.entry-title').text()
  const content = html('div.entry-content')
  content.find('a, h3, hr, div.tiled-gallery').remove()
  const data = content.html()

  console.log(title)

  return { title, data }
}
||||
|
||||
/**
 * Read a table-of-contents page and generate one EPUB per listed volume.
 *
 * The paragraphs of `div.entry-content` are read in pairs: an even-indexed
 * paragraph holds a "Volume N" heading, the odd-indexed paragraph after it
 * holds the chapter links of that volume.
 *
 * @param {string} url - Address of the table-of-contents page.
 * @param {string[]} authors - Author names stored in every generated book.
 * @returns {Promise<void>} Settles once every chapter has been fetched and
 *   every book has been handed to the EPUB generator.
 * @throws {Error} When the table of contents cannot be downloaded.
 */
const run = async (url, authors) => {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`)
  }
  const html = cheerio.load(await response.text())

  const content = html('div.entry-content > p')
  const title = html('#site-title > span > a').text()
  const summary = html('h1.entry-title').text()
  const description = html('#site-description').text()

  const books = []
  // Book receiving the links of the next odd paragraph. Tracking the object
  // itself (instead of indexing books[volume - 1]) keeps this working when a
  // heading is not a number or when volume numbers are not exactly 1..n.
  let current = null

  content.each((index, paragraph) => {
    if (index % 2 === 0) {
      const volume = Number.parseInt(html(paragraph).text().replace(/Volume /, '').trim(), 10)
      current = Number.isNaN(volume)
        ? null
        : metadata(volume, title, authors, summary, description)
      if (current !== null) books.push(current)
      return
    }

    // Links that follow a paragraph which was not a volume heading are skipped.
    if (current === null) return

    html('a', paragraph).each((_, anchor) => {
      current.links.push(html(anchor).attr('href'))
    })
  })

  // Await the per-book work so failures reject `run` instead of surfacing
  // as unhandled rejections from floating promises.
  await Promise.all(books.map(async (book) => {
    // Chapters are fetched one after another to keep them in reading order.
    for (const link of book.links) {
      book.content.push(await fetchPage(link))
    }

    new Epub(book)
  }))
}
||||
|
||||
// Table-of-contents pages to convert, each with the authors credited in the
// generated books. Both conversions are started without waiting on each other.
const sources = [
  ['https://aubergevagabonde.wordpress.com/sommaire/', ['Maroti', 'ElliVia', 'Pirateaba']],
  ['https://wanderinginn.com/table-of-contents/', ['Pirateaba']],
]

for (const [url, authors] of sources) {
  // Report a failed conversion and flag the process as failed instead of
  // leaving the rejection unhandled.
  run(url, authors).catch((error) => {
    console.error(error)
    process.exitCode = 1
  })
}
@ -1,26 +0,0 @@
|
||||
{ |
||||
"name": "auberge_vagabonde_js", |
||||
"version": "1.0.0", |
||||
"description": "Convert The Wandering Inn Into Ebooks", |
||||
"repository": { |
||||
"type": "git", |
||||
"url": "https://git.crystalyx.net/Xefir/Auberge_Vagabonde_JS" |
||||
}, |
||||
"author": "Xéfir Destiny", |
||||
"license": "ISC", |
||||
"main": "index.js", |
||||
"scripts": { |
||||
"start": "node .", |
||||
"lint": "eslint ." |
||||
}, |
||||
"dependencies": { |
||||
"cheerio": "1.0.0-rc.10", |
||||
"epub-gen-funstory": "0.1.3", |
||||
"node-fetch": "2" |
||||
}, |
||||
"devDependencies": { |
||||
"@types/node-fetch": "2", |
||||
"eslint": "8.7.0" |
||||
}, |
||||
"type": "module" |
||||
} |
@ -0,0 +1,3 @@
|
||||
requests==2.27.1 |
||||
beautifulsoup4==4.10.0 |
||||
git+https://git.crystalyx.net/Xefir/[email protected]/py3#pypub |
@ -0,0 +1,69 @@
|
||||
import asyncio |
||||
import gravesong |
||||
import logging |
||||
import os |
||||
import requests |
||||
import twi |
||||
|
||||
|
||||
# Raise the root logger and every logger registered so far to INFO.
loggers = [logging.getLogger()] + [
    logging.getLogger(name) for name in logging.root.manager.loggerDict
]
for logger in loggers:
    logger.setLevel(logging.INFO)

# Output locations: generated books and the cover images they reference.
output_epubs = "output/epubs"
output_imgs = "output/imgs"
# exist_ok=True makes directory creation idempotent without a separate
# isdir() check.
os.makedirs(output_epubs, exist_ok=True)
os.makedirs(output_imgs, exist_ok=True)

# Download the cover images up front; the metadata passed to the book
# builders points at these local files.
gravesong_img = requests.get(
    "https://wanderinginn.files.wordpress.com/2021/12/gravesong-by-boboplushie.jpg"
)
# Abort instead of saving an error page as a .jpg cover.
gravesong_img.raise_for_status()
with open(f"{output_imgs}/gravesong-by-boboplushie.jpg", "wb") as f:
    f.write(gravesong_img.content)

twi_img = requests.get(
    "https://i0.wp.com/thefantasyinn.com/wp-content/uploads/2018/08/twi.jpg"
)
twi_img.raise_for_status()
with open(f"{output_imgs}/twi.jpg", "wb") as f:
    f.write(twi_img.content)
||||
|
||||
|
||||
async def main():
    """Start the Gravesong job and both table-of-contents jobs, then await them together."""
    gravesong_job = gravesong.process(
        dict(
            creator="Pirateaba",
            language="en",
            publisher="Xefir",
            cover=f"{output_imgs}/gravesong-by-boboplushie.jpg",
        ),
        output_epubs,
    )

    # English edition, built from the wanderinginn.com table of contents.
    english_job = twi.process(
        dict(
            creator="Pirateaba",
            language="en",
            publisher="Xefir",
            cover=f"{output_imgs}/twi.jpg",
        ),
        output_epubs,
        "https://wanderinginn.com/table-of-contents/",
    )

    # French edition, built from the aubergevagabonde.wordpress.com summary page.
    french_job = twi.process(
        dict(
            creator="Pirateaba",
            language="fr",
            publisher="Maroti, ElliVia",
            cover=f"{output_imgs}/twi.jpg",
        ),
        output_epubs,
        "https://aubergevagabonde.wordpress.com/sommaire/",
    )

    await asyncio.gather(gravesong_job, english_job, french_job)


asyncio.run(main())
@ -0,0 +1,58 @@
|
||||
import asyncio |
||||
import executor |
||||
import bs4 |
||||
import pypub |
||||
import requests |
||||
|
||||
|
||||
def fetchVolume(title, metadata, volume_title, output_dir, links):
    """Download every chapter of one volume and write the volume as an EPUB.

    Args:
        title: Series title; combined with ``volume_title`` for the book title.
        metadata: Mapping of keyword arguments forwarded to ``pypub.Epub``
            (assumes the pypub build in use accepts these keys as keyword
            arguments - TODO confirm against the pinned fork).
        volume_title: Name of the volume.
        output_dir: Directory handed to ``book.create_epub``.
        links: Anchor elements whose ``href`` points at a chapter page.

    Raises:
        requests.HTTPError: If a chapter request returns an error status.
    """
    # ``*metadata`` would unpack the dict's *keys* as positional arguments;
    # the mapping has to be expanded as keyword arguments.
    book = pypub.Epub(f"{title} - {volume_title}", **metadata)

    for link in links:
        chapter_response = requests.get(link["href"])
        # Do not turn an error page into a chapter.
        chapter_response.raise_for_status()

        chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
        chapter_content = chapter_html.select_one("div.entry-content")
        chapter_title = chapter_html.select_one("h1.entry-title").get_text()

        # Strip links, rules and galleries from the chapter body.
        # select() is required for "div.tiled-gallery": find_all() treats its
        # argument as a tag name, so the gallery filter never matched.
        # Each selector is evaluated after the previous removals, so no
        # already-destroyed element is touched twice.
        for selector in ("a", "hr", "div.tiled-gallery"):
            for removed in chapter_content.select(selector):
                removed.decompose()

        print(f"{title} - {chapter_title}")
        book.add_chapter(
            pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title)
        )

    print(f"{title} - {volume_title}")
    book.create_epub(output_dir)
||||
|
||||
|
||||
@executor.executor
def process(metadata, output_dir, url):
    """Read a table-of-contents page and build one EPUB per listed volume.

    Paragraphs of ``div.entry-content`` that contain a ``strong`` element
    name a volume; the other paragraphs carry the chapter links of the most
    recently named volume.

    Args:
        metadata: Mapping forwarded unchanged to ``fetchVolume``.
        output_dir: Directory the EPUB files are written to.
        url: Address of the table-of-contents page.

    Raises:
        requests.HTTPError: If the table-of-contents request returns an
            error status.
    """
    response = requests.get(url)
    response.raise_for_status()

    html = bs4.BeautifulSoup(response.content, "html.parser")
    content = html.select("div.entry-content > p")
    title = html.select_one("#site-title > span > a").get_text()

    volume_title = None

    for paragraph in content:
        if paragraph.strong is not None:
            volume_title = paragraph.strong.get_text()
            continue

        links = paragraph.find_all("a")
        if not links:
            # Nothing to download for this paragraph; do not emit an empty book.
            continue

        # fetchVolume is a plain blocking function that returns None, so its
        # results cannot be passed to asyncio.gather, and asyncio.run rejects
        # anything that is not a coroutine. The volumes are therefore built
        # one after another, right here.
        fetchVolume(title, metadata, volume_title, output_dir, links)
Loading…
Reference in new issue