Back to python + Gravesong

2022-01-18 20:43:45 +00:00 · 2022-01-18 20:43:45 +00:00 · d3a7ac0fc2
commit d3a7ac0fc2
parent a3e84310ff
11 changed files with 190 additions and 1861 deletions
--- a/.drone.yml
+++ b/.drone.yml
@ -4,11 +4,13 @@ type: docker

 steps:
  - name: epub
-    image: node
+    # The official image is "python"; "slim" is a tag of it, not part of the name.
+    image: python:slim
    commands:
-      - yarn install
-      - yarn lint
-      - yarn start
+      - pip install flake8 black
+      # flake8 codes carry their letter prefix; a bare "501" silences nothing.
+      - flake8 --extend-ignore=E501
+      # black needs an explicit path, otherwise it has nothing to check.
+      - black --check .
+      - pip install -r requirements.txt
+      - python run.py
  - name: pdf
    image: linuxserver/calibre
    commands:
--- a/.eslintrc.yml
+++ b/.eslintrc.yml
@ -1,8 +0,0 @@
-env:
-  es2021: true
-  node: true
-extends: 'eslint:recommended'
-parserOptions:
-  ecmaVersion: 12
-  sourceType: module
-rules: {}
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,4 @@
+__pycache__
+.venv
+.vscode
 output
-node_modules
--- a/executor.py
+++ b/executor.py
@ -0,0 +1,10 @@
+import asyncio
+
+
+def executor(func):
+    """Turn a blocking callable into a coroutine function.
+
+    Awaiting the returned coroutine runs ``func`` in the event loop's default
+    executor and yields whatever ``func`` returns; exceptions raised by
+    ``func`` propagate to the awaiting caller.
+
+    Args:
+        func: Synchronous callable to offload.
+
+    Returns:
+        An ``async`` wrapper with the same name, docstring and arguments.
+    """
+    import functools
+
+    @functools.wraps(func)
+    async def wrapper(*args, **kwargs):
+        loop = asyncio.get_running_loop()
+        # run_in_executor() only forwards positional arguments, so bind the
+        # whole call up front.
+        call = functools.partial(func, *args, **kwargs)
+        return await loop.run_in_executor(None, call)
+
+    return wrapper
--- a/gravesong.py
+++ b/gravesong.py
@ -0,0 +1,41 @@
+import bs4
+import executor
+import pypub
+import requests
+
+
+@executor.executor
+def process(metadata, output_dir):
+    """Download the Gravesong post and write it to *output_dir* as an EPUB.
+
+    The password form is posted to unlock the post, images are removed, and
+    the body is split into one chapter per ``<h1>`` heading. Paragraphs that
+    precede the first heading go into an "Introduction" chapter.
+
+    Args:
+        metadata: Mapping forwarded to ``pypub.Epub`` as keyword arguments.
+        output_dir: Directory handed to ``Epub.create_epub``.
+
+    Raises:
+        requests.HTTPError: If the site answers with an error status.
+        ValueError: If the page has no ``div.entry-content`` element.
+    """
+    # ``**`` forwards the mapping as keyword arguments; ``*`` would pass the
+    # key names themselves as positional values.
+    # NOTE(review): assumes pypub.Epub accepts every key in metadata - confirm.
+    book = pypub.Epub("Gravesong", **metadata)
+
+    response = requests.post(
+        "https://wanderinginn.com/wp-pass.php",
+        data={"post_password": "Iwalkedameadowweary", "Submit": "Enter"},
+        headers={"Referer": "https://wanderinginn.com/2022/01/11/gravesong/"},
+    )
+    response.raise_for_status()
+
+    html = bs4.BeautifulSoup(response.content, "html.parser")
+    entry_content = html.select_one("div.entry-content")
+    if entry_content is None:
+        raise ValueError("Gravesong page has no div.entry-content element")
+
+    for content_img in entry_content.find_all("img"):
+        content_img.decompose()
+
+    chapter_title = "Introduction"
+    chapter_parts = []
+
+    def add_pending_chapter():
+        # A heading with no paragraphs before it (for instance a page that
+        # opens with an <h1>) would otherwise produce an empty chapter.
+        if not chapter_parts:
+            return
+        print(f"Gravesong - {chapter_title}")
+        book.add_chapter(
+            pypub.create_chapter_from_string("".join(chapter_parts), chapter_title)
+        )
+        # Start the next chapter from scratch so text is not repeated.
+        chapter_parts.clear()
+
+    for paragraph in entry_content.children:
+        if paragraph.name == "h1":
+            # A new heading closes the chapter collected so far.
+            add_pending_chapter()
+            chapter_title = paragraph.get_text()
+        elif paragraph.name == "p":
+            chapter_parts.append(paragraph.prettify())
+
+    # The last chapter has no following heading to close it.
+    add_pending_chapter()
+
+    print("Gravesong - Book")
+    book.create_epub(output_dir)
--- a/index.js
+++ b/index.js
@ -1,86 +0,0 @@
-import fs from 'fs'
-import cheerio from 'cheerio'
-import fetch from 'node-fetch'
-import Epub from 'epub-gen-funstory'
-
-if (!fs.existsSync('output')) {
-    fs.mkdirSync('output')
-}
-
-const metadata = (volume, title, author, tocTitle, description) => ({
-    title: `${title} - Volume ${volume}`,
-    author,
-    cover: 'https://i.pinimg.com/originals/0b/fd/cf/0bfdcfb42ba3ff0a22f4a7bc52928af4.png',
-    output: `output/${title} - Volume ${volume}.epub`,
-    version: 3,
-    lang: 'fr',
-    tocTitle,
-    appendChapterTitles: true,
-    content: [],
-    links: [],
-    verbose: true,
-    description,
-})
-
-const fetchPage = async (url) => {
-    const response = await fetch(url)
-    const responseHtml = await response.text()
-    const html = cheerio.load(responseHtml)
-
-    const title = html('h1.entry-title').text()
-    const content = html('div.entry-content')
-    content.find('a').remove()
-    content.find('h3').remove()
-    content.find('hr').remove()
-    content.find('div.tiled-gallery').remove()
-    const data = content.html()
-
-    console.log(title)
-
-    return {
-        title,
-        data,
-    }
-}
-
-const run = async (url, authors) => {
-    const books = []
-    const response = await fetch(url)
-    const responseHtml = await response.text()
-    const html = cheerio.load(responseHtml)
-
-    const content = html('div.entry-content > p')
-    const title = html('#site-title > span > a').text()
-    const summary = html('h1.entry-title').text()
-    const description = html('#site-description').text()
-    let volume = 0;
-
-    content.each((i, el) => {
-        if (i % 2 === 0) {
-            volume = parseInt(html(el).text().replace(/Volume /, '').trim())
-            if (isNaN(volume)) return
-            books.push(metadata(volume, title, authors, summary, description))
-        } else {
-            html('a', el).each((i, el) => {
-                books[volume - 1].links.push(html(el).attr('href'))
-            })
-        }
-    })
-
-    books.map(async book => {
-        for (const link of book.links) {
-            book.content.push(await fetchPage(link))
-        }
-
-        new Epub(book)
-    })
-}
-
-run(
-    'https://aubergevagabonde.wordpress.com/sommaire/',
-    ['Maroti', 'ElliVia', 'Pirateaba']
-)
-run(
-    'https://wanderinginn.com/table-of-contents/',
-    ['Pirateaba']
-)
--- a/package.json
+++ b/package.json
@ -1,26 +0,0 @@
-{
-  "name": "auberge_vagabonde_js",
-  "version": "1.0.0",
-  "description": "Convert The Wandering Inn Into Ebooks",
-  "repository": {
-    "type": "git",
-    "url": "https://git.crystalyx.net/Xefir/Auberge_Vagabonde_JS"
-  },
-  "author": "Xéfir Destiny",
-  "license": "ISC",
-  "main": "index.js",
-  "scripts": {
-    "start": "node .",
-    "lint": "eslint ."
-  },
-  "dependencies": {
-    "cheerio": "1.0.0-rc.10",
-    "epub-gen-funstory": "0.1.3",
-    "node-fetch": "2"
-  },
-  "devDependencies": {
-    "@types/node-fetch": "2",
-    "eslint": "8.7.0"
-  },
-  "type": "module"
-}
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,3 @@
+requests==2.27.1
+beautifulsoup4==4.10.0
+git+https://git.crystalyx.net/Xefir/pypub@fix/py3#pypub
--- a/run.py
+++ b/run.py
@ -0,0 +1,69 @@
+import asyncio
+import gravesong
+import logging
+import os
+import requests
+import twi
+
+
+# Raise every logger registered so far (root included) to INFO.
+loggers = [logging.getLogger()] + [
+    logging.getLogger(name) for name in logging.root.manager.loggerDict
+]
+for logger in loggers:
+    logger.setLevel(logging.INFO)
+
+output_epubs = "output/epubs"
+output_imgs = "output/imgs"
+for directory in (output_epubs, output_imgs):
+    # exist_ok avoids the check-then-create race of isdir() + makedirs().
+    os.makedirs(directory, exist_ok=True)
+
+# Cover images, keyed by the file name they are stored under in output_imgs.
+cover_urls = {
+    "gravesong-by-boboplushie.jpg": (
+        "https://wanderinginn.files.wordpress.com/2021/12/gravesong-by-boboplushie.jpg"
+    ),
+    "twi.jpg": "https://i0.wp.com/thefantasyinn.com/wp-content/uploads/2018/08/twi.jpg",
+}
+for cover_name, cover_url in cover_urls.items():
+    cover_response = requests.get(cover_url)
+    # Fail loudly instead of saving an HTML error page as a .jpg cover.
+    cover_response.raise_for_status()
+    with open(f"{output_imgs}/{cover_name}", "wb") as f:
+        f.write(cover_response.content)
+
+
+async def main():
+    """Await the Gravesong job and both table-of-contents jobs together."""
+    gravesong_metadata = {
+        "creator": "Pirateaba",
+        "language": "en",
+        "publisher": "Xefir",
+        "cover": f"{output_imgs}/gravesong-by-boboplushie.jpg",
+    }
+    english_metadata = {
+        "creator": "Pirateaba",
+        "language": "en",
+        "publisher": "Xefir",
+        "cover": f"{output_imgs}/twi.jpg",
+    }
+    french_metadata = {
+        "creator": "Pirateaba",
+        "language": "fr",
+        "publisher": "Maroti, ElliVia",
+        "cover": f"{output_imgs}/twi.jpg",
+    }
+
+    jobs = [
+        gravesong.process(gravesong_metadata, output_epubs),
+        twi.process(
+            english_metadata,
+            output_epubs,
+            "https://wanderinginn.com/table-of-contents/",
+        ),
+        twi.process(
+            french_metadata,
+            output_epubs,
+            "https://aubergevagabonde.wordpress.com/sommaire/",
+        ),
+    ]
+    await asyncio.gather(*jobs)
+
+
+asyncio.run(main())
--- a/twi.py
+++ b/twi.py
@ -0,0 +1,58 @@
+import asyncio
+import executor
+import bs4
+import pypub
+import requests
+
+
+def fetchVolume(title, metadata, volume_title, output_dir, links):
+    """Build one EPUB volume from the chapter pages referenced by *links*.
+
+    Each link is downloaded, stripped of anchors, horizontal rules and tiled
+    galleries, and added to the book as a chapter titled after the page's
+    ``h1.entry-title``.
+
+    Args:
+        title: Site title, used as the first part of the book title.
+        metadata: Mapping forwarded to ``pypub.Epub`` as keyword arguments.
+        volume_title: Second part of the book title.
+        output_dir: Directory handed to ``Epub.create_epub``.
+        links: Iterable of elements exposing the chapter URL as ``["href"]``.
+
+    Raises:
+        requests.HTTPError: If a chapter page answers with an error status.
+    """
+    # ``**`` forwards the mapping as keyword arguments; ``*`` would pass the
+    # key names themselves as positional values.
+    # NOTE(review): assumes pypub.Epub accepts every key in metadata - confirm.
+    book = pypub.Epub(f"{title} - {volume_title}", **metadata)
+
+    for link in links:
+        chapter_response = requests.get(link["href"])
+        chapter_response.raise_for_status()
+        chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
+        chapter_content = chapter_html.select_one("div.entry-content")
+        chapter_title = chapter_html.select_one("h1.entry-title").get_text()
+
+        for removed in chapter_content.find_all(["a", "hr"]):
+            removed.decompose()
+        # find_all() matches tag names, not CSS selectors, so the gallery
+        # wrapper has to be located with select().
+        for removed in chapter_content.select("div.tiled-gallery"):
+            removed.decompose()
+
+        print(f"{title} - {chapter_title}")
+        book.add_chapter(
+            pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title)
+        )
+
+    print(f"{title} - {volume_title}")
+    book.create_epub(output_dir)
+
+
+@executor.executor
+def process(metadata, output_dir, url):
+    """Generate one EPUB per volume listed on the table-of-contents page *url*.
+
+    The page's ``div.entry-content > p`` paragraphs are walked in order: a
+    paragraph containing ``<strong>`` names the current volume, and any other
+    paragraph supplies the chapter links of that volume.
+
+    Args:
+        metadata: Mapping passed through to ``fetchVolume``.
+        output_dir: Directory passed through to ``fetchVolume``.
+        url: Address of the table-of-contents page.
+
+    Raises:
+        requests.HTTPError: If the table of contents answers with an error
+            status.
+    """
+    response = requests.get(url)
+    response.raise_for_status()
+
+    html = bs4.BeautifulSoup(response.content, "html.parser")
+    content = html.select("div.entry-content > p")
+    title = html.select_one("#site-title > span > a").get_text()
+
+    volume_title = None
+
+    for paragraph in content:
+        if paragraph.strong is not None:
+            volume_title = paragraph.strong.get_text()
+            continue
+
+        links = paragraph.find_all("a")
+        if not links:
+            # Nothing to download; skip instead of writing an empty volume.
+            continue
+
+        # fetchVolume is a plain blocking function, so it is called directly;
+        # its None result is not awaitable and cannot be gathered.
+        fetchVolume(title, metadata, volume_title, output_dir, links)
--- a/yarn.lock
+++ b/yarn.lock