dl/commands/2hdp.py

#!/usr/bin/env python3
from argparse import ArgumentParser
from csv import DictWriter
from datetime import datetime
from sys import stdout
from time import sleep
from xml.etree import ElementTree
from bs4 import BeautifulSoup
from requests import get

parser = ArgumentParser()
parser.add_argument(
    "--keyword",
    action="append",
    default=["mhdgz", "winks", "pophd", "azaze", "multi"],
    help="Preferred words to search for in names",
)
parser.add_argument(
    "--exclude",
    action="append",
    default=["3d", "dvd", "iso", "av1", "zza"],
    help="Words to exclude from names",
)
parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
parser.add_argument("--user-agent", required=True, help="User Agent to bypass CloudFlare")
parser.add_argument(
"--feed", required=True, action="append", help="RSS feed to search for new movies"
)
args = parser.parse_args()


def nb_files(id: int):
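    # Fetch the torrent's file list and return True when it consists of a single .mkv file.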
    response = get(
        f"https://www.ygg.re/engine/get_files?torrent={id}",
        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
    )
    sleep(1)
    json = response.json()
    soup = BeautifulSoup(json["html"], "html.parser")
    rows = soup.find_all("tr")
    is_mkv = False
    for row in rows:
        columns = row.find_all("td")
        if columns[1].text.strip()[-3:] == "mkv":
            is_mkv = True
            break
    return is_mkv and len(rows) == 1


def parse_size(size):
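    # Convert a human-readable size such as "1.4Go" (French unit suffixes) to bytes.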
    # https://stackoverflow.com/a/42865957
    units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
    number = size[:-2]
    unit = size[-2:]
    return int(float(number) * units[unit])


def is_valid(id: int, name: str, size: int, completed: int):
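    # Keep torrents under 10 GB that were completed more than 100 times, contain
    # no excluded word in their name, and hold a single .mkv file.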
    return (
        size < parse_size("10Go")
        and completed > 100
        and not any(exclude.lower() in name.lower() for exclude in args.exclude)
        and nb_files(id)
    )


writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
writer.writeheader()
for feed in args.feed:
    tree = ElementTree.fromstring(get(feed).text)
    loop = 0
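    # Items are processed in reverse order; loop counts them as a fallback episode number.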
    for item in tree.findall(".//item")[::-1]:
        loop += 1
        title = str(item.findtext("title")).strip()
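        # Season: itunes:season if present, otherwise the publication year.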
        season = int(
            item.findtext(
                "itunes:season", namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
            )
            or datetime.strptime(item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z").year
        )
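        # Episode: itunes:episode if present, otherwise the running item counter.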
        episode = int(
            item.findtext(
                "itunes:episode",
                namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"},
            )
            or loop
        )
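        # Search the tracker for the title (category 2145, sub-category 2183).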
        response = get(
            f"https://www.ygg.re/engine/search?name={title}"
            "&description=&file=&uploader=&category=2145&sub_category=2183"
            "&do=search&order=asc&sort=publish_date",
            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
        )
        sleep(1)
        soup = BeautifulSoup(response.text, "html.parser")
        rows = soup.select("table.table tr")
        column = None
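        # Keywords are tried in order of preference; keep the first row that matches one and passes is_valid.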
        for keyword in args.keyword:
            for row in rows:
                columns = row.find_all("td")
                if len(columns) == 0:
                    continue
                if keyword.lower() in columns[1].text.strip().lower():
                    if is_valid(
                        int(columns[2].a["target"]),
                        columns[1].text.strip(),
                        parse_size(columns[5].text.strip()),
                        int(columns[6].text.strip()),
                    ):
                        column = columns
                        break
            if column:
                break
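        # Write a row even without a match so every episode shows up in the CSV.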
        writer.writerow(
            {
                "title": title,
                "season": season,
                "episode": episode,
                "name": column[1].text.strip() if column else "",
                "link": column[1].a["href"] if column else "",
            }
        )
        stdout.flush()