dl/commands/2hdp.py
Michel Roux 97931f0c56
All checks were successful
dl / lint (push) Successful in 1m45s
dl / docker (push) Successful in 2m48s
fix: improve mhdgz performance
2024-12-22 18:57:16 +01:00

146 lines
3.9 KiB
Python
Executable File

#!/usr/bin/env python3
from argparse import ArgumentParser
from csv import DictWriter
from datetime import datetime
from sys import stderr, stdout
from time import sleep
from xml.etree import ElementTree
from bs4 import BeautifulSoup
from requests import get
# Command-line interface. All three options are mandatory.
parser = ArgumentParser()

# Credentials replayed on every YGG request (see the help texts below).
parser.add_argument(
    "--cookie",
    help="Cookies to bypass CloudFlare",
    required=True,
)
parser.add_argument(
    "--user-agent",
    help="User Agent to bypass CloudFlare",
    required=True,
)

# ``--feed`` may be given several times; every value is appended to
# ``args.feed``.
parser.add_argument(
    "--feed",
    help="RSS feed to search for new movies",
    action="append",
    required=True,
)

args = parser.parse_args()
def nb_files(id: int):
    """Tell whether torrent *id* consists of exactly one file ending in ``mkv``.

    Despite its name, this function returns a boolean, not a count.

    Args:
        id: Torrent identifier sent to the ``get_files`` endpoint.

    Returns:
        ``True`` only when the returned file listing contains a single
        ``<tr>`` row and the text of that row's second ``<td>`` ends with
        ``mkv``; ``False`` otherwise.
    """
    response = get(
        f"https://www.ygg.re/engine/get_files?torrent={id}",
        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
    )
    # Pause after every request — presumably to avoid hammering the site.
    sleep(1)

    payload = response.json()
    soup = BeautifulSoup(payload["html"], "html.parser")
    rows = soup.find_all("tr")

    # Anything other than exactly one row can never qualify, so there is no
    # need to inspect the cells at all.
    if len(rows) != 1:
        return False

    columns = rows[0].find_all("td")
    # A row without a second <td> (for instance a header made of <th> cells)
    # would raise IndexError below; it simply is not a single mkv file.
    if len(columns) < 2:
        return False

    return columns[1].text.strip().endswith("mkv")
def parse_size(size):
    """Convert a human-readable size such as ``"1.5Go"`` into a byte count.

    The unit is whatever run of letters ends the string; it is matched
    case-insensitively against ``o``, ``Ko``, ``Mo``, ``Go`` and ``To``
    (decimal multiples, i.e. powers of 1000). Surrounding whitespace, a
    space between number and unit, and a decimal comma are all accepted.

    Args:
        size: Text made of a number followed by a unit, e.g. ``"700Mo"``.

    Returns:
        The size in bytes as an ``int`` (fractional bytes are truncated).

    Raises:
        KeyError: If the unit is not one of the supported ones.
        ValueError: If the numeric part cannot be parsed as a float.
    """
    # https://stackoverflow.com/a/42865957
    units = {"o": 1, "ko": 10**3, "mo": 10**6, "go": 10**9, "to": 10**12}

    text = size.strip()

    # Walk back over the trailing letters to find where the unit starts.
    # Slicing a fixed two characters would mis-split a bare byte unit such
    # as "512o" into number "51" and unit "2o".
    split_at = len(text)
    while split_at > 0 and text[split_at - 1].isalpha():
        split_at -= 1

    number = text[:split_at].strip().replace(",", ".")
    unit = text[split_at:].lower()

    return int(float(number) * units[unit])
def is_valid(id: int, size: int, completed: int):
    """Decide whether a torrent is an acceptable candidate.

    The checks run from cheapest to most expensive: the size must be below
    ``parse_size("10Go")``, ``completed`` must be greater than 10, and only
    then is ``nb_files(id)`` consulted (it performs an HTTP request).

    Args:
        id: Torrent identifier forwarded to ``nb_files``.
        size: Torrent size, compared against ``parse_size("10Go")``.
        completed: Value of the result row's "completed" column.

    Returns:
        ``False`` as soon as one of the numeric checks fails, otherwise the
        result of ``nb_files(id)``.
    """
    size_limit = parse_size("10Go")
    if not size < size_limit:
        return False
    if not completed > 10:
        return False
    return nb_files(id)
def parse_ygg(title: str, page: int = 0):
    """Search YGG for *title* and return the cells of every result row.

    Args:
        title: Text to search for; sent as the ``name`` query parameter.
        page: Value of the ``page`` query parameter (defaults to 0).

    Returns:
        A list with one entry per ``<tr>`` of ``table.table`` that contains
        at least one ``<td>``; each entry is the list of that row's ``<td>``
        elements. Rows without any ``<td>`` are skipped.
    """
    response = get(
        "https://www.ygg.re/engine/search",
        # Handing the query to requests as ``params`` gets every value
        # percent-encoded, so a title containing "&", "#", "+" or "=" can
        # no longer corrupt the query string.
        params={
            "name": title,
            "description": "",
            "file": "",
            "uploader": "",
            "category": "2145",
            "sub_category": "2183",
            "do": "search",
            "order": "asc",
            "sort": "publish_date",
            "page": str(page),
        },
        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
    )
    # Pause after every request — presumably to avoid hammering the site.
    sleep(1)

    soup = BeautifulSoup(response.text, "html.parser")
    cells_per_row = (row.find_all("td") for row in soup.select("table.table tr"))

    # Keep only rows that actually hold <td> cells.
    return [cells for cells in cells_per_row if cells]
# Download the complete "mhdgz" search listing up front, page after page,
# until a page comes back without any result row.
mhdgz = []
page = 0

while True:
    print(f"Parsing YGG page {page}...", file=stderr)

    movies = parse_ygg("mhdgz", page)
    results = len(movies)
    mhdgz.extend(movies)

    # The page parameter advances in steps of 50 — presumably an offset
    # with 50 results per page.
    page += 50

    if results == 0:
        break
# Namespace map needed to look up the ``itunes:*`` tags of a feed item.
ITUNES_NS = {"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}

# One CSV row per feed item is written to stdout; the header goes out
# immediately so a consumer sees output while the slow lookups run.
writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
writer.writeheader()
stdout.flush()

for feed in args.feed:
    # Parse the raw bytes so the XML parser applies the encoding declared by
    # the feed itself, rather than whatever charset requests picked when
    # decoding ``.text``.
    tree = ElementTree.fromstring(get(feed).content)

    # Items are visited in reverse document order; ``loop`` is the 1-based
    # position in that order and serves as the fallback episode number.
    for loop, item in enumerate(reversed(tree.findall(".//item")), start=1):
        title = str(item.findtext("title")).strip()
        needle = title.lower()  # computed once for the whole mhdgz scan

        # Season: the iTunes tag when present, else the publication year.
        season = int(
            item.findtext("itunes:season", namespaces=ITUNES_NS)
            or datetime.strptime(
                item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z"
            ).year
        )
        # Episode: the iTunes tag when present, else the item's position.
        episode = int(item.findtext("itunes:episode", namespaces=ITUNES_NS) or loop)

        # First choice: a release from the pre-fetched mhdgz listing whose
        # name (second cell) contains the title, ignoring case.
        movie = next(
            (mhd for mhd in mhdgz if needle in mhd[1].text.strip().lower()),
            None,
        )

        # Fallback: search by title and take the first "multi" release that
        # passes ``is_valid``.
        if movie is None:
            for row in parse_ygg(title):
                if "multi" in row[1].text.strip().lower() and is_valid(
                    int(row[2].a["target"]),
                    parse_size(row[5].text.strip()),
                    int(row[6].text.strip()),
                ):
                    movie = row
                    break

        # A row is written even without a match; name and link stay empty.
        writer.writerow(
            {
                "title": title,
                "season": season,
                "episode": episode,
                "name": movie[1].text.strip() if movie else "",
                "link": movie[1].a["href"] if movie else "",
            }
        )
        stdout.flush()