dl/commands/2hdp.py
Michel Roux f05fd79dd9
All checks were successful
dl / lint (push) Successful in 1m34s
dl / docker (push) Successful in 4m20s
fix: 🐛 fix bad nb_files
2024-12-27 21:34:22 +01:00

142 lines
4.2 KiB
Python
Executable File

#!/usr/bin/env python3
from argparse import ArgumentParser
from csv import DictWriter
from datetime import datetime
from sys import stdout
from time import sleep
from xml.etree import ElementTree
from bs4 import BeautifulSoup
from requests import JSONDecodeError, get
parser = ArgumentParser()
# NOTE(review): with action="append" and a non-empty default list, argparse
# APPENDS user-supplied values to the default instead of replacing it — so
# passing --keyword foo yields the six defaults plus "foo". Confirm this
# accumulation is the intended behavior.
parser.add_argument(
    "--keyword",
    action="append",
    default=["psaro", "mhdgz", "winks", "pophd", "azaze", "multi"],
    help="Preferred words to search on names",
)
parser.add_argument(
    "--exclude",
    action="append",
    default=["3d", "dvd", "iso", "av1", "zza"],
    help="Excluded words to search on names",
)
parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
parser.add_argument("--user-agent", required=True, help="User Agent to bypass CloudFlare")
parser.add_argument(
    "--feed", required=True, action="append", help="RSS feed to search for new movies"
)
args = parser.parse_args()
def nb_files(id: int) -> bool:
    """Return True when torrent `id` contains exactly one file and it is an mkv.

    Fetches the tracker's file listing for the torrent and inspects the
    returned HTML table. Returns False on a non-JSON response (e.g. a
    CloudFlare challenge page).
    """
    try:
        response = get(
            f"https://www.ygg.re/engine/get_files?torrent={id}",
            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
        )
        sleep(1)  # crude rate limiting between tracker requests
        json = response.json()
        soup = BeautifulSoup(json["html"], "html.parser")
        rows = soup.find_all("tr")
        is_mkv = False
        for row in rows:
            columns = row.find_all("td")
            # Bug fix: header rows use <th> and yield fewer than 2 <td> cells,
            # which previously raised IndexError on columns[1]. Skip them,
            # matching the guard used in the main search loop.
            if len(columns) < 2:
                continue
            if columns[1].text.strip().endswith("mkv"):
                is_mkv = True
                break
        # Valid only when the torrent holds a single file and that file is mkv.
        return is_mkv and len(rows) == 1
    except JSONDecodeError:
        return False
def parse_size(size):
    """Convert a human-readable size string (e.g. "4.2Go") to an integer byte count.

    The last two characters are the unit suffix; the rest is a decimal number.
    Uses decimal (SI) multipliers for the French "octet" suffixes.
    """
    # https://stackoverflow.com/a/42865957
    scale = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}[size[-2:]]
    return int(float(size[:-2]) * scale)
def is_valid(id: int, name: str, size: int, completed: int):
    """Decide whether a search-result torrent is acceptable.

    Accepts torrents under 10 Go, with more than 100 completed downloads,
    whose name contains none of the excluded words, and that hold a single
    mkv file. Checks are ordered cheapest first so the network call in
    nb_files() only happens for otherwise-acceptable candidates.
    """
    if size >= parse_size("10Go"):
        return False
    if completed <= 100:
        return False
    lowered = name.lower()
    if any(word.lower() in lowered for word in args.exclude):
        return False
    return nb_files(id)
# Emit one CSV row per RSS item on stdout; name/link stay empty when no
# acceptable torrent is found for that episode.
writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
writer.writeheader()
for feed in args.feed:
    # Fetch and parse the RSS feed XML.
    tree = ElementTree.fromstring(get(feed).text)
    # Positional counter used as a fallback episode number (oldest item = 1).
    loop = 0
    # Items are iterated in reverse so the oldest episode comes first.
    for item in tree.findall(".//item")[::-1]:
        loop += 1
        title = str(item.findtext("title")).strip()
        # Season: <itunes:season> when present, otherwise the pubDate year.
        season = int(
            item.findtext(
                "itunes:season", namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
            )
            or datetime.strptime(item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z").year
        )
        # Episode: <itunes:episode> when present, otherwise the counter above.
        episode = int(
            item.findtext(
                "itunes:episode",
                namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"},
            )
            or loop
        )
        # Search the tracker for the episode title. The category/sub_category
        # values presumably target a specific section — TODO confirm on site.
        # NOTE(review): title is interpolated into the URL without explicit
        # percent-encoding; requests re-quotes invalid characters, but confirm
        # titles with '&' or '#' survive intact.
        response = get(
            f"https://www.ygg.re/engine/search?name={title}"
            "&description=&file=&uploader=&category=2145&sub_category=2183"
            "&do=search&order=asc&sort=publish_date",
            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
        )
        sleep(1)  # crude rate limiting between tracker requests
        soup = BeautifulSoup(response.text, "html.parser")
        rows = soup.select("table.table tr")
        # First valid row for the highest-priority keyword wins: keywords are
        # scanned in args.keyword order, rows in page order.
        column = None
        for keyword in args.keyword:
            for row in rows:
                columns = row.find_all("td")
                # Skip header rows (<th> cells only, no <td>).
                if len(columns) == 0:
                    continue
                if keyword.lower() in columns[1].text.strip().lower():
                    if is_valid(
                        # columns[2].a["target"] presumably carries the torrent
                        # id — verify against the site's markup.
                        int(columns[2].a["target"]),
                        columns[1].text.strip(),
                        parse_size(columns[5].text.strip()),
                        int(columns[6].text.strip()),
                    ):
                        column = columns
                        break
            if column:
                break
        writer.writerow(
            {
                "title": title,
                "season": season,
                "episode": episode,
                "name": column[1].text.strip() if column else "",
                "link": column[1].a["href"] if column else "",
            }
        )
        # Flush so rows appear immediately when stdout is piped.
        stdout.flush()