dl/commands/2hdp.py

#!/usr/bin/env python3
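"""Look up every item of the given RSS feeds on www.ygg.re and emit a CSV.

All MHDGZ releases are scraped first; feed items that match none of them fall
back to a per-title search whose hits must be MULTI releases accepted by
is_valid(). One line per item (title, season, episode, name, link) is written
to stdout, with empty name/link columns when nothing matched.

Example invocation (cookie and user-agent values are placeholders):

    ./2hdp.py --cookie "cf_clearance=..." --user-agent "Mozilla/5.0" \
        --feed https://example.org/feed.rss > movies.csv
"""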
from argparse import ArgumentParser
from csv import DictWriter
from datetime import datetime
from sys import stderr, stdout
from time import sleep
from xml.etree import ElementTree

from bs4 import BeautifulSoup
from requests import get

parser = ArgumentParser()
parser.add_argument("--cookie", required=True, help="Cookies to bypass Cloudflare")
parser.add_argument("--user-agent", required=True, help="User-Agent to bypass Cloudflare")
parser.add_argument(
    "--feed", required=True, action="append", help="RSS feed to search for new movies"
)
args = parser.parse_args()


def nb_files(id: int):
    """Return True when torrent `id` contains exactly one file and it is a .mkv."""
    # The endpoint answers with JSON whose "html" field is a table fragment,
    # one <tr> per file in the torrent.
    response = get(
        f"https://www.ygg.re/engine/get_files?torrent={id}",
        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
    )
    sleep(1)
    json = response.json()
    soup = BeautifulSoup(json["html"], "html.parser")
    rows = soup.find_all("tr")
    is_mkv = False
    for row in rows:
        columns = row.find_all("td")
        if columns[1].text.strip()[-3:] == "mkv":
            is_mkv = True
            break
    return is_mkv and len(rows) == 1
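

# Sizes on YGG use French decimal units (octets): Ko, Mo, Go, To.
# For example parse_size("500Mo") == 500_000_000.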
def parse_size(size):
    # https://stackoverflow.com/a/42865957
    units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
    number = size[:-2]
    unit = size[-2:]
    return int(float(number) * units[unit])
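

# Keep a search hit only if the torrent weighs less than 10 Go, has been
# completed more than 10 times, and is a single .mkv file.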
def is_valid(id: int, size: int, completed: int):
    return size < parse_size("10Go") and completed > 10 and nb_files(id)
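

# Query the YGG search engine (category and sub-category are hard-coded in the
# URL) and return the list of <td> cells of every result row. `page` is a
# result offset: the caller advances it by 50.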
def parse_ygg(title: str, page: int = 0):
    movies = []
    response = get(
        f"https://www.ygg.re/engine/search?name={title}"
        + "&description=&file=&uploader=&category=2145&sub_category=2183"
        + "&do=search&order=asc&sort=publish_date&page="
        + str(page),
        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
    )
    sleep(1)  # throttle requests to the tracker
    soup = BeautifulSoup(response.text, "html.parser")
    rows = soup.select("table.table tr")
    for row in rows:
        columns = row.find_all("td")
        # Rows without <td> cells (the table header) are skipped.
        if len(columns) == 0:
            continue
        movies.append(columns)
    return movies
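

# Collect every MHDGZ release up front; the search offset grows by 50 results
# per request until a page comes back empty.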
mhdgz = []
page = 0
results = 1
while results > 0:
    print(f"Parsing YGG page {page}...", file=stderr)
    movies = parse_ygg("mhdgz", page)
    mhdgz += movies
    results = len(movies)
    page += 50
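
# The CSV goes to stdout (progress messages go to stderr) so the output can be
# redirected or piped.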
writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
writer.writeheader()
stdout.flush()
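
# Feed items are processed in reverse order (oldest first for a typical RSS
# feed) so that the loop counter can stand in for a missing itunes:episode tag;
# a missing itunes:season falls back to the publication year.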
for feed in args.feed:
    tree = ElementTree.fromstring(get(feed).text)
    loop = 0
    for item in tree.findall(".//item")[::-1]:
        loop += 1
        movie = None
        title = str(item.findtext("title")).strip()
        season = int(
            item.findtext(
                "itunes:season",
                namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"},
            )
            or datetime.strptime(item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z").year
        )
        episode = int(
            item.findtext(
                "itunes:episode",
                namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"},
            )
            or loop
        )
        # Prefer an MHDGZ release whose name contains the item title.
        for mhd in mhdgz:
            if title.lower() in mhd[1].text.strip().lower():
                movie = mhd
                break
        # Otherwise search the title itself and keep the first MULTI release
        # that passes the size/completion/single-mkv checks.
        if not movie:
            rows = parse_ygg(title)
            for row in rows:
                if "multi" in row[1].text.strip().lower():
                    if is_valid(
                        int(row[2].a["target"]),  # torrent id
                        parse_size(row[5].text.strip()),  # size
                        int(row[6].text.strip()),  # completed downloads
                    ):
                        movie = row
                        break
        writer.writerow(
            {
                "title": title,
                "season": season,
                "episode": episode,
                "name": movie[1].text.strip() if movie else "",
                "link": movie[1].a["href"] if movie else "",
            }
        )
        stdout.flush()