dl/commands/2hdp.py

136 lines
4.1 KiB
Python
Raw Normal View History

2024-11-30 11:40:39 +00:00
#!/usr/bin/env python3
from argparse import ArgumentParser
from csv import DictWriter
from datetime import datetime
from sys import stdout
from time import sleep
from xml.etree import ElementTree
from bs4 import BeautifulSoup
from requests import get
# Command-line interface. Parsing happens at import time because the rest of
# the script reads ``args`` as a module-level global.
parser = ArgumentParser()

# NOTE(review): with action="append" and a non-empty default, argparse appends
# command-line values to the default list, so "mhdgz" stays in the list even
# when --keyword is given. Confirm this is intended.
parser.add_argument(
    "--keyword",
    default=["mhdgz"],
    action="append",
    help="Prefered words to search on names",
)

# Credentials replayed on every request sent to the tracker.
parser.add_argument(
    "--cookie",
    help="Cookies to bypass CloudFlare",
    required=True,
)
parser.add_argument(
    "--user-agent",
    help="User Agent to bypass CloudFlare",
    required=True,
)

# May be repeated; every feed given is processed in turn.
parser.add_argument(
    "--feed",
    action="append",
    help="RSS feed to search for new movies",
    required=True,
)

args = parser.parse_args()
def nb_files(id: int) -> bool:
    """Return True when torrent *id* lists exactly one file and it is an mkv.

    The file listing is fetched from the tracker's ``get_files`` endpoint
    using the cookie and user agent given on the command line. The endpoint
    answers with JSON whose ``html`` field holds the listing; each ``<tr>``
    of that fragment is treated as one file, with the file name read from
    the second ``<td>``.

    Despite its name the function returns a boolean, not a count.

    Args:
        id: Torrent identifier as used by the tracker. The name shadows the
            builtin but is kept for backward compatibility with callers.

    Returns:
        True only if the listing has a single row whose name ends in "mkv".
    """
    response = get(
        f"https://www.ygg.re/engine/get_files?torrent={id}",
        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
    )
    # Pause after every request (presumably to stay gentle with the site).
    sleep(1)
    listing = BeautifulSoup(response.json()["html"], "html.parser")
    rows = listing.find_all("tr")

    # Only single-file torrents qualify, so there is no need to scan a
    # longer listing looking for an mkv.
    if len(rows) != 1:
        return False

    cells = rows[0].find_all("td")
    # A row without a name cell (e.g. a header made of <th>) cannot match.
    if len(cells) < 2:
        return False
    return cells[1].text.strip().endswith("mkv")
2024-11-30 11:39:43 +00:00
def parse_size(size):
    """Convert a human-readable size such as ``"1.5Go"`` into bytes.

    Units are the French decimal ones: ``o`` (octet), ``Ko``, ``Mo``, ``Go``
    and ``To``. Leading and trailing whitespace is ignored.

    Args:
        size: Text made of a number immediately followed by a unit.

    Returns:
        The size in bytes, truncated to an integer.

    Raises:
        KeyError: If the text does not end with a known unit.
        ValueError: If the part before the unit is not a number.
    """
    # https://stackoverflow.com/a/42865957
    units = {"o": 1, "Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
    text = size.strip()
    # Prefer a two-letter unit; fall back to the last character so that a
    # bare octet count like "512o" is understood as well.
    unit = text[-2:] if text[-2:] in units else text[-1:]
    factor = units[unit]
    number = text[: len(text) - len(unit)]
    return int(float(number) * factor)
2024-11-30 11:39:43 +00:00
def is_valid(id: int, size: int, completed: int):
    """Tell whether a search result is an acceptable release.

    A result is accepted when it is smaller than 10 Go, has a ``completed``
    value above 100 and passes the ``nb_files`` check. The cheap numeric
    checks run first so the network request made by ``nb_files`` is only
    issued for candidates that already pass them.
    """
    if size >= parse_size("10Go"):
        return False
    if completed <= 100:
        return False
    return nb_files(id)
2024-11-30 11:39:43 +00:00
# XML namespace of the optional season / episode tags read from each item.
ITUNES_NS = {"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
SEARCH_URL = "https://www.ygg.re/engine/search"


def _first_valid_release(rows, wanted):
    """Return the cells of the first acceptable row naming one of *wanted*.

    Args:
        rows: ``<tr>`` elements of the search result table.
        wanted: Lower-case substrings; a row matches when its name cell
            (second ``<td>``) contains at least one of them.

    Returns:
        The list of ``<td>`` cells of the first matching row accepted by
        ``is_valid``, or None when no row qualifies.
    """
    for row in rows:
        cells = row.find_all("td")
        # Skip header rows (no <td>) and rows too short to hold the
        # id / size / completed cells read below.
        if len(cells) < 7:
            continue
        name = cells[1].text.strip().lower()
        if not any(word in name for word in wanted):
            continue
        if is_valid(
            int(cells[2].a["target"]),
            parse_size(cells[5].text.strip()),
            int(cells[6].text.strip()),
        ):
            return cells
    return None


# One CSV line is written to stdout per feed item, whether or not a matching
# release was found (name and link are left empty when none was).
writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
writer.writeheader()

# Lower-case the preferred keywords once instead of once per table row.
keywords = [keyword.lower() for keyword in args.keyword]

for feed in args.feed:
    # Parse the raw bytes so the XML parser applies the encoding declared by
    # the feed itself rather than the charset guessed by requests.
    tree = ElementTree.fromstring(get(feed).content)

    # Items are visited in reverse document order; the 1-based position is
    # the fallback episode number when the feed does not provide one.
    for loop, item in enumerate(reversed(tree.findall(".//item")), start=1):
        title = str(item.findtext("title")).strip()
        # Fall back to the publication year when no season tag is present.
        season = int(
            item.findtext("itunes:season", namespaces=ITUNES_NS)
            or datetime.strptime(item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z").year
        )
        episode = int(item.findtext("itunes:episode", namespaces=ITUNES_NS) or loop)

        # Let requests build the query string so that characters such as
        # "&", "#" or "+" inside the title are escaped instead of corrupting
        # the search parameters.
        response = get(
            SEARCH_URL,
            params={
                "name": title,
                "description": "",
                "file": "",
                "uploader": "",
                "category": "2145",
                "sub_category": "2183",
                "do": "search",
                "order": "asc",
                "sort": "publish_date",
            },
            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
        )
        # Pause after every request (presumably to stay gentle with the site).
        sleep(1)
        rows = BeautifulSoup(response.text, "html.parser").select("table.table tr")

        # Preferred keywords first, then any "multi" release as a fallback.
        column = _first_valid_release(rows, keywords)
        if column is None:
            column = _first_valid_release(rows, ["multi"])

        writer.writerow(
            {
                "title": title,
                "season": season,
                "episode": episode,
                "name": column[1].text.strip() if column else "",
                "link": column[1].a["href"] if column else "",
            }
        )
        # Flush per row so results show up while later items are processed.
        stdout.flush()