#!/usr/bin/env python3

from argparse import ArgumentParser
from csv import DictWriter
from datetime import datetime
from sys import stdout
from time import sleep
from xml.etree import ElementTree

from bs4 import BeautifulSoup
from requests import get

parser = ArgumentParser()
parser.add_argument(
    # NOTE: with action="append", argparse appends user-supplied --keyword
    # values to this default, so "mhdgz" always stays in the list.
    "--keyword", action="append", default=["mhdgz"], help="Preferred words to search on names"
)
parser.add_argument("--cookie", required=True, help="Cookies to bypass Cloudflare")
parser.add_argument("--user-agent", required=True, help="User Agent to bypass Cloudflare")
parser.add_argument(
    "--feed", required=True, action="append", help="RSS feed to search for new movies"
)
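
# A minimal sketch of an invocation (the script name, cookie, user agent, and
# feed URL below are all placeholders, not working values):
#
#   ./script.py --cookie "cf_clearance=..." \
#       --user-agent "Mozilla/5.0 ..." \
#       --feed "https://example.com/show/feed.xml" > episodes.csv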
args = parser.parse_args()


def nb_files(id: int):
    """Count the entries in a torrent's file list whose name ends in "mkv"."""
    response = get(
        f"https://www.ygg.re/engine/get_files?torrent={id}",
        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
    )
    sleep(1)  # throttle: one request per second

    json = response.json()
    soup = BeautifulSoup(json["html"], "html.parser")
    rows = soup.find_all("tr")
    length = 0

    for row in rows:
        columns = row.find_all("td")

        # Guard against header or spacer rows with fewer than two cells.
        if len(columns) < 2:
            continue

        if columns[1].text.strip().endswith("mkv"):
            length += 1

    return length
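
# For instance (torrent id illustrative), nb_files(123456) returns the number
# of "mkv" entries in that torrent's file list; is_valid() below requires this
# count to be exactly 1, i.e. a single-file episode.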


def parse_size(size):
    # https://stackoverflow.com/a/42865957
    # French SI suffixes: Ko/Mo/Go/To (kilo-, mega-, giga-, tera-octets).
    units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
    number = size[:-2]
    unit = size[-2:]

    return int(float(number) * units[unit])
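
# Worked examples (input strings assumed for illustration): parse_size("710Mo")
# returns 710_000_000 and parse_size("1.5Go") returns 1_500_000_000.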


def is_valid(id: int, size: int, completed: int):
    # Heuristic filter: under 10 Go, more than 10 completed downloads, and
    # exactly one .mkv file in the torrent.
    return size < parse_size("10Go") and completed > 10 and nb_files(id) == 1


writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
writer.writeheader()

for feed in args.feed:
    tree = ElementTree.fromstring(get(feed).text)
    loop = 0

    # Iterate oldest-first so the positional fallback for episode numbers lines up.
    for item in tree.findall(".//item")[::-1]:
        loop += 1
        title = str(item.findtext("title")).strip()
        # Prefer the itunes:season tag; fall back to the publication year.
        season = int(
            item.findtext(
                "itunes:season", namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
            )
            or datetime.strptime(item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z").year
        )
        # Prefer the itunes:episode tag; fall back to the item's position in the feed.
        episode = int(
            item.findtext(
                "itunes:episode",
                namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"},
            )
            or loop
        )

        # Query the tracker's search page for this title, oldest uploads first
        # (category/sub_category values come from the original URL).
        response = get(
            f"https://www.ygg.re/engine/search?name={title}"
            "&description=&file=&uploader=&category=2145&sub_category=2183"
            "&do=search&order=asc&sort=publish_date",
            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
        )
        sleep(1)  # throttle: one request per second

        soup = BeautifulSoup(response.text, "html.parser")
        rows = soup.select("table.table tr")
        column = None

        # First pass: prefer releases whose name contains one of the keywords.
        for row in rows:
            columns = row.find_all("td")

            if len(columns) == 0:
                continue

            if any(keyword.lower() in columns[1].text.strip().lower() for keyword in args.keyword):
                if is_valid(
                    int(columns[2].a["target"]),
                    parse_size(columns[5].text.strip()),
                    int(columns[6].text.strip()),
                ):
                    column = columns
                    break

        # Second pass: fall back to any MULTI (multi-language) release.
        if column is None:
            for row in rows:
                columns = row.find_all("td")

                if len(columns) == 0:
                    continue

                if "multi" in columns[1].text.strip().lower():
                    if is_valid(
                        int(columns[2].a["target"]),
                        parse_size(columns[5].text.strip()),
                        int(columns[6].text.strip()),
                    ):
                        column = columns
                        break

        # One CSV row per feed item; name and link stay empty when nothing matched.
        writer.writerow(
            {
                "title": title,
                "season": season,
                "episode": episode,
                "name": column[1].text.strip() if column else "",
                "link": column[1].a["href"] if column else "",
            }
        )
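
# Output is CSV on stdout. With the fieldnames above, a run produces something
# like the following (row values illustrative only):
#
#   title,season,episode,name,link
#   Show Title,2024,1,Show.Title.S01E01.MULTI.1080p.WEB.mkv,/torrent/...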