#!/usr/bin/env python3
"""Search ygg.re for torrents matching episodes from RSS feeds.

For every ``<item>`` of every ``--feed``, query the ygg.re search engine for
the episode title, keep the first result that matches a ``--keyword``, passes
the size/seeders/exclusion filters and contains exactly one ``.mkv`` file,
then emit one CSV row per episode on stdout.

Requires ``--cookie`` and ``--user-agent`` to bypass CloudFlare.
"""

from argparse import ArgumentParser, Namespace
from csv import DictWriter
from datetime import datetime
from sys import stdout
from time import sleep
from typing import Optional
from xml.etree import ElementTree

from bs4 import BeautifulSoup
from requests import get

# XML namespace map for the iTunes podcast extension tags used in the feeds.
ITUNES_NS = {"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
# Seconds to wait after each ygg.re request (politeness / rate limiting).
REQUEST_DELAY = 1
# Seconds before an unresponsive server aborts the request.
REQUEST_TIMEOUT = 30


def build_parser() -> ArgumentParser:
    """Build the command-line parser.

    NOTE(review): with ``action="append"`` and a non-empty ``default``,
    user-supplied ``--keyword``/``--exclude`` values are APPENDED to the
    defaults rather than replacing them (standard argparse behavior).
    Kept as-is to preserve existing behavior.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--keyword",
        action="append",
        default=["psaro", "mhdgz", "winks", "pophd", "azaze", "multi"],
        help="Preferred words to search on names",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        default=["3d", "dvd", "iso", "av1", "zza"],
        help="Excluded words to search on names",
    )
    parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
    parser.add_argument(
        "--user-agent", required=True, help="User Agent to bypass CloudFlare"
    )
    parser.add_argument(
        "--feed", required=True, action="append", help="RSS feed to search for new movies"
    )
    return parser


def parse_size(size: str) -> int:
    """Convert a human-readable size such as ``'4.2Go'`` to bytes.

    Uses decimal (SI) units: Ko/Mo/Go/To.  Adapted from
    https://stackoverflow.com/a/42865957

    Raises:
        KeyError: if the two-character unit suffix is unknown.
        ValueError: if the numeric part is not a float.
    """
    units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
    number, unit = size[:-2], size[-2:]
    return int(float(number) * units[unit])


def is_single_mkv(torrent_id: int, headers: dict) -> bool:
    """Return True if the torrent contains exactly one file and it is a .mkv.

    Fetches the torrent's file list from ygg.re and inspects the HTML table
    embedded in the JSON response.  Sleeps REQUEST_DELAY after the request
    to avoid hammering the server.
    """
    response = get(
        f"https://www.ygg.re/engine/get_files?torrent={torrent_id}",
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    sleep(REQUEST_DELAY)
    soup = BeautifulSoup(response.json()["html"], "html.parser")
    rows = soup.find_all("tr")
    # Exactly one file, and its name (second cell) ends with "mkv".
    return len(rows) == 1 and rows[0].find_all("td")[1].text.strip().endswith("mkv")


def is_valid(
    torrent_id: int,
    name: str,
    size: int,
    completed: int,
    args: Namespace,
    headers: dict,
) -> bool:
    """Return True if a search result passes all download filters.

    Filters: smaller than 10 Go, more than 100 completed downloads, name
    contains no excluded word, and the torrent is a single .mkv file.
    The cheap checks run first so the extra HTTP request in
    ``is_single_mkv`` only happens for promising candidates.
    """
    return (
        size < parse_size("10Go")
        and completed > 100
        and not any(word.lower() in name.lower() for word in args.exclude)
        and is_single_mkv(torrent_id, headers)
    )


def find_torrent(title: str, args: Namespace, headers: dict) -> Optional[list]:
    """Search ygg.re for *title* and return the first acceptable result row.

    Tries each keyword in ``args.keyword`` order (keyword priority wins over
    result order).  Returns the matching row's list of ``<td>`` cells, or
    None when nothing qualifies.
    """
    # Let requests percent-encode the query string: titles may contain
    # spaces, '&' or accented characters that would break a raw f-string URL.
    response = get(
        "https://www.ygg.re/engine/search",
        params={
            "name": title,
            "description": "",
            "file": "",
            "uploader": "",
            "category": "2145",
            "sub_category": "2183",
            "do": "search",
            "order": "asc",
            "sort": "publish_date",
        },
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    sleep(REQUEST_DELAY)
    soup = BeautifulSoup(response.text, "html.parser")
    rows = soup.select("table.table tr")
    for keyword in args.keyword:
        for row in rows:
            columns = row.find_all("td")
            if not columns:
                continue  # header rows have <th> cells only
            name = columns[1].text.strip()
            if keyword.lower() not in name.lower():
                continue
            # Cells: [1] name link, [2] anchor whose "target" is the torrent
            # id, [5] size, [6] completed count.
            if is_valid(
                int(columns[2].a["target"]),
                name,
                parse_size(columns[5].text.strip()),
                int(columns[6].text.strip()),
                args,
                headers,
            ):
                return columns
    return None


def main() -> None:
    """Parse arguments, walk every feed and write one CSV row per episode."""
    args = build_parser().parse_args()
    headers = {"Cookie": args.cookie, "User-Agent": args.user_agent}

    writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
    writer.writeheader()

    for feed in args.feed:
        # Use .content (raw bytes) so ElementTree honors the XML encoding
        # declaration instead of requests' guessed text decoding.
        tree = ElementTree.fromstring(
            get(feed, timeout=REQUEST_TIMEOUT).content
        )
        # Feeds list newest first; reverse so episodes are processed (and the
        # positional episode fallback counted) in chronological order.
        for position, item in enumerate(tree.findall(".//item")[::-1], start=1):
            title = str(item.findtext("title")).strip()
            # Season: itunes:season tag, falling back to the publication year.
            season = int(
                item.findtext("itunes:season", namespaces=ITUNES_NS)
                or datetime.strptime(
                    item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z"
                ).year
            )
            # Episode: itunes:episode tag, falling back to feed position.
            episode = int(
                item.findtext("itunes:episode", namespaces=ITUNES_NS) or position
            )

            columns = find_torrent(title, args, headers)
            writer.writerow(
                {
                    "title": title,
                    "season": season,
                    "episode": episode,
                    "name": columns[1].text.strip() if columns else "",
                    "link": columns[1].a["href"] if columns else "",
                }
            )
            # Flush per row so the CSV streams even when piped.
            stdout.flush()


if __name__ == "__main__":
    main()