#!/usr/bin/env python3
"""Find MULTI single-MKV movie torrents on YGG for episodes listed in RSS feeds.

The script first indexes every upload of the trusted "mhdgz" uploader, then,
for each RSS item, matches it against that index or falls back to a full
category search. Matches are written as CSV rows on stdout.
"""
from argparse import ArgumentParser
from csv import DictWriter
from datetime import datetime
from sys import stderr, stdout
from time import sleep
from urllib.parse import quote_plus
from xml.etree import ElementTree

from bs4 import BeautifulSoup
from requests import get

parser = ArgumentParser()
parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
parser.add_argument("--user-agent", required=True, help="User Agent to bypass CloudFlare")
parser.add_argument(
    "--feed", required=True, action="append", help="RSS feed to search for new movies"
)
args = parser.parse_args()

# Namespace mapping for the itunes-specific RSS tags (season/episode numbers).
ITUNES_NS = {"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}

# Never hang forever on a stalled HTTP connection.
REQUEST_TIMEOUT = 30


def nb_files(id: int) -> bool:
    """Return True when torrent *id* contains exactly one file and it is an MKV.

    Triggers one HTTP request against YGG's file-listing endpoint, then sleeps
    one second to stay gentle with the tracker.
    """
    response = get(
        f"https://www.ygg.re/engine/get_files?torrent={id}",
        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
        timeout=REQUEST_TIMEOUT,
    )
    sleep(1)
    payload = response.json()
    soup = BeautifulSoup(payload["html"], "html.parser")
    rows = soup.find_all("tr")
    is_mkv = False
    for row in rows:
        columns = row.find_all("td")
        # Skip header/malformed rows that have no file-name cell: indexing
        # columns[1] unconditionally raised IndexError on such rows.
        if len(columns) < 2:
            continue
        if columns[1].text.strip().endswith("mkv"):
            is_mkv = True
            break
    return is_mkv and len(rows) == 1


def parse_size(size: str) -> int:
    """Convert a human-readable size such as ``"4.2Go"`` into a byte count.

    Supports the French unit suffixes Ko/Mo/Go/To used by YGG (decimal units).
    Adapted from https://stackoverflow.com/a/42865957
    """
    units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
    number, unit = size[:-2], size[-2:]
    return int(float(number) * units[unit])


def is_valid(id: int, size: int, completed: int) -> bool:
    """Check that a torrent is under 10 Go, well-seeded, and a single MKV file.

    ``nb_files`` is evaluated last because it costs an extra HTTP request;
    short-circuiting on the cheap checks avoids it whenever possible.
    """
    return size < parse_size("10Go") and completed > 10 and nb_files(id)


def parse_ygg(title: str, page: int = 0) -> list:
    """Search YGG for *title* and return result rows as lists of ``<td>`` cells.

    *page* is an offset in results (YGG paginates 50 results per page).
    The title is URL-encoded so names containing ``&``, ``#`` or spaces do
    not corrupt the query string.
    """
    response = get(
        "https://www.ygg.re/engine/search"
        f"?name={quote_plus(title)}"
        "&description=&file=&uploader=&category=2145&sub_category=2183"
        f"&do=search&order=asc&sort=publish_date&page={page}",
        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
        timeout=REQUEST_TIMEOUT,
    )
    sleep(1)
    soup = BeautifulSoup(response.text, "html.parser")
    movies = []
    for row in soup.select("table.table tr"):
        columns = row.find_all("td")
        # The header row only holds <th> cells and yields no <td> — skip it.
        if columns:
            movies.append(columns)
    return movies


# Pre-fetch every torrent uploaded by "mhdgz": those releases are trusted, so
# a plain title match there short-circuits the per-title search further down.
mhdgz = []
page = 0
results = 1
while results > 0:
    print(f"Parsing YGG page {page}...", file=stderr)
    movies = parse_ygg("mhdgz", page)
    mhdgz += movies
    results = len(movies)
    page += 50  # YGG pages are offsets of 50 results

writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
writer.writeheader()
stdout.flush()

for feed in args.feed:
    tree = ElementTree.fromstring(get(feed, timeout=REQUEST_TIMEOUT).text)
    loop = 0
    # Iterate oldest-first so the fallback episode counter is chronological.
    for item in tree.findall(".//item")[::-1]:
        loop += 1
        movie = None
        title = str(item.findtext("title")).strip()
        # Fall back to the publication year when the feed has no itunes:season.
        season = int(
            item.findtext("itunes:season", namespaces=ITUNES_NS)
            or datetime.strptime(
                item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z"
            ).year
        )
        episode = int(item.findtext("itunes:episode", namespaces=ITUNES_NS) or loop)
        # First look for the title among the trusted mhdgz uploads.
        for mhd in mhdgz:
            if title.lower() in mhd[1].text.strip().lower():
                movie = mhd
                break
        # Otherwise search the whole category for a valid MULTI release.
        if not movie:
            for row in parse_ygg(title):
                if "multi" in row[1].text.strip().lower():
                    if is_valid(
                        int(row[2].a["target"]),
                        parse_size(row[5].text.strip()),
                        int(row[6].text.strip()),
                    ):
                        movie = row
                        break
        writer.writerow(
            {
                "title": title,
                "season": season,
                "episode": episode,
                "name": movie[1].text.strip() if movie else "",
                "link": movie[1].a["href"] if movie else "",
            }
        )
        # Emit each row immediately so partial progress survives interruption.
        stdout.flush()