diff --git a/commands/2hdp.py b/commands/2hdp.py
index 086d9ed..bdd35b9 100755
--- a/commands/2hdp.py
+++ b/commands/2hdp.py
@@ -2,7 +2,7 @@
 from argparse import ArgumentParser
 from csv import DictWriter
 from datetime import datetime
-from sys import stdout
+from sys import stderr, stdout
 from time import sleep
 from xml.etree import ElementTree
 
@@ -10,9 +10,6 @@ from bs4 import BeautifulSoup
 from requests import get
 
 parser = ArgumentParser()
-parser.add_argument(
-    "--keyword", action="append", default=["mhdgz"], help="Prefered words to search on names"
-)
 parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
 parser.add_argument("--user-agent", required=True, help="User Agent to bypass CloudFlare")
 parser.add_argument(
@@ -56,8 +53,45 @@ def is_valid(id: int, size: int, completed: int):
     return size < parse_size("10Go") and completed > 10 and nb_files(id)
 
 
+def parse_ygg(title: str, page: int = 0):
+    movies = []
+    response = get(
+        f"https://www.ygg.re/engine/search?name={title}"
+        + "&description=&file=&uploader=&category=2145&sub_category=2183"
+        + "&do=search&order=asc&sort=publish_date&page="
+        + str(page),
+        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
+    )
+    sleep(1)
+
+    soup = BeautifulSoup(response.text, "html.parser")
+    rows = soup.select("table.table tr")
+
+    for row in rows:
+        columns = row.find_all("td")
+
+        if len(columns) == 0:
+            continue
+
+        movies.append(columns)
+
+    return movies
+
+
+mhdgz = []
+page = 0
+results = 1
+
+while results > 0:
+    print(f"Parsing YGG page {page}...", file=stderr)
+    movies = parse_ygg("mhdgz", page)
+    mhdgz += movies
+    results = len(movies)
+    page += 50
+
 writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
 writer.writeheader()
+stdout.flush()
 
 for feed in args.feed:
     tree = ElementTree.fromstring(get(feed).text)
@@ -65,6 +99,7 @@ for feed in args.feed:
 
     for item in tree.findall(".//item")[::-1]:
         loop += 1
+        movie = None
         title = str(item.findtext("title")).strip()
         season = int(
            item.findtext(
@@ -80,47 +115,22 @@
             or loop
         )
 
-        response = get(
-            f"https://www.ygg.re/engine/search?name={title}"
-            "&description=&file=&uploader=&category=2145&sub_category=2183"
-            "&do=search&order=asc&sort=publish_date",
-            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
-        )
-        sleep(1)
+        for mhd in mhdgz:
+            if title.lower() in mhd[1].text.strip().lower():
+                movie = mhd
+                break
 
-        soup = BeautifulSoup(response.text, "html.parser")
-        rows = soup.select("table.table tr")
-        column = None
+        if not movie:
+            rows = parse_ygg(title)
 
-        for row in rows:
-            columns = row.find_all("td")
-
-            if len(columns) == 0:
-                continue
-
-            if any(keyword.lower() in columns[1].text.strip().lower() for keyword in args.keyword):
-                if is_valid(
-                    int(columns[2].a["target"]),
-                    parse_size(columns[5].text.strip()),
-                    int(columns[6].text.strip()),
-                ):
-                    column = columns
-                    break
-
-        if column is None:
             for row in rows:
-                columns = row.find_all("td")
-
-                if len(columns) == 0:
-                    continue
-
-                if "multi" in columns[1].text.strip().lower():
+                if "multi" in row[1].text.strip().lower():
                     if is_valid(
-                        int(columns[2].a["target"]),
-                        parse_size(columns[5].text.strip()),
-                        int(columns[6].text.strip()),
+                        int(row[2].a["target"]),
+                        parse_size(row[5].text.strip()),
+                        int(row[6].text.strip()),
                     ):
-                        column = columns
+                        movie = row
                         break
 
         writer.writerow(
@@ -128,8 +138,8 @@
                 "title": title,
                 "season": season,
                 "episode": episode,
-                "name": column[1].text.strip() if column else "",
-                "link": column[1].a["href"] if column else "",
+                "name": movie[1].text.strip() if movie else "",
+                "link": movie[1].a["href"] if movie else "",
             }
         )
         stdout.flush()