#!/usr/bin/env python3
"""Scrape 2hdp.fr episode pages for a given season and emit a CSV of
episode number, film title, and a download URL found via pygg.py."""

import argparse
import csv
import re
import subprocess
import sys

import bs4
import requests

parser = argparse.ArgumentParser()
parser.add_argument("-s", "--season", type=int)
args = parser.parse_args()

# Stream results as CSV on stdout, flushing after every row so progress
# is visible even when the output is piped.
out = csv.DictWriter(sys.stdout, ["EP", "NAME", "URL"])
out.writeheader()
sys.stdout.flush()

# Walk the paginated season listing until a page returns no episodes.
page = 1
while True:
    page_req = requests.get(
        "https://www.2hdp.fr/", params={"season": args.season, "page": page}
    )
    page_html = bs4.BeautifulSoup(page_req.text, "html.parser")
    episodes = page_html.select("a.mx-auto")
    if len(episodes) == 0:
        break

    for episode in episodes:
        episode_req = requests.get(str(episode["href"]))
        episode_html = bs4.BeautifulSoup(episode_req.text, "html.parser")

        # Film title, e.g. "The Thing".
        raw_title = episode_html.select_one("h1.inline-block")
        if not raw_title:
            continue
        title = raw_title.get_text().strip()

        # Release year, shown in parentheses next to the title block.
        raw_year = episode_html.select_one("div.block")
        if not raw_year:
            continue
        re_year = re.search(r"\((\d*)\)", raw_year.get_text())
        if not re_year:
            continue
        year = re_year.group(1)

        # Episode number within the season.
        raw_ep = episode_html.select_one("strong.flex-shrink-0")
        if not raw_ep:
            continue
        ep = raw_ep.get_text().strip()

        # First attempt: search with pygg.py restricted to preferred uploaders.
        try:
            output = subprocess.run(
                ["pygg.py", "-u", "winks", "-u", "mhdgz", "-y", year, title],
                check=True,
                capture_output=True,
            )
            out.writerow(
                {"EP": ep, "NAME": title, "URL": output.stdout.decode().strip()}
            )
            sys.stdout.flush()
            continue
        except Exception:
            pass

        # Second attempt: same search without the uploader filter.
        try:
            output = subprocess.run(
                ["pygg.py", "-y", year, title],
                check=True,
                capture_output=True,
            )
            out.writerow(
                {"EP": ep, "NAME": title, "URL": output.stdout.decode().strip()}
            )
            sys.stdout.flush()
            continue
        except Exception:
            pass

        # Both searches failed: record the episode with no URL.
        out.writerow({"EP": ep, "NAME": title, "URL": "No results"})
        sys.stdout.flush()

    page += 1