dl/commands/2hdp.py

99 lines
2.5 KiB
Python
Raw Normal View History

2023-09-01 21:26:56 +00:00
#!/usr/bin/env python3
import argparse
import csv
import re
import subprocess
import sys
import bs4
import requests
# Listing page of the site; queried with "season" and "page" parameters.
SITE_URL = "https://www.2hdp.fr/"

# Seconds to wait for the site before failing instead of hanging forever.
REQUEST_TIMEOUT = 30

# Matches a parenthesised run of digits such as "(1999)". At least one digit
# is required so that an empty "()" is never passed on as a year.
YEAR_RE = re.compile(r"\((\d+)\)")

# Extra pygg.py arguments for the first, narrower lookup attempt.
PREFERRED_ARGS = ["-u", "winks", "-u", "mhdgz"]

# Value written in the URL column when every lookup attempt failed.
NO_RESULTS = "No results"

CSV_FIELDS = ["EP", "NAME", "URL"]


def extract_year(text):
    """Return the first parenthesised number found in *text*.

    Args:
        text: Free text expected to contain something like ``"(1999)"``.

    Returns:
        The digits as a string, or ``None`` when *text* has no such group.
    """
    match = YEAR_RE.search(text)
    return match.group(1) if match else None


def fetch_html(url, params=None):
    """Download *url* and return it parsed with the ``html.parser`` backend.

    The HTTP status is intentionally not checked: a page without the expected
    elements is handled by the callers (end of listing / skipped episode).
    """
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    return bs4.BeautifulSoup(response.text, "html.parser")


def scrape_episode(url):
    """Read the episode label, title and year from an episode page.

    Args:
        url: Address of the episode page.

    Returns:
        An ``(ep, title, year)`` tuple of strings, or ``None`` when the title,
        the year or the episode label cannot be found on the page.
    """
    html = fetch_html(url)
    raw_title = html.select_one("h1.inline-block")
    raw_year = html.select_one("div.block")
    raw_ep = html.select_one("strong.flex-shrink-0")
    if raw_title is None or raw_year is None or raw_ep is None:
        return None
    year = extract_year(raw_year.get_text())
    if year is None:
        return None
    return raw_ep.get_text().strip(), raw_title.get_text().strip(), year


def run_pygg(extra_args, title, year):
    """Run ``pygg.py`` for *title* / *year* and return its stripped stdout.

    Args:
        extra_args: Arguments inserted before ``-y``.
        title: Title passed as the positional argument.
        year: Year passed to ``-y``.

    Returns:
        The decoded, stripped standard output, or ``None`` when the command
        cannot be started, exits with a non-zero status, or prints bytes that
        cannot be decoded.
    """
    command = ["pygg.py", *extra_args, "-y", year, title]
    try:
        completed = subprocess.run(command, check=True, capture_output=True)
        return completed.stdout.decode().strip()
    except (subprocess.CalledProcessError, OSError, UnicodeDecodeError):
        # Best effort: a failed lookup is reported as "no result" by the
        # caller rather than aborting the whole export.
        return None


def find_url(title, year):
    """Return the ``pygg.py`` output for a film, or ``"No results"``.

    The lookup is tried first with ``PREFERRED_ARGS`` and then without them.
    """
    for extra_args in (PREFERRED_ARGS, []):
        url = run_pygg(extra_args, title, year)
        if url is not None:
            return url
    return NO_RESULTS


def main():
    """Write one CSV row (EP, NAME, URL) per episode to standard output.

    Listing pages are fetched one after another, starting at page 1, until a
    page contains no episode link. Episodes whose page lacks a title, year or
    episode label are skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--season", type=int)
    args = parser.parse_args()

    out = csv.DictWriter(sys.stdout, CSV_FIELDS)
    out.writeheader()
    sys.stdout.flush()

    page = 1
    while True:
        listing = fetch_html(
            SITE_URL, params={"season": args.season, "page": page}
        )
        episodes = listing.select("a.mx-auto")
        if not episodes:
            break
        for episode in episodes:
            href = episode.get("href")
            if not href:
                # An anchor without a target cannot be followed.
                continue
            details = scrape_episode(str(href))
            if details is None:
                continue
            ep, title, year = details
            # The row is written outside any exception handler so that an
            # output error is never mistaken for a failed lookup.
            out.writerow({"EP": ep, "NAME": title, "URL": find_url(title, year)})
            # Flush per row so results show up while later lookups still run.
            sys.stdout.flush()
        page += 1


if __name__ == "__main__":
    main()