101 lines
2.5 KiB
Python
101 lines
2.5 KiB
Python
|
#!/usr/bin/env python3
|
||
|
import argparse
|
||
|
import csv
|
||
|
import re
|
||
|
import subprocess
|
||
|
import sys
|
||
|
|
||
|
import bs4
|
||
|
import requests
|
||
|
|
||
|
|
||
|
# Scrape 2hdp.fr for a season's episodes and, for each one, look up a
# download URL via the external "pygg.py" helper script.  Results stream
# to stdout as CSV with columns EP, NAME, URL.

parser = argparse.ArgumentParser(
    description="List a 2hdp.fr season's episodes with pygg.py lookups as CSV."
)
parser.add_argument("-s", "--season", type=int, help="season number to scrape")
args = parser.parse_args()

out = csv.DictWriter(sys.stdout, ["EP", "NAME", "URL"])
out.writeheader()
sys.stdout.flush()


def _pygg_search(year, title, extra_args=()):
    """Run pygg.py for *title*/*year*; return its stdout (stripped) or None.

    *extra_args* is spliced in before the "-y" flag (e.g. uploader
    restrictions).  A non-zero exit status or a failure to launch the
    interpreter is treated as "no result" rather than fatal — this
    script is deliberately best-effort per episode.
    """
    try:
        proc = subprocess.run(
            ["python", "pygg.py", *extra_args, "-y", year, title],
            check=True,
            capture_output=True,
        )
    except (subprocess.CalledProcessError, OSError):
        return None
    return proc.stdout.decode().strip()


page = 1
while True:
    page_req = requests.get(
        "https://www.2hdp.fr/", params={"season": args.season, "page": page}
    )
    page_html = bs4.BeautifulSoup(page_req.text, "html.parser")
    episodes = page_html.select("a.mx-auto")

    # An empty listing page marks the end of the season's pagination.
    if not episodes:
        break

    for episode in episodes:
        episode_req = requests.get(str(episode["href"]))
        episode_html = bs4.BeautifulSoup(episode_req.text, "html.parser")

        # Skip any episode page missing one of the expected fields.
        raw_title = episode_html.select_one("h1.inline-block")
        if not raw_title:
            continue
        title = raw_title.get_text().strip()

        raw_year = episode_html.select_one("div.block")
        if not raw_year:
            continue
        # Release year appears in parentheses, e.g. "(1987)".
        re_year = re.search(r"\((\d*)\)", raw_year.get_text())
        if not re_year:
            continue
        year = re_year.group(1)

        raw_ep = episode_html.select_one("strong.flex-shrink-0")
        if not raw_ep:
            continue
        ep = raw_ep.get_text().strip()

        # Prefer trusted uploaders, then fall back to an open search.
        url = _pygg_search(year, title, ("-u", "winks", "-u", "mhdgz"))
        if url is None:
            url = _pygg_search(year, title)

        out.writerow({"EP": ep, "NAME": title, "URL": url if url is not None else "No results"})
        sys.stdout.flush()

    page += 1