Michel Roux
64e5eb10c7
All checks were successful
continuous-integration/drone/push Build is passing
141 lines
2.5 KiB
Python
141 lines
2.5 KiB
Python
import csv
|
|
import sys
|
|
|
|
import bs4
|
|
import requests
|
|
|
|
|
|
# Page fetched by the scraping code in this file (French Wikipedia episode
# list).  Split across two literals only to keep the line short; the runtime
# value is a single URL string.
WIKI_URL = (
    "https://fr.wikipedia.org/wiki/"
    "Liste_des_%C3%A9pisodes_de_D%C3%A9tective_Conan"
)

# Lower-cased row background values for which a table row is exported; rows
# with any other background are dropped (rows with no background are kept).
GOOD_COLOR = [
    "#ee8000",
    "#80ffff",
]

# Header / field order of the CSV written to stdout.
CSV_COLUMNS = [
    "SW",
    "EW",
    "Arc",
    "ST",
    "ET",
    "Titre",
    "Date",
]

# Rows whose episode number is <= this value are skipped.
# NOTE(review): presumably the last episode available in French dub ("VF")
# -- confirm.
VF_UNTIL = 219
|
|
|
|
# Number of episodes in each season, in season order: index 0 is season 1.
# thetvdb_season() and thetvdb_episode() walk this list to turn an absolute
# episode number into a (season, episode-in-season) pair.
# NOTE(review): presumably mirrors TheTVDB's season split -- confirm.
EPS_BY_SEASONS = [
    28, 26, 28, 24, 28, 28, 31, 26, 35, 31,  # seasons 1-10
    30, 38, 36, 37, 39, 25, 33, 42, 40, 40,  # seasons 11-20
    35, 43, 39, 41, 83, 40, 38, 28, 40, 35,  # seasons 21-30
    99,  # season 31
]
|
|
|
|
# Story arcs, keyed by an absolute episode number that marks the boundary of
# the arc; current_arc() picks the entry with the largest key strictly below
# the episode it is asked about.
ARCS = {
    1: "Conan",
    129: "Haibara",
    179: "Vermouth",
    345: "Phone",
    425: "Kir",
    505: "Bourbon",
    705: "Train",
    783: "Rum",
    1029: "Police",
}
|
|
|
|
|
|
def extract_background(style):
    """Return the ``background`` value found in an inline CSS style string.

    Args:
        style: Content of an HTML ``style`` attribute, i.e. ``;``-separated
            ``property: value`` declarations.  May be empty or ``None``.

    Returns:
        The value of the first ``background`` declaration, stripped and
        lower-cased, or ``None`` when the style has no such declaration.
    """
    if not style:
        return None

    for declaration in style.split(";"):
        # partition() splits on the first ":" only, so values that contain a
        # colon themselves (e.g. "url(http://...)") stay intact, and fragments
        # without any colon (e.g. the blank left by a trailing "; ") are
        # skipped instead of raising ValueError on unpacking.
        prop, colon, value = declaration.partition(":")
        if colon and prop.strip() == "background":
            return value.strip().lower()

    return None
|
|
|
|
|
|
def thetvdb_season(episode):
    """Return the 1-based season that contains absolute episode ``episode``.

    Seasons are laid end to end using the lengths in ``EPS_BY_SEASONS``; the
    result is the first season whose last absolute episode number is greater
    than or equal to ``episode``.

    Returns ``None`` when ``episode`` lies beyond the last listed season.
    """
    last_episode_of_season = 0

    for season_number, season_length in enumerate(EPS_BY_SEASONS, start=1):
        last_episode_of_season += season_length
        if episode <= last_episode_of_season:
            return season_number

    return None
|
|
|
|
|
|
def thetvdb_episode(episode):
    """Return the position of absolute episode ``episode`` within its season.

    The season is looked up with ``thetvdb_season``; the lengths of all
    earlier seasons in ``EPS_BY_SEASONS`` are then subtracted from the
    absolute number, giving a 1-based index inside that season.
    """
    season = thetvdb_season(episode)
    episodes_in_earlier_seasons = sum(EPS_BY_SEASONS[: season - 1])
    return episode - episodes_in_earlier_seasons
|
|
|
|
|
|
def current_arc(episode):
    """Return the arc name from ``ARCS`` that applies to ``episode``.

    The arc chosen is the one whose key is the largest key strictly smaller
    than ``episode``.  Returns ``None`` when no key is smaller.

    NOTE(review): the comparison is strict, so an episode equal to a key is
    still reported under the previous arc, and episode 1 yields ``None``
    even though ``ARCS`` has a key 1 -- confirm this is intended.
    """
    earlier_keys = [boundary for boundary in ARCS if boundary < episode]
    if not earlier_keys:
        return None
    return ARCS[max(earlier_keys)]
|
|
|
|
|
|
# Download the episode list and parse every "wikitable" on the page.
# A timeout keeps the script from hanging forever on a stalled connection.
html = requests.get(WIKI_URL, timeout=30)
soup = bs4.BeautifulSoup(html.text, "html.parser")
tables = soup.select("table.wikitable")

# One CSV row per retained episode, written to stdout.
out = csv.DictWriter(sys.stdout, CSV_COLUMNS)
out.writeheader()

for season, table in enumerate(tables):
    # The first wikitable (index 0) is skipped, so the "SW" column starts at
    # 1 with the second table.
    if not season:
        continue

    trs = table.select("tr")

    for row, tr in enumerate(trs):
        # Skip the first row of each table.
        if row < 1:
            continue

        tds = tr.select("td")
        if len(tds) < 4:
            continue

        # The first cell may hold more than a bare number; only the leading
        # part, before any space or "/", is parsed as the episode number.
        try:
            episode = int(tds[0].text.split(" ")[0].split("/")[0])
        except ValueError:
            continue

        if episode <= VF_UNTIL:
            continue

        # Rows without a style attribute are treated as having no
        # background instead of raising KeyError.
        color = extract_background(tr.get("style", ""))

        if not color or color in GOOD_COLOR:
            out.writerow(
                {
                    "SW": season,
                    "EW": episode,
                    "Arc": current_arc(episode),
                    "ST": thetvdb_season(episode),
                    "ET": thetvdb_episode(episode),
                    "Titre": tds[1].text.strip(),
                    # The guard above only guarantees four cells; emit an
                    # empty date rather than raising IndexError on a row
                    # that has no fifth cell.
                    "Date": tds[4].text.strip() if len(tds) > 4 else "",
                }
            )
|