"""Scrape the French Wikipedia Detective Conan episode list and print it as CSV.

Every ``table.wikitable`` except the first is walked row by row.  Rows whose
episode number is greater than ``VF_UNTIL`` and whose background colour is
either absent or listed in ``GOOD_COLOR`` are written to standard output with
the columns named in ``CSV_COLUMNS``.
"""
import csv
import sys

import bs4
import requests

WIKI_URL = (
    "https://fr.wikipedia.org/wiki/Liste_des_%C3%A9pisodes_de_D%C3%A9tective_Conan"
)

# Row background colours (lower-case, as returned by extract_background) that
# are kept; rows with any other explicit background are dropped.
GOOD_COLOR = ["#ee8000", "#80ffff"]

# SW / EW: wikitable index and episode number as parsed from the page.
# ST / ET: season and in-season episode computed by the thetvdb_* helpers.
CSV_COLUMNS = ["SW", "EW", "Arc", "ST", "ET", "Titre", "Date"]

# Episodes numbered up to and including this value are skipped.
# NOTE(review): presumably the last episode with a French dub ("VF") - confirm.
VF_UNTIL = 219

# Number of episodes in each season, in season order (season 1 first), as
# consumed by thetvdb_season() / thetvdb_episode().
EPS_BY_SEASONS = [
    28,
    26,
    28,
    24,
    28,
    28,
    31,
    26,
    35,
    31,
    30,
    38,
    36,
    37,
    39,
    25,
    33,
    42,
    40,
    40,
    35,
    43,
    39,
    41,
    83,
    40,
    38,
    28,
    40,
    35,
    99,
]

# First episode number of each story arc -> arc name.
ARCS = {
    1: "Conan",
    129: "Haibara",
    179: "Vermouth",
    345: "Phone",
    425: "Kir",
    505: "Bourbon",
    705: "Train",
    783: "Rum",
    1029: "Police",
}

# Seconds to wait for Wikipedia before giving up; without a timeout
# requests.get() may block indefinitely.
REQUEST_TIMEOUT = 30


def extract_background(style):
    """Return the ``background`` value of an inline CSS *style* string.

    The value is stripped and lower-cased.  ``None`` is returned when *style*
    is empty/``None`` or holds no ``background`` declaration.  Only the exact
    property name ``background`` is matched (not ``background-color``).
    """
    if not style:
        return None
    for declaration in style.split(";"):
        # partition() instead of split(":"): a declaration without a colon
        # (e.g. stray whitespace) or with several (e.g. "url(http://...)")
        # used to raise ValueError on tuple unpacking.
        prop, sep, value = declaration.partition(":")
        if sep and prop.strip() == "background":
            return value.strip().lower()
    return None


def thetvdb_season(episode):
    """Return the 1-based season containing absolute episode number *episode*.

    Seasons are delimited by the running total of ``EPS_BY_SEASONS``.
    Returns ``None`` when *episode* lies beyond the last listed season.
    """
    last_episode_of_season = 0
    for season, count in enumerate(EPS_BY_SEASONS, start=1):
        last_episode_of_season += count
        if episode <= last_episode_of_season:
            return season
    return None


def thetvdb_episode(episode):
    """Return the position of absolute episode *episode* inside its season.

    Returns ``None`` when the episode is beyond the seasons described by
    ``EPS_BY_SEASONS`` (this case used to crash with a ``TypeError``).
    """
    season = thetvdb_season(episode)
    if season is None:
        return None
    # Subtract every episode that aired in the preceding seasons.
    return episode - sum(EPS_BY_SEASONS[: season - 1])


def current_arc(episode):
    """Return the name of the arc *episode* belongs to.

    The arc is the one with the greatest starting episode in ``ARCS`` that is
    not greater than *episode*; ``None`` if *episode* precedes every arc.
    """
    for first_episode in sorted(ARCS, reverse=True):
        if episode >= first_episode:
            return ARCS[first_episode]
    return None


html = requests.get(WIKI_URL, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page and silently
# emitting a header-only CSV.
html.raise_for_status()
soup = bs4.BeautifulSoup(html.text, "html.parser")
tables = soup.select("table.wikitable")

out = csv.DictWriter(sys.stdout, CSV_COLUMNS)
out.writeheader()

for season, table in enumerate(tables):
    # The first wikitable (index 0) is skipped, so the "SW" values start at 1.
    if not season:
        continue
    # The first row of each table is skipped.
    for tr in table.select("tr")[1:]:
        tds = tr.select("td")
        if len(tds) < 4:
            continue
        try:
            # Keep the first number of cells such as "123/124 ...".
            episode = int(tds[0].text.split(" ")[0].split("/")[0])
        except ValueError:
            continue
        if episode <= VF_UNTIL:
            continue
        # Rows without a style attribute count as "no colour" (and are kept)
        # rather than raising KeyError.
        color = extract_background(tr.get("style", ""))
        if not color or color in GOOD_COLOR:
            out.writerow(
                {
                    "SW": season,
                    "EW": episode,
                    "Arc": current_arc(episode),
                    "ST": thetvdb_season(episode),
                    "ET": thetvdb_episode(episode),
                    "Titre": tds[1].text.strip(),
                    # A row with exactly four cells passes the length check
                    # above but has no fifth cell; emit an empty date instead
                    # of raising IndexError.
                    "Date": tds[4].text.strip() if len(tds) > 4 else "",
                }
            )