fix: ⚡ improve mhdgz performance
parent 2537713f9f
commit 97931f0c56
@@ -2,7 +2,7 @@
 from argparse import ArgumentParser
 from csv import DictWriter
 from datetime import datetime
-from sys import stdout
+from sys import stderr, stdout
 from time import sleep
 from xml.etree import ElementTree
 
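The only change in this hunk is the import: progress messages are written to stderr so that the CSV rows streamed through DictWriter on stdout stay machine-readable. A minimal sketch of that split (the message text and row value are illustrative only):

from csv import DictWriter
from sys import stderr, stdout

writer = DictWriter(stdout, fieldnames=["title"])
writer.writeheader()
print("Parsing YGG page 0...", file=stderr)  # log line goes to stderr, not into the CSV
writer.writerow({"title": "example"})        # data row goes to stdout
stdout.flush()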
@@ -10,9 +10,6 @@ from bs4 import BeautifulSoup
 from requests import get
 
 parser = ArgumentParser()
-parser.add_argument(
-    "--keyword", action="append", default=["mhdgz"], help="Prefered words to search on names"
-)
 parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
 parser.add_argument("--user-agent", required=True, help="User Agent to bypass CloudFlare")
 parser.add_argument(
@@ -56,8 +53,45 @@ def is_valid(id: int, size: int, completed: int):
     return size < parse_size("10Go") and completed > 10 and nb_files(id)
 
+
+def parse_ygg(title: str, page: int = 0):
+    movies = []
+    response = get(
+        f"https://www.ygg.re/engine/search?name={title}"
+        + "&description=&file=&uploader=&category=2145&sub_category=2183"
+        + "&do=search&order=asc&sort=publish_date&page="
+        + str(page),
+        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
+    )
+    sleep(1)
+
+    soup = BeautifulSoup(response.text, "html.parser")
+    rows = soup.select("table.table tr")
+
+    for row in rows:
+        columns = row.find_all("td")
+
+        if len(columns) == 0:
+            continue
+
+        movies.append(columns)
+
+    return movies
+
+
+mhdgz = []
+page = 0
+results = 1
+
+while results > 0:
+    print(f"Parsing YGG page {page}...", file=stderr)
+    movies = parse_ygg("mhdgz", page)
+    mhdgz += movies
+    results = len(movies)
+    page += 50
+
 writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
 writer.writeheader()
 stdout.flush()
 
 for feed in args.feed:
     tree = ElementTree.fromstring(get(feed).text)
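This hunk carries the performance fix: rather than querying YGG once per feed item, the new parse_ygg() helper is driven by a loop that walks the "mhdgz" listing page by page (the page parameter advances in steps of 50) and caches every result row in the mhdgz list before any feed is processed. A minimal sketch of the same pre-fetch-and-cache pattern, with fetch_page as a hypothetical stand-in for parse_ygg():

def crawl_all(fetch_page, step=50):
    # Keep requesting pages until one comes back empty, accumulating all rows.
    cached = []
    offset = 0
    results = 1
    while results > 0:
        rows = fetch_page(offset)  # one HTTP round-trip per page
        cached += rows
        results = len(rows)
        offset += step
    return cached

# e.g. cache = crawl_all(lambda offset: parse_ygg("mhdgz", offset))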
@@ -65,6 +99,7 @@ for feed in args.feed:
 
     for item in tree.findall(".//item")[::-1]:
         loop += 1
+        movie = None
         title = str(item.findtext("title")).strip()
         season = int(
             item.findtext(
@@ -80,47 +115,22 @@ for feed in args.feed:
             or loop
         )
 
-        response = get(
-            f"https://www.ygg.re/engine/search?name={title}"
-            "&description=&file=&uploader=&category=2145&sub_category=2183"
-            "&do=search&order=asc&sort=publish_date",
-            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
-        )
-        sleep(1)
+        for mhd in mhdgz:
+            if title.lower() in mhd[1].text.strip().lower():
+                movie = mhd
+                break
 
-        soup = BeautifulSoup(response.text, "html.parser")
-        rows = soup.select("table.table tr")
-        column = None
+        if not movie:
+            rows = parse_ygg(title)
 
-        for row in rows:
-            columns = row.find_all("td")
-
-            if len(columns) == 0:
-                continue
-
-            if any(keyword.lower() in columns[1].text.strip().lower() for keyword in args.keyword):
-                if is_valid(
-                    int(columns[2].a["target"]),
-                    parse_size(columns[5].text.strip()),
-                    int(columns[6].text.strip()),
-                ):
-                    column = columns
-                    break
-
-        if column is None:
             for row in rows:
-                columns = row.find_all("td")
-
-                if len(columns) == 0:
-                    continue
-
-                if "multi" in columns[1].text.strip().lower():
+                if "multi" in row[1].text.strip().lower():
                     if is_valid(
-                        int(columns[2].a["target"]),
-                        parse_size(columns[5].text.strip()),
-                        int(columns[6].text.strip()),
+                        int(row[2].a["target"]),
+                        parse_size(row[5].text.strip()),
+                        int(row[6].text.strip()),
                     ):
-                        column = columns
+                        movie = row
                         break
 
         writer.writerow(
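With the cache in place, each feed title is first matched against the pre-fetched mhdgz rows with a simple substring test; only when nothing matches does the script fall back to a live parse_ygg(title) search and filter those rows as shown above. A minimal sketch of the cache-first lookup with a live fallback, where live_search stands in for parse_ygg and match is any predicate over a result row (both hypothetical names):

def find_row(title, cache, live_search, match):
    wanted = title.lower()
    # Cheap path: scan the rows fetched up front, no network traffic.
    for row in cache:
        if wanted in row[1].text.strip().lower():
            return row
    # Slow path: hit the network only when the cache has no match.
    for row in live_search(title):
        if match(row):
            return row
    return None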
@@ -128,8 +138,8 @@ for feed in args.feed:
                 "title": title,
                 "season": season,
                 "episode": episode,
-                "name": column[1].text.strip() if column else "",
-                "link": column[1].a["href"] if column else "",
+                "name": movie[1].text.strip() if movie else "",
+                "link": movie[1].a["href"] if movie else "",
             }
         )
         stdout.flush()