fix: ⚡ improve mhdgz performance
parent 2537713f9f
commit 97931f0c56
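In short: instead of issuing one YGG search request per RSS item, the script now fetches every page of mhdgz results once at startup (via the new parse_ygg helper) and matches feed titles against that cached list locally; a per-title search is only issued as a fallback when no cached entry matches. The --keyword option is dropped, since the pre-fetched mhdgz list replaces keyword filtering.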
@@ -2,7 +2,7 @@
 from argparse import ArgumentParser
 from csv import DictWriter
 from datetime import datetime
-from sys import stdout
+from sys import stderr, stdout
 from time import sleep
 from xml.etree import ElementTree
 
@@ -10,9 +10,6 @@ from bs4 import BeautifulSoup
 from requests import get
 
 parser = ArgumentParser()
-parser.add_argument(
-    "--keyword", action="append", default=["mhdgz"], help="Prefered words to search on names"
-)
 parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
 parser.add_argument("--user-agent", required=True, help="User Agent to bypass CloudFlare")
 parser.add_argument(
@@ -56,8 +53,45 @@ def is_valid(id: int, size: int, completed: int):
     return size < parse_size("10Go") and completed > 10 and nb_files(id)
 
 
+def parse_ygg(title: str, page: int = 0):
+    movies = []
+    response = get(
+        f"https://www.ygg.re/engine/search?name={title}"
+        + "&description=&file=&uploader=&category=2145&sub_category=2183"
+        + "&do=search&order=asc&sort=publish_date&page="
+        + str(page),
+        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
+    )
+    sleep(1)
+
+    soup = BeautifulSoup(response.text, "html.parser")
+    rows = soup.select("table.table tr")
+
+    for row in rows:
+        columns = row.find_all("td")
+
+        if len(columns) == 0:
+            continue
+
+        movies.append(columns)
+
+    return movies
+
+
+mhdgz = []
+page = 0
+results = 1
+
+while results > 0:
+    print(f"Parsing YGG page {page}...", file=stderr)
+    movies = parse_ygg("mhdgz", page)
+    mhdgz += movies
+    results = len(movies)
+    page += 50
+
 writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
 writer.writeheader()
+stdout.flush()
 
 for feed in args.feed:
     tree = ElementTree.fromstring(get(feed).text)
@@ -65,6 +99,7 @@ for feed in args.feed:
 
     for item in tree.findall(".//item")[::-1]:
         loop += 1
+        movie = None
         title = str(item.findtext("title")).strip()
         season = int(
             item.findtext(
@@ -80,47 +115,22 @@ for feed in args.feed:
             or loop
         )
 
-        response = get(
-            f"https://www.ygg.re/engine/search?name={title}"
-            "&description=&file=&uploader=&category=2145&sub_category=2183"
-            "&do=search&order=asc&sort=publish_date",
-            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
-        )
-        sleep(1)
-
-        soup = BeautifulSoup(response.text, "html.parser")
-        rows = soup.select("table.table tr")
-        column = None
-
-        for row in rows:
-            columns = row.find_all("td")
-
-            if len(columns) == 0:
-                continue
-
-            if any(keyword.lower() in columns[1].text.strip().lower() for keyword in args.keyword):
-                if is_valid(
-                    int(columns[2].a["target"]),
-                    parse_size(columns[5].text.strip()),
-                    int(columns[6].text.strip()),
-                ):
-                    column = columns
-                    break
-
-        if column is None:
-            for row in rows:
-                columns = row.find_all("td")
-
-                if len(columns) == 0:
-                    continue
-
-                if "multi" in columns[1].text.strip().lower():
-                    if is_valid(
-                        int(columns[2].a["target"]),
-                        parse_size(columns[5].text.strip()),
-                        int(columns[6].text.strip()),
-                    ):
-                        column = columns
-                        break
+        for mhd in mhdgz:
+            if title.lower() in mhd[1].text.strip().lower():
+                movie = mhd
+                break
+
+        if not movie:
+            rows = parse_ygg(title)
+
+            for row in rows:
+                if "multi" in row[1].text.strip().lower():
+                    if is_valid(
+                        int(row[2].a["target"]),
+                        parse_size(row[5].text.strip()),
+                        int(row[6].text.strip()),
+                    ):
+                        movie = row
+                        break
 
         writer.writerow(
@@ -128,8 +138,8 @@ for feed in args.feed:
                 "title": title,
                 "season": season,
                 "episode": episode,
-                "name": column[1].text.strip() if column else "",
-                "link": column[1].a["href"] if column else "",
+                "name": movie[1].text.strip() if movie else "",
+                "link": movie[1].a["href"] if movie else "",
             }
         )
         stdout.flush()
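The pattern behind this change, sketched in isolation: do the paginated fetches once up front, then answer each lookup from the in-memory list, falling back to the network only on a miss. This is a minimal sketch, not the repository's code; the names (prefetch_all, match_locally, fetch_page) are hypothetical, and only the 50-row pagination step and the empty-page stop condition mirror the committed code.

    from typing import Callable, Optional

    def prefetch_all(fetch_page: Callable[[int], list], step: int = 50) -> list:
        # Fetch every result page once, up front; the site paginates by
        # 50-row offsets, and an empty page marks the end (the same stop
        # condition as `results > 0` in the commit above).
        results = []
        offset = 0
        while True:
            page = fetch_page(offset)
            if not page:
                break
            results += page
            offset += step
        return results

    def match_locally(title: str, cache: list) -> Optional[str]:
        # Substring match against the cached names: no network round-trip
        # per feed item on a hit.
        needle = title.lower()
        for name in cache:
            if needle in name.lower():
                return name
        return None

    # Hypothetical usage: a fake fetcher returning two pages of one result
    # each, then an empty page that ends the prefetch loop.
    cache = prefetch_all(lambda off: [f"Movie.{off}.MULTi.1080p.MHDgz"] if off < 100 else [])
    print(match_locally("movie.50", cache))  # -> Movie.50.MULTi.1080p.MHDgz
    print(match_locally("absent", cache))    # -> None (would trigger the fallback search)

Since the old code paid one search request plus a sleep(1) per feed item, every cache hit now saves at least a second and one round-trip; the fallback per-title search (and its sleep) is only paid on a miss.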