refactor: ♻️ refactor 2hdp parser
This commit is contained in:
parent
6308e6790c
commit
6da2c52a06
@ -9,7 +9,7 @@ RUN apt-get update && \
|
||||
apt-get install -y \
|
||||
vim p7zip* git mc lftp rename wget curl procps psmisc \
|
||||
openssh-client transmission-cli speedtest-cli \
|
||||
python3-requests python3-transmissionrpc && \
|
||||
python3-bs4 python3-requests python3-transmissionrpc && \
|
||||
apt-get install -y --no-install-recommends mame-tools yt-dlp && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
curl -sSL https://raw.githubusercontent.com/MatanZ/tremc/main/tremc -o /usr/local/bin/tremc && \
|
||||
|
162
commands/2hdp.py
162
commands/2hdp.py
@ -1,85 +1,133 @@
|
||||
#!/usr/bin/env python3
|
||||
from argparse import ArgumentParser
|
||||
from csv import DictWriter
|
||||
from datetime import datetime
|
||||
from sys import stdout
|
||||
from time import sleep
|
||||
from xml.etree import ElementTree
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from requests import get
|
||||
from transmissionrpc import Client, DefaultHTTPHandler # type: ignore
|
||||
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("-u", "--username", required=True)
|
||||
parser.add_argument("-p", "--password", required=True)
|
||||
# Each --keyword occurrence is appended after the default entry; the resulting
# list is matched case-insensitively against result names further down.
parser.add_argument(
    "--keyword",
    action="append",
    default=["mhdgz"],
    help="Preferred words to search for in names",
)
# Cookie and User-Agent are sent as request headers on the site requests.
parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
parser.add_argument("--user-agent", required=True, help="User Agent to bypass CloudFlare")
# --feed may be repeated; every feed given is processed in turn.
parser.add_argument(
    "--feed", required=True, action="append", help="RSS feed to search for new movies"
)
args = parser.parse_args()
|
||||
|
||||
|
||||
class CustomHTTPHandler(DefaultHTTPHandler):
|
||||
def request(self, url, query, headers, timeout):
|
||||
headers["User-Agent"] = (
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:111.0) Gecko/20100101 Firefox/111.0"
|
||||
)
|
||||
def nb_files(id: int):
|
||||
response = get(
|
||||
f"https://www.ygg.re/engine/get_files?torrent={id}",
|
||||
headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
|
||||
)
|
||||
sleep(1)
|
||||
|
||||
return super().request(url, query, headers, timeout)
|
||||
json = response.json()
|
||||
soup = BeautifulSoup(json["html"], "html.parser")
|
||||
rows = soup.find_all("tr")
|
||||
length = 0
|
||||
|
||||
for row in rows:
|
||||
columns = row.find_all("td")
|
||||
|
||||
if columns[1].text.strip()[-3:] == "mkv":
|
||||
length += 1
|
||||
|
||||
return length
|
||||
|
||||
|
||||
client = Client(
|
||||
"https://torrent.crystalyx.net/transmission/rpc",
|
||||
port=443,
|
||||
user=args.username,
|
||||
password=args.password,
|
||||
http_handler=CustomHTTPHandler(),
|
||||
)
|
||||
def parse_size(size):
|
||||
# https://stackoverflow.com/a/42865957
|
||||
units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
|
||||
number = size[:-2]
|
||||
unit = size[-2:]
|
||||
|
||||
movies = []
|
||||
torrents = client.get_torrents()
|
||||
writer = DictWriter(stdout, fieldnames=["season", "title", "hash", "url"])
|
||||
return int(float(number) * units[unit])
|
||||
|
||||
|
||||
def is_valid(id: int, size: int, completed: int):
    """Decide whether a search result is an acceptable download candidate.

    The checks run from cheapest to most expensive: ``nb_files`` is only
    called once both numeric checks have passed.

    Args:
        id: Torrent identifier handed to ``nb_files``.
        size: Torrent size, in the unit returned by ``parse_size``.
        completed: Value compared against 10; presumably the number of
            completed downloads shown in the results table -- confirm
            against the caller.

    Returns:
        ``True`` when ``size`` is below ``parse_size("10Go")``, ``completed``
        is above 10 and ``nb_files(id)`` equals 1; ``False`` otherwise.
    """
    if size >= parse_size("10Go"):
        return False
    if completed <= 10:
        return False
    return nb_files(id) == 1
|
||||
|
||||
|
||||
writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
|
||||
writer.writeheader()
|
||||
|
||||
tree = ElementTree.fromstring(get("https://feed.ausha.co/Loa7srdWGm1b").text)
|
||||
for feed in args.feed:
|
||||
tree = ElementTree.fromstring(get(feed).text)
|
||||
loop = 0
|
||||
|
||||
for item in tree.findall(".//item"):
|
||||
title = item.find("title")
|
||||
season = item.find("itunes:season", {"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"})
|
||||
for item in tree.findall(".//item")[::-1]:
|
||||
loop += 1
|
||||
title = str(item.findtext("title")).strip()
|
||||
season = int(
|
||||
item.findtext(
|
||||
"itunes:season", namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
|
||||
)
|
||||
or datetime.strptime(item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z").year
|
||||
)
|
||||
episode = int(
|
||||
item.findtext(
|
||||
"itunes:episode",
|
||||
namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"},
|
||||
)
|
||||
or loop
|
||||
)
|
||||
|
||||
if season is None or title is None or title.text is None:
|
||||
continue
|
||||
response = get(
|
||||
f"https://www.ygg.re/engine/search?name={title}"
|
||||
"&description=&file=&uploader=&category=2145&sub_category=2183"
|
||||
"&do=search&order=asc&sort=publish_date",
|
||||
headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
|
||||
)
|
||||
sleep(1)
|
||||
|
||||
row = {
|
||||
"title": title.text,
|
||||
"season": f"Saison {season.text}",
|
||||
"hash": "",
|
||||
"url": f"https://www.ygg.re/engine/search?name={title.text}&description=&file=&uploader=&category=2145&sub_category=2183&option_langue:multiple[0]=4&do=search&order=asc&sort=publish_date",
|
||||
}
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
rows = soup.select("table.table tr")
|
||||
column = None
|
||||
|
||||
for torrent in torrents:
|
||||
if title.text.lower() in torrent.name.lower():
|
||||
row["hash"] = torrent.hashString
|
||||
break
|
||||
for row in rows:
|
||||
columns = row.find_all("td")
|
||||
|
||||
movies.append(title.text.lower())
|
||||
writer.writerow(row)
|
||||
if len(columns) == 0:
|
||||
continue
|
||||
|
||||
tree = ElementTree.fromstring(
|
||||
get("https://www.calvinballconsortium.fr/podcasts/leretourdujeudi/feed.xml").text
|
||||
)
|
||||
if any(keyword.lower() in columns[1].text.strip().lower() for keyword in args.keyword):
|
||||
if is_valid(
|
||||
int(columns[2].a["target"]),
|
||||
parse_size(columns[5].text.strip()),
|
||||
int(columns[6].text.strip()),
|
||||
):
|
||||
column = columns
|
||||
break
|
||||
|
||||
for item in tree.findall(".//item"):
|
||||
title = item.find("title")
|
||||
if column is None:
|
||||
for row in rows:
|
||||
columns = row.find_all("td")
|
||||
|
||||
if title is None or title.text is None or title.text.lower() in movies:
|
||||
continue
|
||||
if len(columns) == 0:
|
||||
continue
|
||||
|
||||
row = {
|
||||
"title": title.text,
|
||||
"season": "Jeudi",
|
||||
"hash": "",
|
||||
"url": f"https://www.ygg.re/engine/search?name={title.text}&description=&file=&uploader=&category=2145&sub_category=2183&option_langue:multiple[0]=4&do=search&order=asc&sort=publish_date",
|
||||
}
|
||||
if "multi" in columns[1].text.strip().lower():
|
||||
if is_valid(
|
||||
int(columns[2].a["target"]),
|
||||
parse_size(columns[5].text.strip()),
|
||||
int(columns[6].text.strip()),
|
||||
):
|
||||
column = columns
|
||||
break
|
||||
|
||||
for torrent in torrents:
|
||||
if title.text.lower() in torrent.name.lower():
|
||||
row["hash"] = torrent.hashString
|
||||
break
|
||||
|
||||
movies.append(title.text.lower())
|
||||
writer.writerow(row)
|
||||
writer.writerow(
|
||||
{
|
||||
"title": title,
|
||||
"season": season,
|
||||
"episode": episode,
|
||||
"name": column[1].text.strip() if column else "",
|
||||
"link": column[1].a["href"] if column else "",
|
||||
}
|
||||
)
|
||||
|
59
poetry.lock
generated
59
poetry.lock
generated
@ -1,5 +1,26 @@
|
||||
# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.12.3"
|
||||
description = "Screen-scraping library"
|
||||
optional = false
|
||||
python-versions = ">=3.6.0"
|
||||
files = [
|
||||
{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
|
||||
{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
soupsieve = ">1.2"
|
||||
|
||||
[package.extras]
|
||||
cchardet = ["cchardet"]
|
||||
chardet = ["chardet"]
|
||||
charset-normalizer = ["charset-normalizer"]
|
||||
html5lib = ["html5lib"]
|
||||
lxml = ["lxml"]
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2024.12.14"
|
||||
@ -251,6 +272,17 @@ files = [
|
||||
{file = "ruff-0.8.4.tar.gz", hash = "sha256:0d5f89f254836799af1615798caa5f80b7f935d7a670fad66c5007928e57ace8"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "soupsieve"
|
||||
version = "2.6"
|
||||
description = "A modern CSS selector implementation for Beautiful Soup."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
|
||||
{file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.2.1"
|
||||
@ -303,6 +335,31 @@ files = [
|
||||
{file = "transmissionrpc_ng-0.13.1-py3-none-any.whl", hash = "sha256:092954f0571392cd6ff498ea9b5a55de3b20f68fd919d6fe94e4f5e112a750bc"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-beautifulsoup4"
|
||||
version = "4.12.0.20241020"
|
||||
description = "Typing stubs for beautifulsoup4"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "types-beautifulsoup4-4.12.0.20241020.tar.gz", hash = "sha256:158370d08d0cd448bd11b132a50ff5279237a5d4b5837beba074de152a513059"},
|
||||
{file = "types_beautifulsoup4-4.12.0.20241020-py3-none-any.whl", hash = "sha256:c95e66ce15a4f5f0835f7fbc5cd886321ae8294f977c495424eaf4225307fd30"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
types-html5lib = "*"
|
||||
|
||||
[[package]]
|
||||
name = "types-html5lib"
|
||||
version = "1.1.11.20241018"
|
||||
description = "Typing stubs for html5lib"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "types-html5lib-1.1.11.20241018.tar.gz", hash = "sha256:98042555ff78d9e3a51c77c918b1041acbb7eb6c405408d8a9e150ff5beccafa"},
|
||||
{file = "types_html5lib-1.1.11.20241018-py3-none-any.whl", hash = "sha256:3f1e064d9ed2c289001ae6392c84c93833abb0816165c6ff0abfc304a779f403"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-requests"
|
||||
version = "2.32.0.20241016"
|
||||
@ -348,4 +405,4 @@ zstd = ["zstandard (>=0.18.0)"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "b1f91153e63e166bbdef0bab71c32f2b9033b0162b752b2ae569e73f6949ec87"
|
||||
content-hash = "5f000dd0b221e422f68ef075abe504a8ed6776ab8065ff8c173a072e95ad7307"
|
||||
|
@ -7,12 +7,14 @@ package-mode = false
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
beautifulsoup4 = "^4.12.3"
|
||||
transmissionrpc-ng = "~0.13.1"
|
||||
requests = "^2.32.3"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
ruff = "^0.8.4"
|
||||
mypy = "^1.14.0"
|
||||
types-beautifulsoup4 = "^4.12.0"
|
||||
types-requests = "^2.32.0"
|
||||
|
||||
[tool.ruff]
|
||||
|
Loading…
Reference in New Issue
Block a user