refactor: ♻️ rework the 2hdp parser
parent 6308e6790c
commit 6da2c52a06

@@ -9,7 +9,7 @@ RUN apt-get update && \
 apt-get install -y \
 vim p7zip* git mc lftp rename wget curl procps psmisc \
 openssh-client transmission-cli speedtest-cli \
-python3-requests python3-transmissionrpc && \
+python3-bs4 python3-requests python3-transmissionrpc && \
 apt-get install -y --no-install-recommends mame-tools yt-dlp && \
 rm -rf /var/lib/apt/lists/* && \
 curl -sSL https://raw.githubusercontent.com/MatanZ/tremc/main/tremc -o /usr/local/bin/tremc && \
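
Note: the only functional change in this hunk is the added python3-bs4 package, which ships the bs4 module imported by the reworked parser below. A minimal sketch of the parsing pattern it enables (the HTML snippet is made up for illustration, not taken from a real listing):

# Illustrative only: parse a table cell the same way commands/2hdp.py does below.
from bs4 import BeautifulSoup

soup = BeautifulSoup("<tr><td>1</td><td>episode.mkv</td></tr>", "html.parser")
columns = soup.find_all("td")
print(columns[1].text.strip()[-3:] == "mkv")  # True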

commands/2hdp.py
@@ -1,85 +1,133 @@
-#!/usr/bin/env python3
-from argparse import ArgumentParser
-from csv import DictWriter
-from sys import stdout
-from xml.etree import ElementTree
-
-from requests import get
-from transmissionrpc import Client, DefaultHTTPHandler # type: ignore
-
-parser = ArgumentParser()
-parser.add_argument("-u", "--username", required=True)
-parser.add_argument("-p", "--password", required=True)
-args = parser.parse_args()
-
-
-class CustomHTTPHandler(DefaultHTTPHandler):
-    def request(self, url, query, headers, timeout):
-        headers["User-Agent"] = (
-            "Mozilla/5.0 (X11; Linux x86_64; rv:111.0) Gecko/20100101 Firefox/111.0"
-        )
-
-        return super().request(url, query, headers, timeout)
-
-
-client = Client(
-    "https://torrent.crystalyx.net/transmission/rpc",
-    port=443,
-    user=args.username,
-    password=args.password,
-    http_handler=CustomHTTPHandler(),
-)
-
-movies = []
-torrents = client.get_torrents()
-writer = DictWriter(stdout, fieldnames=["season", "title", "hash", "url"])
-writer.writeheader()
-
-tree = ElementTree.fromstring(get("https://feed.ausha.co/Loa7srdWGm1b").text)
-
-for item in tree.findall(".//item"):
-    title = item.find("title")
-    season = item.find("itunes:season", {"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"})
-
-    if season is None or title is None or title.text is None:
-        continue
-
-    row = {
-        "title": title.text,
-        "season": f"Saison {season.text}",
-        "hash": "",
-        "url": f"https://www.ygg.re/engine/search?name={title.text}&description=&file=&uploader=&category=2145&sub_category=2183&option_langue:multiple[0]=4&do=search&order=asc&sort=publish_date",
-    }
-
-    for torrent in torrents:
-        if title.text.lower() in torrent.name.lower():
-            row["hash"] = torrent.hashString
-            break
-
-    movies.append(title.text.lower())
-    writer.writerow(row)
-
-tree = ElementTree.fromstring(
-    get("https://www.calvinballconsortium.fr/podcasts/leretourdujeudi/feed.xml").text
-)
-
-for item in tree.findall(".//item"):
-    title = item.find("title")
-
-    if title is None or title.text is None or title.text.lower() in movies:
-        continue
-
-    row = {
-        "title": title.text,
-        "season": "Jeudi",
-        "hash": "",
-        "url": f"https://www.ygg.re/engine/search?name={title.text}&description=&file=&uploader=&category=2145&sub_category=2183&option_langue:multiple[0]=4&do=search&order=asc&sort=publish_date",
-    }
-
-    for torrent in torrents:
-        if title.text.lower() in torrent.name.lower():
-            row["hash"] = torrent.hashString
-            break
-
-    movies.append(title.text.lower())
-    writer.writerow(row)
+#!/usr/bin/env python3
+from argparse import ArgumentParser
+from csv import DictWriter
+from datetime import datetime
+from sys import stdout
+from time import sleep
+from xml.etree import ElementTree
+
+from bs4 import BeautifulSoup
+from requests import get
+
+parser = ArgumentParser()
+parser.add_argument(
+    "--keyword", action="append", default=["mhdgz"], help="Prefered words to search on names"
+)
+parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
+parser.add_argument("--user-agent", required=True, help="User Agent to bypass CloudFlare")
+parser.add_argument(
+    "--feed", required=True, action="append", help="RSS feed to search for new movies"
+)
+args = parser.parse_args()
+
+
+def nb_files(id: int):
+    response = get(
+        f"https://www.ygg.re/engine/get_files?torrent={id}",
+        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
+    )
+    sleep(1)
+
+    json = response.json()
+    soup = BeautifulSoup(json["html"], "html.parser")
+    rows = soup.find_all("tr")
+    length = 0
+
+    for row in rows:
+        columns = row.find_all("td")
+
+        if columns[1].text.strip()[-3:] == "mkv":
+            length += 1
+
+    return length
+
+
+def parse_size(size):
+    # https://stackoverflow.com/a/42865957
+    units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
+    number = size[:-2]
+    unit = size[-2:]
+
+    return int(float(number) * units[unit])
+
+
+def is_valid(id: int, size: int, completed: int):
+    return size < parse_size("10Go") and completed > 10 and nb_files(id) == 1
+
+
+writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
+writer.writeheader()
+
+for feed in args.feed:
+    tree = ElementTree.fromstring(get(feed).text)
+    loop = 0
+
+    for item in tree.findall(".//item")[::-1]:
+        loop += 1
+        title = str(item.findtext("title")).strip()
+        season = int(
+            item.findtext(
+                "itunes:season", namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
+            )
+            or datetime.strptime(item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z").year
+        )
+        episode = int(
+            item.findtext(
+                "itunes:episode",
+                namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"},
+            )
+            or loop
+        )
+
+        response = get(
+            f"https://www.ygg.re/engine/search?name={title}"
+            "&description=&file=&uploader=&category=2145&sub_category=2183"
+            "&do=search&order=asc&sort=publish_date",
+            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
+        )
+        sleep(1)
+
+        soup = BeautifulSoup(response.text, "html.parser")
+        rows = soup.select("table.table tr")
+        column = None
+
+        for row in rows:
+            columns = row.find_all("td")
+
+            if len(columns) == 0:
+                continue
+
+            if any(keyword.lower() in columns[1].text.strip().lower() for keyword in args.keyword):
+                if is_valid(
+                    int(columns[2].a["target"]),
+                    parse_size(columns[5].text.strip()),
+                    int(columns[6].text.strip()),
+                ):
+                    column = columns
+                    break
+
+        if column is None:
+            for row in rows:
+                columns = row.find_all("td")
+
+                if len(columns) == 0:
+                    continue
+
+                if "multi" in columns[1].text.strip().lower():
+                    if is_valid(
+                        int(columns[2].a["target"]),
+                        parse_size(columns[5].text.strip()),
+                        int(columns[6].text.strip()),
+                    ):
+                        column = columns
+                        break
+
+        writer.writerow(
+            {
+                "title": title,
+                "season": season,
+                "episode": episode,
+                "name": column[1].text.strip() if column else "",
+                "link": column[1].a["href"] if column else "",
+            }
+        )
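
Note: the reworked parser keeps a search result only when is_valid() accepts it, i.e. the torrent is under 10 Go, has more than 10 completed downloads, and contains exactly one .mkv file (counted by nb_files() through the YGG get_files endpoint). A standalone sketch of the size parsing behind that threshold, copied from parse_size() above; the sample values are illustrative:

# Standalone sketch of parse_size() from commands/2hdp.py (type hints added here).
def parse_size(size: str) -> int:
    # https://stackoverflow.com/a/42865957
    units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
    return int(float(size[:-2]) * units[size[-2:]])

assert parse_size("700Mo") == 700_000_000
assert parse_size("4.2Go") < parse_size("10Go")  # would pass the size check in is_valid()

When a feed item lacks itunes:season or itunes:episode, the loop falls back to the year of pubDate and to the item's position in the reversed feed (loop), respectively. A sketch of that fallback with a made-up RSS item:

# Made-up RSS item to illustrate the season/episode fallback in the new feed loop.
from datetime import datetime
from xml.etree import ElementTree

NS = {"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
item = ElementTree.fromstring(
    "<item><title>Some episode</title>"
    "<pubDate>Fri, 03 Jan 2025 10:00:00 +0000</pubDate></item>"
)
loop = 7  # position of the item in the reversed feed
season = int(
    item.findtext("itunes:season", namespaces=NS)
    or datetime.strptime(item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z").year
)
episode = int(item.findtext("itunes:episode", namespaces=NS) or loop)
print(season, episode)  # 2025 7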

poetry.lock (59 lines changed, generated)
@@ -1,5 +1,26 @@
 # This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
 
+[[package]]
+name = "beautifulsoup4"
+version = "4.12.3"
+description = "Screen-scraping library"
+optional = false
+python-versions = ">=3.6.0"
+files = [
+    {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
+    {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
+]
+
+[package.dependencies]
+soupsieve = ">1.2"
+
+[package.extras]
+cchardet = ["cchardet"]
+chardet = ["chardet"]
+charset-normalizer = ["charset-normalizer"]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
 [[package]]
 name = "certifi"
 version = "2024.12.14"
@@ -251,6 +272,17 @@ files = [
     {file = "ruff-0.8.4.tar.gz", hash = "sha256:0d5f89f254836799af1615798caa5f80b7f935d7a670fad66c5007928e57ace8"},
 ]
 
+[[package]]
+name = "soupsieve"
+version = "2.6"
+description = "A modern CSS selector implementation for Beautiful Soup."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
+    {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
+]
+
 [[package]]
 name = "tomli"
 version = "2.2.1"
@@ -303,6 +335,31 @@ files = [
     {file = "transmissionrpc_ng-0.13.1-py3-none-any.whl", hash = "sha256:092954f0571392cd6ff498ea9b5a55de3b20f68fd919d6fe94e4f5e112a750bc"},
 ]
 
+[[package]]
+name = "types-beautifulsoup4"
+version = "4.12.0.20241020"
+description = "Typing stubs for beautifulsoup4"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "types-beautifulsoup4-4.12.0.20241020.tar.gz", hash = "sha256:158370d08d0cd448bd11b132a50ff5279237a5d4b5837beba074de152a513059"},
+    {file = "types_beautifulsoup4-4.12.0.20241020-py3-none-any.whl", hash = "sha256:c95e66ce15a4f5f0835f7fbc5cd886321ae8294f977c495424eaf4225307fd30"},
+]
+
+[package.dependencies]
+types-html5lib = "*"
+
+[[package]]
+name = "types-html5lib"
+version = "1.1.11.20241018"
+description = "Typing stubs for html5lib"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "types-html5lib-1.1.11.20241018.tar.gz", hash = "sha256:98042555ff78d9e3a51c77c918b1041acbb7eb6c405408d8a9e150ff5beccafa"},
+    {file = "types_html5lib-1.1.11.20241018-py3-none-any.whl", hash = "sha256:3f1e064d9ed2c289001ae6392c84c93833abb0816165c6ff0abfc304a779f403"},
+]
+
 [[package]]
 name = "types-requests"
 version = "2.32.0.20241016"
@@ -348,4 +405,4 @@ zstd = ["zstandard (>=0.18.0)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "b1f91153e63e166bbdef0bab71c32f2b9033b0162b752b2ae569e73f6949ec87"
+content-hash = "5f000dd0b221e422f68ef075abe504a8ed6776ab8065ff8c173a072e95ad7307"

pyproject.toml

@@ -7,12 +7,14 @@ package-mode = false
 
 [tool.poetry.dependencies]
 python = "^3.10"
+beautifulsoup4 = "^4.12.3"
 transmissionrpc-ng = "~0.13.1"
 requests = "^2.32.3"
 
 [tool.poetry.group.dev.dependencies]
 ruff = "^0.8.4"
 mypy = "^1.14.0"
+types-beautifulsoup4 = "^4.12.0"
 types-requests = "^2.32.0"
 
 [tool.ruff]
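
Note: types-beautifulsoup4 (and its types-html5lib dependency, pinned in poetry.lock above) only adds type stubs so the existing mypy dev check can cover the new BeautifulSoup calls. A hypothetical helper, not part of this repository, showing the kind of code those stubs let mypy verify:

# Hypothetical example (not from this repo): with the bs4 stubs installed,
# mypy knows find() may return something other than a Tag, so narrow first.
from bs4 import BeautifulSoup, Tag

def first_cell_text(html: str) -> str:
    cell = BeautifulSoup(html, "html.parser").find("td")
    return cell.text.strip() if isinstance(cell, Tag) else ""

print(first_cell_text("<tr><td> movie.mkv </td></tr>"))  # movie.mkv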