refactor: ♻️ refacto 2hdp parser
All checks were successful
dl / lint (push) Successful in 1m34s
dl / docker (push) Successful in 2m39s

Michel Roux 2024-12-21 21:58:33 +01:00
parent 6308e6790c
commit 6da2c52a06
4 changed files with 166 additions and 59 deletions

View File

@@ -9,7 +9,7 @@ RUN apt-get update && \
     apt-get install -y \
     vim p7zip* git mc lftp rename wget curl procps psmisc \
     openssh-client transmission-cli speedtest-cli \
-    python3-requests python3-transmissionrpc && \
+    python3-bs4 python3-requests python3-transmissionrpc && \
     apt-get install -y --no-install-recommends mame-tools yt-dlp && \
     rm -rf /var/lib/apt/lists/* && \
     curl -sSL https://raw.githubusercontent.com/MatanZ/tremc/main/tremc -o /usr/local/bin/tremc && \

View File

@@ -1,85 +1,133 @@
 #!/usr/bin/env python3
 from argparse import ArgumentParser
 from csv import DictWriter
+from datetime import datetime
 from sys import stdout
+from time import sleep
 from xml.etree import ElementTree
+from bs4 import BeautifulSoup
 from requests import get
-from transmissionrpc import Client, DefaultHTTPHandler  # type: ignore
 parser = ArgumentParser()
-parser.add_argument("-u", "--username", required=True)
-parser.add_argument("-p", "--password", required=True)
+parser.add_argument(
+    "--keyword", action="append", default=["mhdgz"], help="Prefered words to search on names"
+)
+parser.add_argument("--cookie", required=True, help="Cookies to bypass CloudFlare")
+parser.add_argument("--user-agent", required=True, help="User Agent to bypass CloudFlare")
+parser.add_argument(
+    "--feed", required=True, action="append", help="RSS feed to search for new movies"
+)
 args = parser.parse_args()
-class CustomHTTPHandler(DefaultHTTPHandler):
-    def request(self, url, query, headers, timeout):
-        headers["User-Agent"] = (
-            "Mozilla/5.0 (X11; Linux x86_64; rv:111.0) Gecko/20100101 Firefox/111.0"
-        )
-        return super().request(url, query, headers, timeout)
+def nb_files(id: int):
+    response = get(
+        f"https://www.ygg.re/engine/get_files?torrent={id}",
+        headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
+    )
+    sleep(1)
+    json = response.json()
+    soup = BeautifulSoup(json["html"], "html.parser")
+    rows = soup.find_all("tr")
+    length = 0
+    for row in rows:
+        columns = row.find_all("td")
+        if columns[1].text.strip()[-3:] == "mkv":
+            length += 1
+    return length
-client = Client(
-    "https://torrent.crystalyx.net/transmission/rpc",
-    port=443,
-    user=args.username,
-    password=args.password,
-    http_handler=CustomHTTPHandler(),
-)
+def parse_size(size):
+    # https://stackoverflow.com/a/42865957
+    units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
+    number = size[:-2]
+    unit = size[-2:]
+    return int(float(number) * units[unit])
-movies = []
-torrents = client.get_torrents()
-writer = DictWriter(stdout, fieldnames=["season", "title", "hash", "url"])
+def is_valid(id: int, size: int, completed: int):
+    return size < parse_size("10Go") and completed > 10 and nb_files(id) == 1
+writer = DictWriter(stdout, fieldnames=["title", "season", "episode", "name", "link"])
 writer.writeheader()
-tree = ElementTree.fromstring(get("https://feed.ausha.co/Loa7srdWGm1b").text)
-for item in tree.findall(".//item"):
-    title = item.find("title")
-    season = item.find("itunes:season", {"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"})
-    if season is None or title is None or title.text is None:
-        continue
-    row = {
-        "title": title.text,
-        "season": f"Saison {season.text}",
-        "hash": "",
-        "url": f"https://www.ygg.re/engine/search?name={title.text}&description=&file=&uploader=&category=2145&sub_category=2183&option_langue:multiple[0]=4&do=search&order=asc&sort=publish_date",
-    }
-    for torrent in torrents:
-        if title.text.lower() in torrent.name.lower():
-            row["hash"] = torrent.hashString
-            break
-    movies.append(title.text.lower())
-    writer.writerow(row)
-tree = ElementTree.fromstring(
-    get("https://www.calvinballconsortium.fr/podcasts/leretourdujeudi/feed.xml").text
-)
-for item in tree.findall(".//item"):
-    title = item.find("title")
-    if title is None or title.text is None or title.text.lower() in movies:
-        continue
-    row = {
-        "title": title.text,
-        "season": "Jeudi",
-        "hash": "",
-        "url": f"https://www.ygg.re/engine/search?name={title.text}&description=&file=&uploader=&category=2145&sub_category=2183&option_langue:multiple[0]=4&do=search&order=asc&sort=publish_date",
-    }
-    for torrent in torrents:
-        if title.text.lower() in torrent.name.lower():
-            row["hash"] = torrent.hashString
-            break
-    movies.append(title.text.lower())
-    writer.writerow(row)
+for feed in args.feed:
+    tree = ElementTree.fromstring(get(feed).text)
+    loop = 0
+    for item in tree.findall(".//item")[::-1]:
+        loop += 1
+        title = str(item.findtext("title")).strip()
+        season = int(
+            item.findtext(
+                "itunes:season", namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
+            )
+            or datetime.strptime(item.findtext("pubDate") or "", "%a, %d %b %Y %H:%M:%S %z").year
+        )
+        episode = int(
+            item.findtext(
+                "itunes:episode",
+                namespaces={"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"},
+            )
+            or loop
+        )
+        response = get(
+            f"https://www.ygg.re/engine/search?name={title}"
+            "&description=&file=&uploader=&category=2145&sub_category=2183"
+            "&do=search&order=asc&sort=publish_date",
+            headers={"Cookie": args.cookie, "User-Agent": args.user_agent},
+        )
+        sleep(1)
+        soup = BeautifulSoup(response.text, "html.parser")
+        rows = soup.select("table.table tr")
+        column = None
+        for row in rows:
+            columns = row.find_all("td")
+            if len(columns) == 0:
+                continue
+            if any(keyword.lower() in columns[1].text.strip().lower() for keyword in args.keyword):
+                if is_valid(
+                    int(columns[2].a["target"]),
+                    parse_size(columns[5].text.strip()),
+                    int(columns[6].text.strip()),
+                ):
+                    column = columns
+                    break
+        if column is None:
+            for row in rows:
+                columns = row.find_all("td")
+                if len(columns) == 0:
+                    continue
+                if "multi" in columns[1].text.strip().lower():
+                    if is_valid(
+                        int(columns[2].a["target"]),
+                        parse_size(columns[5].text.strip()),
+                        int(columns[6].text.strip()),
+                    ):
+                        column = columns
+                        break
+        writer.writerow(
+            {
+                "title": title,
+                "season": season,
+                "episode": episode,
+                "name": column[1].text.strip() if column else "",
+                "link": column[1].a["href"] if column else "",
+            }
+        )
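For reference, the new filtering helpers work like this: parse_size() converts the decimal size suffixes shown by the tracker (Ko/Mo/Go/To) into bytes, and is_valid() keeps only results under 10 Go with more than 10 completed downloads and a single .mkv file. A minimal standalone sketch of the size conversion (the body mirrors the function in the diff; the asserted values are only worked examples):

units = {"Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}


def parse_size(size: str) -> int:
    # "700Mo" -> 700 * 10**6, "1.5Go" -> 1.5 * 10**9 (decimal units, per the linked Stack Overflow answer)
    return int(float(size[:-2]) * units[size[-2:]])


assert parse_size("10Go") == 10_000_000_000  # the upper bound used by is_valid()
assert parse_size("700Mo") == 700_000_000
assert parse_size("1.5Go") == 1_500_000_000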

poetry.lock (generated, 59 lines changed)
View File

@@ -1,5 +1,26 @@
 # This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
 
+[[package]]
+name = "beautifulsoup4"
+version = "4.12.3"
+description = "Screen-scraping library"
+optional = false
+python-versions = ">=3.6.0"
+files = [
+    {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
+    {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
+]
+
+[package.dependencies]
+soupsieve = ">1.2"
+
+[package.extras]
+cchardet = ["cchardet"]
+chardet = ["chardet"]
+charset-normalizer = ["charset-normalizer"]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
 [[package]]
 name = "certifi"
 version = "2024.12.14"

@@ -251,6 +272,17 @@
     {file = "ruff-0.8.4.tar.gz", hash = "sha256:0d5f89f254836799af1615798caa5f80b7f935d7a670fad66c5007928e57ace8"},
 ]
 
+[[package]]
+name = "soupsieve"
+version = "2.6"
+description = "A modern CSS selector implementation for Beautiful Soup."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
+    {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
+]
+
 [[package]]
 name = "tomli"
 version = "2.2.1"

@@ -303,6 +335,31 @@
     {file = "transmissionrpc_ng-0.13.1-py3-none-any.whl", hash = "sha256:092954f0571392cd6ff498ea9b5a55de3b20f68fd919d6fe94e4f5e112a750bc"},
 ]
 
+[[package]]
+name = "types-beautifulsoup4"
+version = "4.12.0.20241020"
+description = "Typing stubs for beautifulsoup4"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "types-beautifulsoup4-4.12.0.20241020.tar.gz", hash = "sha256:158370d08d0cd448bd11b132a50ff5279237a5d4b5837beba074de152a513059"},
+    {file = "types_beautifulsoup4-4.12.0.20241020-py3-none-any.whl", hash = "sha256:c95e66ce15a4f5f0835f7fbc5cd886321ae8294f977c495424eaf4225307fd30"},
+]
+
+[package.dependencies]
+types-html5lib = "*"
+
+[[package]]
+name = "types-html5lib"
+version = "1.1.11.20241018"
+description = "Typing stubs for html5lib"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "types-html5lib-1.1.11.20241018.tar.gz", hash = "sha256:98042555ff78d9e3a51c77c918b1041acbb7eb6c405408d8a9e150ff5beccafa"},
+    {file = "types_html5lib-1.1.11.20241018-py3-none-any.whl", hash = "sha256:3f1e064d9ed2c289001ae6392c84c93833abb0816165c6ff0abfc304a779f403"},
+]
+
 [[package]]
 name = "types-requests"
 version = "2.32.0.20241016"

@@ -348,4 +405,4 @@ zstd = ["zstandard (>=0.18.0)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "b1f91153e63e166bbdef0bab71c32f2b9033b0162b752b2ae569e73f6949ec87"
+content-hash = "5f000dd0b221e422f68ef075abe504a8ed6776ab8065ff8c173a072e95ad7307"

View File

@@ -7,12 +7,14 @@ package-mode = false
 
 [tool.poetry.dependencies]
 python = "^3.10"
+beautifulsoup4 = "^4.12.3"
 transmissionrpc-ng = "~0.13.1"
 requests = "^2.32.3"
 
 [tool.poetry.group.dev.dependencies]
 ruff = "^0.8.4"
 mypy = "^1.14.0"
+types-beautifulsoup4 = "^4.12.0"
 types-requests = "^2.32.0"
 
 [tool.ruff]
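The new beautifulsoup4 runtime dependency (with its typing stubs in the dev group for mypy) backs the HTML scraping in the refactored parser. A small self-contained sketch of the row-parsing pattern the script relies on, using made-up HTML purely for illustration:

from bs4 import BeautifulSoup

# Hypothetical single-row result table, shaped like the columns the parser reads.
html = (
    '<table class="table"><tr>'
    "<td></td><td>Some.Movie.2024.MULTi.1080p.MHDGZ</td>"
    '<td><a target="123456">nfo</a></td>'
    "<td></td><td></td><td>1.4Go</td><td>42</td>"
    "</tr></table>"
)

soup = BeautifulSoup(html, "html.parser")
for row in soup.select("table.table tr"):
    columns = row.find_all("td")
    print(columns[1].text.strip())  # release name
    print(columns[2].a["target"])   # torrent id: "123456"
    print(columns[5].text.strip())  # size: "1.4Go"
    print(columns[6].text.strip())  # completed downloads: "42"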