From 92adf237a193535a2b52c039ec0576bdc68def89 Mon Sep 17 00:00:00 2001 From: Michel Roux Date: Sun, 13 Nov 2022 17:14:26 +0100 Subject: [PATCH] Remove BABS and add pyyg --- Dockerfile | 3 +- commands/BABS.py | 408 ----------------------------------------------- commands/pyyg.py | 169 ++++++++++++++++++++ 3 files changed, 171 insertions(+), 409 deletions(-) delete mode 100755 commands/BABS.py create mode 100644 commands/pyyg.py diff --git a/Dockerfile b/Dockerfile index 923b25f..48cfa9a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,8 @@ FROM linuxserver/ffmpeg:4.4-cli-ls65 RUN apt-get update && \ apt-get install -y \ vim p7zip* git rsync lftp speedtest-cli rename megatools wget curl procps psmisc \ - openssh-client transmission-cli python3-requests python3-pip && \ + openssh-client transmission-cli python3-pip \ + python3-requests python3-bs4 python3-dnspython && \ rm -rf /var/lib/apt/lists/* RUN pip3 install yt-dlp && \ curl -sSL https://raw.githubusercontent.com/tremc/tremc/master/tremc -o /usr/local/bin/tremc && \ diff --git a/commands/BABS.py b/commands/BABS.py deleted file mode 100755 index 75c5b19..0000000 --- a/commands/BABS.py +++ /dev/null @@ -1,408 +0,0 @@ -# coding=utf-8 -# -# Most code here is copyright (c) 2010 Plex Development Team. All rights reserved. -# -# Better ABsolute Scanner based on default scanner code from PMS 0.9.3.5 for Ubuntu -# 2011-10-15 by jmjf (on Plex Forums) -# -# a version of the Plex Series Scanner that does a better job of dealing with absolute numbered files -# and addresses the problem of series with numbers in the the name. -# -# Place this file in /var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Scanners/Series -# /var/lib/plexmediaserver is a.k.a. ~/ to the plex user -# you may have to create the Series subdirectory -# -# This code has only been tested on Ubuntu 10.4.2 with PMS 0.9.3.5 and my media collection. Use at your own risk. -# That said, I expect it will work on other platforms. 
-# -# all debug messages are left in -- will only show on console -- include BABS: at beginning of line -# all changes except debug messages are called out with comments begining # BABS -- -# -# Other modifications by Xefir Destiny -# -import re, os, os.path -import Media, VideoFiles, Stack, Utils -from mp4file import mp4file, atomsearch - -episode_regexps = [ - '(?P.*?)[vVsS](?P[0-9]{1,2})[\._ ]*[eE](?P[0-9]+)([- ]?[Ee+](?P[0-9]+))?', # S03E04-E05 - '(?P.*?)[vVsS](?P[0-9]{1,2})[\._\- ]+(?P[0-9]+)', # S03-03 - '(?P.*?)([^0-9]|^)(?P[0-9]{1,2})[Xx](?P[0-9]+)(-[0-9]+[Xx](?P[0-9]+))?', # 3x03 - '(.*?)(^|[\._\- ])+(?Psp)[\._ ]*(?P[0-9]{1,3})([\._\- ]|$)+', # SP01 (Special 01, equivalent to S00E01) -] -# BABS -- Removed the ".602." expresion from the list above. See default scanner to recover it. - -date_regexps = [ - '(?P[0-9]{4})[^0-9a-zA-Z]+(?P[0-9]{2})[^0-9a-zA-Z]+(?P[0-9]{2})([^0-9]|$)', # 2009-02-10 - '(?P[0-9]{2})[^0-9a-zA-Z]+(?P[0-9]{2})[^0-9a-zA-Z(]+(?P[0-9]{4})([^0-9a-zA-Z]|$)', # 02-10-2009 -] - -standalone_episode_regexs = [ - '(.*?)( \(([0-9]+)\))? - ([0-9]+)+x([0-9]+)(-[0-9]+[Xx]([0-9]+))?( - (.*))?', # Newzbin style, no _UNPACK_ - '(.*?)( \(([0-9]+)\))?[Ss]([0-9]+)+[Ee]([0-9]+)(-[0-9]+[Xx]([0-9]+))?( - (.*))?' # standard s00e00 -] - -season_regex = '.*?(?P[0-9]{1,2})+(?![^\(\[]*[\)\]])' # folder for a season - -just_episode_regexs = [ - '(?P[0-9]{1,3})[\. -_]*of[\. 
-_]*[0-9]{1,3}', # 01 of 08 - '^(?P[0-9]{1,3})[^0-9]', # 01 - Foo - '(^|[ \.\-_])[eé](p{0,1}|(pisode){0,1})[ \.\-_]*(?P[0-9]{1,3})([^0-9c-uw-z%]|$)', # Blah Blah ep234 - '.*?[ \.\-_](?P[0-9]{2,3})[^0-9c-uw-z%]+', # Flah - 04 - Blah - '.*?[ \.\-_](?P[0-9]{2,3})$', # Flah - 04 - '.*?[^0-9x](?P[0-9]{2,3})$', # Flah707 - '^(?P[0-9]{1,3})$', # 01 - '.*?[^s](?P[0-9]{1,3}).*$', # Fallback -] -# BABS -- modified "Blah Blah ep234" expression to only look for e, ep, or episode -- original scanner looked for e followed by 0 or more a-z - -special_episode_regex = 'special|spécial|oav|oad|ova|ncop|opening|nced|ending|trailer|promo|others|extra|film|movie|bonus' - -ends_with_number = '.*([0-9]{1,2})$' - -ends_with_episode = ['[ ]*[0-9]{1,2}x[0-9]{1,3}$', '[ ]*S[0-9]+E[0-9]+$'] - -# Look for episodes. -def Scan(path, files, mediaList, subdirs, language=None, root=None): - - print "BABS: Scan" - - # Scan for video files. - VideoFiles.Scan(path, files, mediaList, subdirs, root) - - # Take top two as show/season, but require at least the top one. - paths = Utils.SplitPath(path) - - if len(paths) == 1 and len(paths[0]) == 0: - - print "BABS: len(paths) == 1 and len(paths[0]) == 0" - - # Run the select regexps we allow at the top level. - for i in files: - file = os.path.basename(i) - - print "BABS: tlrs os.path.basename, i = |", i, "| file = |", file, "|" - - for rx in episode_regexps: - match = re.search(rx, file, re.IGNORECASE) - if match: - - print "BABS: matched episode_regexps: ", rx - - # Extract data. - show = match.group('show') if match.groupdict().has_key('show') else '' - season = match.group('season') - if season.lower() == 'sp': - season = 0 - else: - season = int(season) - episode = int(match.group('ep')) - endEpisode = episode - if match.groupdict().has_key('secondEp') and match.group('secondEp'): - endEpisode = int(match.group('secondEp')) - - # Clean title. 
- name, year = VideoFiles.CleanName(show) - if len(name) > 0: - for ep in range(episode, endEpisode+1): - tv_show = Media.Episode(name, season, ep, '', year) - tv_show.display_offset = (ep-episode)*100/(endEpisode-episode+1) - tv_show.parts.append(i) - mediaList.append(tv_show) - - elif len(paths) > 0 and len(paths[0]) > 0: - - print "BABS: len(paths) > 0 and len(paths[0]) > 0" - - done = False - - # If we're inside a Plex Versions directory, remove it and the quality directory from consideration. - if 'Plex Versions' in paths and len(paths) > 2: - versions_index = paths.index('Plex Versions') - del paths[versions_index:versions_index + 2] - - # See if parent directory is a perfect match (e.g. a directory like "24 - 8x02 - Day 8_ 5_00P.M. - 6_00P.M") - if len(files) == 1: - for rx in standalone_episode_regexs: - res = re.findall(rx, paths[-1]) - if len(res): - - print "BABS: match on standalone_episode_regexs ", rx - - show, junk, year, season, episode, junk, endEpisode, junk, title = res[0] - - # If it didn't have a show, then grab it from the directory. - if len(show) == 0: - (show, year) = VideoFiles.CleanName(paths[0]) - else: - (show, ignore) = VideoFiles.CleanName(show) - - print "BABS: standalone, show from directory, show = |", show, "| year = |", year, "|" - - episode = int(episode) - if len(endEpisode) > 0: - endEpisode = int(endEpisode) - else: - endEpisode = episode - - for ep in range(episode, endEpisode+1): - tv_show = Media.Episode(show, season, ep, title, year) - tv_show.display_offset = (ep-episode)*100/(endEpisode-episode+1) - tv_show.parts.append(files[0]) - mediaList.append(tv_show) - - done = True - break - - if done == False: - - # Not a perfect standalone match, so get information from directories. (e.g. "Lost/Season 1/s0101.mkv") - season = None - seasonNumber = None - - (show, year) = VideoFiles.CleanName(paths[0]) - - print "BABS: not perfect match: show = |", show, "| year = |", year, "|" - - # Which component looks like season? 
- if len(paths) >= 2: - - print "BABS: len(paths) >= 2" - - season = paths[len(paths)-1] - match = re.match(season_regex, season, re.IGNORECASE) - if match: - seasonNumber = int(match.group('season')) - - print "BABS: matched season_regex with season = |", season, "| seasonNumber =", seasonNumber - - print "BABS: before ends_with_episode, show = |", show, "|" - - # Make sure an episode name didn't make it into the show. - for rx in ends_with_episode: - show = re.sub(rx, '', show) - - print "BABS: after ends_with_episode, show = |", show, "|" - - for i in files: - done = False - file = os.path.basename(i) - - print "BABS: os.path.basename, i = |", i, "| file = |", file, "|" - - (file, ext) = os.path.splitext(file) - - print "BABS: os.path.splitext, file = |", file, "| ext = |", ext, "|" - - if ext.lower() in ['.mp4', '.m4v', '.mov']: - - print "BABS: try mp4 tags" - - m4season = m4ep = m4year = 0 - m4show = title = '' - try: - mp4fileTags = mp4file.Mp4File(i) - - # Show. - try: m4show = find_data(mp4fileTags, 'moov/udta/meta/ilst/tvshow').encode('utf-8') - except: pass - - # Season. - try: m4season = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tvseason')) - except: pass - - # Episode. - m4ep = None - try: - # tracknum (can be 101) - m4ep = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tracknum')) - except: - try: - # tvepisodenum (can be S2E16) - m4ep = find_data(mp4fileTags, 'moov/udta/meta/ilst/tvepisodenum') - except: - # TV Episode (can be 101) - m4ep = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tvepisode')) - - if m4ep is not None: - found = False - try: - # See if it matches regular expression. - for rx in episode_regexps: - match = re.search(rx, file, re.IGNORECASE) - if match: - m4season = int(match.group('season')) - m4ep = int(match.group('ep')) - found = True - - if found == False and re.match('[0-9]+', str(m4ep)): - # Carefully convert to episode number. 
- m4ep = int(m4ep) % 100 - elif found == False: - m4ep = int(re.findall('[0-9]+', m4ep)[0]) - except: - pass - - # Title. - try: title = find_data(mp4fileTags, 'moov/udta/meta/ilst/title').encode('utf-8') - except: pass - - # Note: Dates/years embedded in episode files tend to be air or "recorded on" dates, which can - # mislead the agent when doing series matching, so we will no longer pass those up as hints here. - - # If we have all the data we need, add it. - if len(m4show) > 0 and m4season > 0 and m4ep > 0: - tv_show = Media.Episode(m4show, m4season, m4ep, title, year) - tv_show.parts.append(i) - mediaList.append(tv_show) - continue - - except: - pass - - # Check for date-based regexps first. - for rx in date_regexps: - match = re.search(rx, file) - if match: - - # Make sure there's not a stronger season/ep match for the same file. - try: - for r in episode_regexps + standalone_episode_regexs: - if re.search(r, file): - raise - except: - break - - print "BABS: matched date_regexps ", rx - - year = int(match.group('year')) - month = int(match.group('month')) - day = int(match.group('day')) - - # Use the year as the season. - tv_show = Media.Episode(show, year, None, None, None) - tv_show.released_at = '%d-%02d-%02d' % (year, month, day) - tv_show.parts.append(i) - mediaList.append(tv_show) - - done = True - break - - if done == False: - - # Take the year out, because it's not going to help at this point. - cleanName, cleanYear = VideoFiles.CleanName(file) - if not year and cleanYear: - year = cleanYear - - print "BABS: after CleanName file = |", file, "| cleanName = |", cleanName, "| cleanYear = |", cleanYear, "|" - - if cleanYear != None: - file = file.replace(str(cleanYear), 'XXXX') - - print "BABS: replaced year, file = |", file, "|" - - # Minor cleaning on the file to avoid false matches on H.264, 720p, etc. 
- whackRx = ['([hHx][\.]?264)[^0-9]', '[^[0-9](720[pP])', '[^[0-9](1080[pP])', '[^[0-9](480[pP])'] - for rx in whackRx: - file = re.sub(rx, ' ', file) - - print "BABS: after whackRx, file = |", file, "|" - - for rx in episode_regexps: - - match = re.search(rx, file, re.IGNORECASE) - if match: - - print "BABS: matched episode_regexps ", rx - - # Parse season and episode. - the_season = match.group('season') - if the_season.lower() == 'sp': - the_season = 0 - else: - the_season = int(the_season) - episode = int(match.group('ep')) - endEpisode = episode - if match.groupdict().has_key('secondEp') and match.group('secondEp'): - endEpisode = int(match.group('secondEp')) - - for ep in range(episode, endEpisode+1): - tv_show = Media.Episode(show, the_season, ep, None, year) - tv_show.display_offset = (ep-episode)*100/(endEpisode-episode+1) - tv_show.parts.append(i) - mediaList.append(tv_show) - - done = True - break - - if done == False: - - print "BABS: dealing with episode? file = |", file, "|" - - # BABS -- Before we do CleanName, which will remove any dashes, etc. in filename, attempt to remove series name found in directory - file = re.sub(show, 'X', file) - print "BABS: before CleanName, remove show, show = |", show, "| file = |", file, "|" - - # OK, next let's see if we're dealing with something that looks like an episode. - # Begin by cleaning the filename to remove garbage like "h.264" that could throw - # things off. - # - (file, fileYear) = VideoFiles.CleanName(file) - - # if don't have a good year from before (when checking the parent folders) AND we just got a good year, use it. 
- if not year and fileYear: - year = fileYear - - print "BABS: episodes: CleanName: file = |", file, "| year = |", year, "|" - - # BABS -- And do it again, just in case the directory is off by things CleanName handles - file = re.sub(show, 'X', file) - print "BABS: after CleanName, remove show, show = |", show, "| file = |", file, "|" - - for rx in just_episode_regexs: - episode_match = re.search(rx, file, re.IGNORECASE | re.UNICODE) - if episode_match is not None: - - print "BABS: matched just_episode_regexs ", rx - - the_episode = int(episode_match.group('ep')) - - # Now look for a season. - if re.search(special_episode_regex, file, re.IGNORECASE | re.UNICODE): - the_season = 0 - elif seasonNumber is not None: - the_season = seasonNumber - else: - the_season = 1 - - print "BABS: the_season =", the_season - print "BABS: show = |", show, "| the_episode =", the_episode - - tv_show = Media.Episode(show, the_season, the_episode, None, year) - tv_show.parts.append(i) - mediaList.append(tv_show) - done = True - break - - if done == False: - print "Got nothing for:", file - - # Stack the results. - Stack.Scan(path, files, mediaList, subdirs) - -def find_data(atom, name): - child = atomsearch.find_path(atom, name) - data_atom = child.find('data') - if data_atom and 'data' in data_atom.attrs: - return data_atom.attrs['data'] - -import sys - -if __name__ == '__main__': - print "Hello, world!" 
-  path = sys.argv[1]
-  files = [os.path.join(path, file) for file in os.listdir(path)]
-  media = []
-  Scan(path[1:], files, media, [])
-  print "Media:", media
diff --git a/commands/pyyg.py b/commands/pyyg.py
new file mode 100644
index 0000000..1e689b6
--- /dev/null
+++ b/commands/pyyg.py
@@ -0,0 +1,237 @@
+"""Print the first acceptable YggTorrent result for a movie title.
+
+The title is looked up in the ``TVDB`` Algolia index to get its English and
+French names, then the query and both names are searched on YggTorrent. The
+first result passing every filter is printed and the script exits with
+status 0.
+"""
+import argparse
+import json
+import re
+import sys
+from urllib.parse import urlencode, urlparse
+
+from bs4 import BeautifulSoup
+from dns import rdatatype, resolver
+from requests import Session, adapters
+from urllib3.util.connection import HAS_IPV6
+
+BLACKLIST_WORDS = ["dvd", "iso"]
+SIZE_UNITS = {"o": 1, "ko": 10**3, "mo": 10**6, "go": 10**9, "to": 10**12}
+SIZE_PATTERN = re.compile(r"([0-9.]+)([^0-9]+)")
+MAX_SIZE = 10 * SIZE_UNITS["go"]
+NAMESERVERS = ["1.1.1.1"]
+# requests never times out on its own; do not let a stalled server hang us.
+REQUEST_TIMEOUT = 30
+YGG_ENGINE = "https://www5.yggtorrent.fi/engine"
+TVDB_QUERY_URL = "https://tvshowtime-dsn.algolia.net/1/indexes/TVDB/query"
+
+
+def parse_size(size):
+    """Convert a size such as ``"1.4Go"`` to a number of bytes.
+
+    Units are ``o``, ``Ko``, ``Mo``, ``Go`` and ``To`` (powers of 1000),
+    matched case-insensitively.
+
+    Raises:
+        ValueError: if no number/unit pair is found or the unit is unknown.
+    """
+    match = SIZE_PATTERN.search(size)
+    if match is None:
+        raise ValueError(f"Unparsable size: {size!r}")
+    unit = match.group(2).strip().lower()
+    if unit not in SIZE_UNITS:
+        raise ValueError(f"Unknown unit in size: {size!r}")
+    return int(float(match.group(1)) * SIZE_UNITS[unit])
+
+
+DNS_RESOLVER = resolver.Resolver()
+DNS_RESOLVER.cache = resolver.LRUCache()  # type: ignore
+
+
+class DNSAdapter(adapters.HTTPAdapter):
+    """HTTP adapter resolving hostnames through the given DNS servers.
+
+    The request URL is rewritten to the resolved IP address while the
+    ``Host`` header and, for HTTPS, the TLS server name keep the hostname.
+    """
+
+    def __init__(self, nameservers):
+        self.nameservers = nameservers
+        super().__init__()
+
+    def resolve(self, host, nameservers):
+        """Return the first address of *host*, IPv6 first if ``HAS_IPV6``.
+
+        IPv6 addresses come back wrapped in brackets, ready for a URL.
+        """
+        DNS_RESOLVER.nameservers = nameservers
+
+        if HAS_IPV6:
+            try:
+                answers_v6 = DNS_RESOLVER.resolve(host, rdatatype.AAAA)
+                for rdata_v6 in answers_v6:
+                    return f"[{str(rdata_v6)}]"
+            except resolver.NoAnswer:
+                pass  # no AAAA record, fall back to IPv4
+
+        answers_v4 = DNS_RESOLVER.resolve(host, rdatatype.A)
+        for rdata_v4 in answers_v4:
+            return str(rdata_v4)
+
+    def send(self, request, **kwargs):
+        """Send *request* to the resolved IP on behalf of its hostname."""
+        connection_pool_kwargs = self.poolmanager.connection_pool_kw
+        result = urlparse(request.url)
+        resolved_ip = self.resolve(result.hostname, self.nameservers)
+        # Rewrite the first occurrence only, so the same text appearing in
+        # the path or query is left alone.
+        request.url = request.url.replace(result.hostname, resolved_ip, 1)
+        request.headers["Host"] = result.hostname
+        request.headers[
+            "User-Agent"
+        ] = "Googlebot/2.1 (+http://www.google.com/bot.html)"
+
+        if result.scheme == "https":
+            connection_pool_kwargs["server_hostname"] = result.hostname
+            connection_pool_kwargs["assert_hostname"] = result.hostname
+
+        return super().send(request, **kwargs)
+
+
+session = Session()
+session.mount("http://", DNSAdapter(NAMESERVERS))
+session.mount("https://", DNSAdapter(NAMESERVERS))
+
+
+def get_files(id):
+    """Return the number of table rows in the file listing of torrent *id*."""
+    req = session.get(
+        f"{YGG_ENGINE}/get_files", params={"torrent": id}, timeout=REQUEST_TIMEOUT
+    )
+    files = json.loads(req.text)
+    html = BeautifulSoup(files["html"], "html.parser")
+    return len(html.select("tr"))
+
+
+def search_ygg(query, multi, year=None, uploaders=None):
+    """Search YggTorrent for *query* and print the first acceptable result.
+
+    A result is kept when its size is at most ``MAX_SIZE``, its name holds no
+    blacklisted word, its name contains one of *uploaders* (when given) and
+    its file listing has at most one row. The process then exits with
+    status 0. Returns ``None`` when nothing is acceptable.
+
+    Args:
+        query: Title to search for.
+        multi: When true, add the ``option_langue=4`` filter to the search.
+        year: Optional year appended to the searched name.
+        uploaders: Optional names; one of them must appear in the result name.
+    """
+    ygg_params = {
+        # Without a year, search the bare title rather than "<title> None".
+        "name": f"{query} {year}" if year else query,
+        "description": "",
+        "file": "",
+        "uploader": "",
+        "category": "2145",
+        "sub_category": "2183",
+        "do": "search",
+        "order": "asc",
+        "sort": "publish_date",
+    }
+
+    if multi:
+        ygg_params["option_langue"] = ["4"]
+
+    req = session.get(
+        f"{YGG_ENGINE}/search", params=ygg_params, timeout=REQUEST_TIMEOUT
+    )
+    html = BeautifulSoup(req.text, "html.parser")
+
+    # The first row is skipped, as before (presumably the table header).
+    for tr in html.select("table.table tr")[1:]:
+        tds = tr.find_all("td")
+        if len(tds) < 6 or tds[1].a is None:
+            continue  # not a result row
+
+        name = tds[1].get_text().lower().strip()
+
+        try:
+            if parse_size(tds[5].get_text()) > MAX_SIZE:
+                continue
+        except ValueError:
+            continue  # unreadable size: skip the row instead of crashing
+
+        if any(word.lower() in name for word in BLACKLIST_WORDS):
+            continue
+
+        if uploaders and not any(
+            uploader.lower() in name for uploader in uploaders
+        ):
+            continue
+
+        link = tds[1].a["href"]
+        torrent_id = link.split("/")[-1].split("-")[0]
+
+        if get_files(torrent_id) > 1:
+            continue
+
+        print(f"{name} {year} {link}")
+        sys.exit(0)
+
+
+def lookup_titles(query, year=None):
+    """Return the ``(english, french)`` names of the first TVDB movie hit.
+
+    The French name falls back to *query* when the hit has no ``fra``
+    translation. Returns ``None`` when the index reports no hit.
+    """
+    filters = "type:movie"
+    if year:
+        filters += " AND year:" + str(year)
+
+    tvdb = session.post(
+        TVDB_QUERY_URL,
+        params={
+            "x-algolia-application-id": "tvshowtime",
+            "x-algolia-api-key": "c9d5ec1316cec12f093754c69dd879d3",
+        },
+        json={"params": urlencode({"query": query, "filters": filters})},
+        timeout=REQUEST_TIMEOUT,
+    )
+    tvdata = json.loads(tvdb.text)
+
+    if not tvdata["nbHits"] > 0:
+        return None
+
+    hit = tvdata["hits"][0]
+    translations = hit["translations"]
+    french = translations["fra"] if "fra" in translations else query
+    return hit["name"], french
+
+
+def main():
+    """Parse the command line, then search with the ``multi`` filter first."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-u", "--uploader", action="append")
+    parser.add_argument("-y", "--year", type=int)
+    parser.add_argument("query")
+    args = parser.parse_args()
+
+    titles = lookup_titles(args.query, args.year)
+    if titles is None:
+        print("Can't find query on TheTVDB")
+        sys.exit(1)
+    english, french = titles
+
+    # Same order as before (query, French, English) without repeated titles,
+    # which would only send the same request again.
+    candidates = list(dict.fromkeys([args.query, french, english]))
+    for multi in (True, False):
+        for title in candidates:
+            search_ygg(title, multi, args.year, args.uploader)
+
+
+if __name__ == "__main__":
+    main()