Remove BABS and add pyyg
All checks were successful
continuous-integration/drone/push Build is passing

Michel Roux 2022-11-13 17:14:26 +01:00
parent 65abf1befd
commit 92adf237a1
3 changed files with 171 additions and 409 deletions

@@ -3,7 +3,8 @@ FROM linuxserver/ffmpeg:4.4-cli-ls65
 RUN apt-get update && \
     apt-get install -y \
         vim p7zip* git rsync lftp speedtest-cli rename megatools wget curl procps psmisc \
-        openssh-client transmission-cli python3-requests python3-pip && \
+        openssh-client transmission-cli python3-pip \
+        python3-requests python3-bs4 python3-dnspython && \
     rm -rf /var/lib/apt/lists/*
 RUN pip3 install yt-dlp && \
     curl -sSL https://raw.githubusercontent.com/tremc/tremc/master/tremc -o /usr/local/bin/tremc && \
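The two new python3-* packages are runtime dependencies of the commands/pyyg.py script added below (python3-requests was already installed; it simply moves onto the new continuation line). As a quick sanity check, a short import test run inside the built image, a sketch using only the module names those Debian packages provide, confirms they are available:

    # Verify that the libraries installed above import cleanly (run with python3 inside the image).
    import bs4            # python3-bs4
    import dns.resolver   # python3-dnspython
    import requests       # python3-requests

    print("ok:", bs4.__name__, dns.resolver.__name__, requests.__name__)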

@@ -1,408 +0,0 @@
# coding=utf-8
#
# Most code here is copyright (c) 2010 Plex Development Team. All rights reserved.
#
# Better ABsolute Scanner based on default scanner code from PMS 0.9.3.5 for Ubuntu
# 2011-10-15 by jmjf (on Plex Forums)
#
# a version of the Plex Series Scanner that does a better job of dealing with absolute numbered files
# and addresses the problem of series with numbers in the name.
#
# Place this file in /var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Scanners/Series
# /var/lib/plexmediaserver is a.k.a. ~/ to the plex user
# you may have to create the Series subdirectory
#
# This code has only been tested on Ubuntu 10.4.2 with PMS 0.9.3.5 and my media collection. Use at your own risk.
# That said, I expect it will work on other platforms.
#
# all debug messages are left in -- will only show on console -- include BABS: at beginning of line
# all changes except debug messages are called out with comments beginning # BABS --
#
# Other modifications by Xefir Destiny
#
import re, os, os.path
import Media, VideoFiles, Stack, Utils
from mp4file import mp4file, atomsearch

episode_regexps = [
  '(?P<show>.*?)[vVsS](?P<season>[0-9]{1,2})[\._ ]*[eE](?P<ep>[0-9]+)([- ]?[Ee+](?P<secondEp>[0-9]+))?', # S03E04-E05
  '(?P<show>.*?)[vVsS](?P<season>[0-9]{1,2})[\._\- ]+(?P<ep>[0-9]+)', # S03-03
  '(?P<show>.*?)([^0-9]|^)(?P<season>[0-9]{1,2})[Xx](?P<ep>[0-9]+)(-[0-9]+[Xx](?P<secondEp>[0-9]+))?', # 3x03
  '(.*?)(^|[\._\- ])+(?P<season>sp)[\._ ]*(?P<ep>[0-9]{1,3})([\._\- ]|$)+', # SP01 (Special 01, equivalent to S00E01)
]

# BABS -- Removed the ".602." expression from the list above. See default scanner to recover it.

date_regexps = [
  '(?P<year>[0-9]{4})[^0-9a-zA-Z]+(?P<month>[0-9]{2})[^0-9a-zA-Z]+(?P<day>[0-9]{2})([^0-9]|$)', # 2009-02-10
  '(?P<month>[0-9]{2})[^0-9a-zA-Z]+(?P<day>[0-9]{2})[^0-9a-zA-Z(]+(?P<year>[0-9]{4})([^0-9a-zA-Z]|$)', # 02-10-2009
]

standalone_episode_regexs = [
  '(.*?)( \(([0-9]+)\))? - ([0-9]+)+x([0-9]+)(-[0-9]+[Xx]([0-9]+))?( - (.*))?', # Newzbin style, no _UNPACK_
  '(.*?)( \(([0-9]+)\))?[Ss]([0-9]+)+[Ee]([0-9]+)(-[0-9]+[Xx]([0-9]+))?( - (.*))?' # standard s00e00
]

season_regex = '.*?(?P<season>[0-9]{1,2})+(?![^\(\[]*[\)\]])' # folder for a season

just_episode_regexs = [
  '(?P<ep>[0-9]{1,3})[\. -_]*of[\. -_]*[0-9]{1,3}', # 01 of 08
  '^(?P<ep>[0-9]{1,3})[^0-9]', # 01 - Foo
  '(^|[ \.\-_])[eé](p{0,1}|(pisode){0,1})[ \.\-_]*(?P<ep>[0-9]{1,3})([^0-9c-uw-z%]|$)', # Blah Blah ep234
  '.*?[ \.\-_](?P<ep>[0-9]{2,3})[^0-9c-uw-z%]+', # Flah - 04 - Blah
  '.*?[ \.\-_](?P<ep>[0-9]{2,3})$', # Flah - 04
  '.*?[^0-9x](?P<ep>[0-9]{2,3})$', # Flah707
  '^(?P<ep>[0-9]{1,3})$', # 01
  '.*?[^s](?P<ep>[0-9]{1,3}).*$', # Fallback
]

# BABS -- modified "Blah Blah ep234" expression to only look for e, ep, or episode -- original scanner looked for e followed by 0 or more a-z

special_episode_regex = 'special|spécial|oav|oad|ova|ncop|opening|nced|ending|trailer|promo|others|extra|film|movie|bonus'
ends_with_number = '.*([0-9]{1,2})$'
ends_with_episode = ['[ ]*[0-9]{1,2}x[0-9]{1,3}$', '[ ]*S[0-9]+E[0-9]+$']
# Look for episodes.
def Scan(path, files, mediaList, subdirs, language=None, root=None):
  print "BABS: Scan"

  # Scan for video files.
  VideoFiles.Scan(path, files, mediaList, subdirs, root)

  # Take top two as show/season, but require at least the top one.
  paths = Utils.SplitPath(path)

  if len(paths) == 1 and len(paths[0]) == 0:
    print "BABS: len(paths) == 1 and len(paths[0]) == 0"

    # Run the select regexps we allow at the top level.
    for i in files:
      file = os.path.basename(i)
      print "BABS: tlrs os.path.basename, i = |", i, "| file = |", file, "|"

      for rx in episode_regexps:
        match = re.search(rx, file, re.IGNORECASE)
        if match:
          print "BABS: matched episode_regexps: ", rx

          # Extract data.
          show = match.group('show') if match.groupdict().has_key('show') else ''
          season = match.group('season')
          if season.lower() == 'sp':
            season = 0
          else:
            season = int(season)

          episode = int(match.group('ep'))
          endEpisode = episode
          if match.groupdict().has_key('secondEp') and match.group('secondEp'):
            endEpisode = int(match.group('secondEp'))

          # Clean title.
          name, year = VideoFiles.CleanName(show)
          if len(name) > 0:
            for ep in range(episode, endEpisode+1):
              tv_show = Media.Episode(name, season, ep, '', year)
              tv_show.display_offset = (ep-episode)*100/(endEpisode-episode+1)
              tv_show.parts.append(i)
              mediaList.append(tv_show)

  elif len(paths) > 0 and len(paths[0]) > 0:
    print "BABS: len(paths) > 0 and len(paths[0]) > 0"
    done = False

    # If we're inside a Plex Versions directory, remove it and the quality directory from consideration.
    if 'Plex Versions' in paths and len(paths) > 2:
      versions_index = paths.index('Plex Versions')
      del paths[versions_index:versions_index + 2]

    # See if parent directory is a perfect match (e.g. a directory like "24 - 8x02 - Day 8_ 5_00P.M. - 6_00P.M")
    if len(files) == 1:
      for rx in standalone_episode_regexs:
        res = re.findall(rx, paths[-1])
        if len(res):
          print "BABS: match on standalone_episode_regexs ", rx
          show, junk, year, season, episode, junk, endEpisode, junk, title = res[0]

          # If it didn't have a show, then grab it from the directory.
          if len(show) == 0:
            (show, year) = VideoFiles.CleanName(paths[0])
          else:
            (show, ignore) = VideoFiles.CleanName(show)
          print "BABS: standalone, show from directory, show = |", show, "| year = |", year, "|"

          episode = int(episode)
          if len(endEpisode) > 0:
            endEpisode = int(endEpisode)
          else:
            endEpisode = episode

          for ep in range(episode, endEpisode+1):
            tv_show = Media.Episode(show, season, ep, title, year)
            tv_show.display_offset = (ep-episode)*100/(endEpisode-episode+1)
            tv_show.parts.append(files[0])
            mediaList.append(tv_show)

          done = True
          break

    if done == False:
      # Not a perfect standalone match, so get information from directories. (e.g. "Lost/Season 1/s0101.mkv")
      season = None
      seasonNumber = None

      (show, year) = VideoFiles.CleanName(paths[0])
      print "BABS: not perfect match: show = |", show, "| year = |", year, "|"

      # Which component looks like season?
      if len(paths) >= 2:
        print "BABS: len(paths) >= 2"
        season = paths[len(paths)-1]
        match = re.match(season_regex, season, re.IGNORECASE)
        if match:
          seasonNumber = int(match.group('season'))
          print "BABS: matched season_regex with season = |", season, "| seasonNumber =", seasonNumber

      print "BABS: before ends_with_episode, show = |", show, "|"

      # Make sure an episode name didn't make it into the show.
      for rx in ends_with_episode:
        show = re.sub(rx, '', show)
      print "BABS: after ends_with_episode, show = |", show, "|"

      for i in files:
        done = False
        file = os.path.basename(i)
        print "BABS: os.path.basename, i = |", i, "| file = |", file, "|"
        (file, ext) = os.path.splitext(file)
        print "BABS: os.path.splitext, file = |", file, "| ext = |", ext, "|"

        if ext.lower() in ['.mp4', '.m4v', '.mov']:
          print "BABS: try mp4 tags"
          m4season = m4ep = m4year = 0
          m4show = title = ''

          try:
            mp4fileTags = mp4file.Mp4File(i)

            # Show.
            try: m4show = find_data(mp4fileTags, 'moov/udta/meta/ilst/tvshow').encode('utf-8')
            except: pass

            # Season.
            try: m4season = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tvseason'))
            except: pass

            # Episode.
            m4ep = None
            try:
              # tracknum (can be 101)
              m4ep = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tracknum'))
            except:
              try:
                # tvepisodenum (can be S2E16)
                m4ep = find_data(mp4fileTags, 'moov/udta/meta/ilst/tvepisodenum')
              except:
                # TV Episode (can be 101)
                m4ep = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tvepisode'))

            if m4ep is not None:
              found = False
              try:
                # See if it matches regular expression.
                for rx in episode_regexps:
                  match = re.search(rx, file, re.IGNORECASE)
                  if match:
                    m4season = int(match.group('season'))
                    m4ep = int(match.group('ep'))
                    found = True

                if found == False and re.match('[0-9]+', str(m4ep)):
                  # Carefully convert to episode number.
                  m4ep = int(m4ep) % 100
                elif found == False:
                  m4ep = int(re.findall('[0-9]+', m4ep)[0])
              except:
                pass

            # Title.
            try: title = find_data(mp4fileTags, 'moov/udta/meta/ilst/title').encode('utf-8')
            except: pass

            # Note: Dates/years embedded in episode files tend to be air or "recorded on" dates, which can
            # mislead the agent when doing series matching, so we will no longer pass those up as hints here.

            # If we have all the data we need, add it.
            if len(m4show) > 0 and m4season > 0 and m4ep > 0:
              tv_show = Media.Episode(m4show, m4season, m4ep, title, year)
              tv_show.parts.append(i)
              mediaList.append(tv_show)
              continue
          except:
            pass

        # Check for date-based regexps first.
        for rx in date_regexps:
          match = re.search(rx, file)
          if match:
            # Make sure there's not a stronger season/ep match for the same file.
            try:
              for r in episode_regexps + standalone_episode_regexs:
                if re.search(r, file):
                  raise
            except:
              break

            print "BABS: matched date_regexps ", rx
            year = int(match.group('year'))
            month = int(match.group('month'))
            day = int(match.group('day'))

            # Use the year as the season.
            tv_show = Media.Episode(show, year, None, None, None)
            tv_show.released_at = '%d-%02d-%02d' % (year, month, day)
            tv_show.parts.append(i)
            mediaList.append(tv_show)

            done = True
            break

        if done == False:
          # Take the year out, because it's not going to help at this point.
          cleanName, cleanYear = VideoFiles.CleanName(file)
          if not year and cleanYear:
            year = cleanYear
          print "BABS: after CleanName file = |", file, "| cleanName = |", cleanName, "| cleanYear = |", cleanYear, "|"

          if cleanYear != None:
            file = file.replace(str(cleanYear), 'XXXX')
            print "BABS: replaced year, file = |", file, "|"

          # Minor cleaning on the file to avoid false matches on H.264, 720p, etc.
          whackRx = ['([hHx][\.]?264)[^0-9]', '[^[0-9](720[pP])', '[^[0-9](1080[pP])', '[^[0-9](480[pP])']
          for rx in whackRx:
            file = re.sub(rx, ' ', file)
          print "BABS: after whackRx, file = |", file, "|"

          for rx in episode_regexps:
            match = re.search(rx, file, re.IGNORECASE)
            if match:
              print "BABS: matched episode_regexps ", rx

              # Parse season and episode.
              the_season = match.group('season')
              if the_season.lower() == 'sp':
                the_season = 0
              else:
                the_season = int(the_season)

              episode = int(match.group('ep'))
              endEpisode = episode
              if match.groupdict().has_key('secondEp') and match.group('secondEp'):
                endEpisode = int(match.group('secondEp'))

              for ep in range(episode, endEpisode+1):
                tv_show = Media.Episode(show, the_season, ep, None, year)
                tv_show.display_offset = (ep-episode)*100/(endEpisode-episode+1)
                tv_show.parts.append(i)
                mediaList.append(tv_show)

              done = True
              break

        if done == False:
          print "BABS: dealing with episode? file = |", file, "|"

          # BABS -- Before we do CleanName, which will remove any dashes, etc. in filename, attempt to remove series name found in directory
          file = re.sub(show, 'X', file)
          print "BABS: before CleanName, remove show, show = |", show, "| file = |", file, "|"

          # OK, next let's see if we're dealing with something that looks like an episode.
          # Begin by cleaning the filename to remove garbage like "h.264" that could throw
          # things off.
          #
          (file, fileYear) = VideoFiles.CleanName(file)

          # if don't have a good year from before (when checking the parent folders) AND we just got a good year, use it.
          if not year and fileYear:
            year = fileYear
          print "BABS: episodes: CleanName: file = |", file, "| year = |", year, "|"

          # BABS -- And do it again, just in case the directory is off by things CleanName handles
          file = re.sub(show, 'X', file)
          print "BABS: after CleanName, remove show, show = |", show, "| file = |", file, "|"

          for rx in just_episode_regexs:
            episode_match = re.search(rx, file, re.IGNORECASE | re.UNICODE)
            if episode_match is not None:
              print "BABS: matched just_episode_regexs ", rx
              the_episode = int(episode_match.group('ep'))

              # Now look for a season.
              if re.search(special_episode_regex, file, re.IGNORECASE | re.UNICODE):
                the_season = 0
              elif seasonNumber is not None:
                the_season = seasonNumber
              else:
                the_season = 1
              print "BABS: the_season =", the_season

              print "BABS: show = |", show, "| the_episode =", the_episode
              tv_show = Media.Episode(show, the_season, the_episode, None, year)
              tv_show.parts.append(i)
              mediaList.append(tv_show)

              done = True
              break

        if done == False:
          print "Got nothing for:", file

  # Stack the results.
  Stack.Scan(path, files, mediaList, subdirs)
def find_data(atom, name):
  child = atomsearch.find_path(atom, name)
  data_atom = child.find('data')
  if data_atom and 'data' in data_atom.attrs:
    return data_atom.attrs['data']

import sys

if __name__ == '__main__':
  print "Hello, world!"
  path = sys.argv[1]
  files = [os.path.join(path, file) for file in os.listdir(path)]
  media = []
  Scan(path[1:], files, media, [])
  print "Media:", media

commands/pyyg.py (new file, 169 additions)
@@ -0,0 +1,169 @@
import argparse
import json
import re
from urllib.parse import urlencode, urlparse

from bs4 import BeautifulSoup
from dns import rdatatype, resolver
from requests import Session, adapters
from urllib3.util.connection import HAS_IPV6

BLACKLIST_WORDS = ["dvd", "iso"]

parser = argparse.ArgumentParser()
parser.add_argument("-u", "--uploader", action="append")
parser.add_argument("-y", "--year", type=int)
parser.add_argument("query")
args = parser.parse_args()


def parse_size(size):
    units = {"o": 1, "Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
    match = re.search("([0-9.]+)([^0-9]+)", size)
    number = match.group(1).strip()
    unit = match.group(2).strip()
    return int(float(number) * units[unit])


DNS_RESOLVER = resolver.Resolver()
DNS_RESOLVER.cache = resolver.LRUCache()  # type: ignore


class DNSAdapter(adapters.HTTPAdapter):
    def __init__(self, nameservers):
        self.nameservers = nameservers
        super().__init__()

    def resolve(self, host, nameservers):
        DNS_RESOLVER.nameservers = nameservers

        if HAS_IPV6:
            try:
                answers_v6 = DNS_RESOLVER.resolve(host, rdatatype.AAAA)
                for rdata_v6 in answers_v6:
                    return f"[{str(rdata_v6)}]"
            except resolver.NoAnswer:
                pass

        answers_v4 = DNS_RESOLVER.resolve(host, rdatatype.A)
        for rdata_v4 in answers_v4:
            return str(rdata_v4)

    def send(self, request, **kwargs):
        connection_pool_kwargs = self.poolmanager.connection_pool_kw
        result = urlparse(request.url)
        resolved_ip = self.resolve(result.hostname, self.nameservers)
        request.url = request.url.replace(result.hostname, resolved_ip)
        request.headers["Host"] = result.hostname
        request.headers[
            "User-Agent"
        ] = "Googlebot/2.1 (+http://www.google.com/bot.html)"

        if result.scheme == "https":
            connection_pool_kwargs["server_hostname"] = result.hostname
            connection_pool_kwargs["assert_hostname"] = result.hostname

        return super().send(request, **kwargs)


session = Session()
session.mount("http://", DNSAdapter(["1.1.1.1"]))
session.mount("https://", DNSAdapter(["1.1.1.1"]))


def get_files(id):
    req = session.get(
        "https://www5.yggtorrent.fi/engine/get_files", params={"torrent": id}
    )
    files = json.loads(req.text)
    html = BeautifulSoup(files["html"], "html.parser")
    trs = html.select("tr")
    return len(trs)


def search_ygg(query, multi):
    ygg_params = {
        "name": f"{query} {args.year}",
        "description": "",
        "file": "",
        "uploader": "",
        "category": "2145",
        "sub_category": "2183",
        "do": "search",
        "order": "asc",
        "sort": "publish_date",
    }

    if multi:
        ygg_params["option_langue"] = ["4"]

    req = session.get("https://www5.yggtorrent.fi/engine/search", params=ygg_params)
    html = BeautifulSoup(req.text, "html.parser")
    trs = html.select("table.table tr")

    if len(trs) > 1:
        for i, tr in enumerate(trs):
            if not i:
                continue

            tds = tr.find_all("td")
            size = tds[5].get_text()
            name = tds[1].get_text().lower().strip()

            if parse_size(size) > parse_size("10Go"):
                continue

            if any(word.lower() in name for word in BLACKLIST_WORDS):
                continue

            if args.uploader and not any(
                uploader.lower() in name for uploader in args.uploader
            ):
                continue

            link = tds[1].a["href"]
            id = link.split("/")[-1].split("-")[0]

            if get_files(id) > 1:
                continue

            print(f"{name} {args.year} {link}")
            exit(0)


query_string = {"query": args.query, "filters": "type:movie"}

if args.year:
    query_string["filters"] += " AND year:" + str(args.year)

tvdb = session.post(
    "https://tvshowtime-dsn.algolia.net/1/indexes/TVDB/query",
    params={
        "x-algolia-application-id": "tvshowtime",
        "x-algolia-api-key": "c9d5ec1316cec12f093754c69dd879d3",
    },
    json={"params": urlencode(query_string)},
)
tvdata = json.loads(tvdb.text)

if not tvdata["nbHits"] > 0:
    print("Can't find query on TheTVDB")
    exit(1)

eng = tvdata["hits"][0]["name"]
fra = (
    tvdata["hits"][0]["translations"]["fra"]
    if "fra" in tvdata["hits"][0]["translations"]
    else args.query
)

search_ygg(args.query, True)
search_ygg(fra, True)
search_ygg(eng, True)
search_ygg(args.query, False)
search_ygg(fra, False)
search_ygg(eng, False)
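The script is meant to be run from the command line, e.g. python3 commands/pyyg.py -y 2021 -u SomeUploader "Some Movie" (the year, uploader and title here are placeholders). Results larger than 10 Go are skipped via parse_size; a self-contained check of that helper, with the function copied verbatim because importing commands/pyyg.py would trigger its argparse and network calls:

    import re

    # Copy of parse_size from commands/pyyg.py, so the check runs standalone.
    def parse_size(size):
        units = {"o": 1, "Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
        match = re.search("([0-9.]+)([^0-9]+)", size)
        number = match.group(1).strip()
        unit = match.group(2).strip()
        return int(float(number) * units[unit])

    assert parse_size("700 Mo") == 700 * 10**6
    assert parse_size("10Go") == 10 * 10**9  # the cut-off used in search_ygg
    print("parse_size ok")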