From 92adf237a193535a2b52c039ec0576bdc68def89 Mon Sep 17 00:00:00 2001
From: Michel Roux <xefir@crystalyx.net>
Date: Sun, 13 Nov 2022 17:14:26 +0100
Subject: [PATCH] Remove BABS and add pyyg

---
 Dockerfile       |   3 +-
 commands/BABS.py | 408 -----------------------------------------------
 commands/pyyg.py | 169 ++++++++++++++++++++
 3 files changed, 171 insertions(+), 409 deletions(-)
 delete mode 100755 commands/BABS.py
 create mode 100644 commands/pyyg.py
diff --git a/Dockerfile b/Dockerfile
index 923b25f..48cfa9a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,7 +3,8 @@ FROM linuxserver/ffmpeg:4.4-cli-ls65
 RUN apt-get update && \
     apt-get install -y \
     vim p7zip* git rsync lftp speedtest-cli rename megatools wget curl procps psmisc \
-    openssh-client transmission-cli python3-requests python3-pip && \
+    openssh-client transmission-cli python3-pip \
+    python3-requests python3-bs4 python3-dnspython && \
     rm -rf /var/lib/apt/lists/*
 RUN pip3 install yt-dlp && \
     curl -sSL https://raw.githubusercontent.com/tremc/tremc/master/tremc -o /usr/local/bin/tremc && \
diff --git a/commands/BABS.py b/commands/BABS.py
deleted file mode 100755
index 75c5b19..0000000
--- a/commands/BABS.py
+++ /dev/null
@@ -1,408 +0,0 @@
-# coding=utf-8
-#
-# Most code here is copyright (c) 2010 Plex Development Team. All rights reserved.
-#
-# Better ABsolute Scanner based on default scanner code from PMS 0.9.3.5 for Ubuntu
-# 2011-10-15 by jmjf (on Plex Forums)
-#
-# a version of the Plex Series Scanner that does a better job of dealing with absolute numbered files
-# and addresses the problem of series with numbers in the the name.
-#
-# Place this file in /var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Scanners/Series
-# /var/lib/plexmediaserver is a.k.a. ~/ to the plex user
-# you may have to create the Series subdirectory
-#
-# This code has only been tested on Ubuntu 10.4.2 with PMS 0.9.3.5 and my media collection. Use at your own risk.
-# That said, I expect it will work on other platforms.
-#
-# all debug messages are left in -- will only show on console -- include BABS: at beginning of line
-# all changes except debug messages are called out with comments begining # BABS --
-#
-# Other modifications by Xefir Destiny
-#
-import re, os, os.path
-import Media, VideoFiles, Stack, Utils
-from mp4file import mp4file, atomsearch
-
-episode_regexps = [
-  '(?P<show>.*?)[vVsS](?P<season>[0-9]{1,2})[\._ ]*[eE](?P<ep>[0-9]+)([- ]?[Ee+](?P<secondEp>[0-9]+))?', # S03E04-E05
-  '(?P<show>.*?)[vVsS](?P<season>[0-9]{1,2})[\._\- ]+(?P<ep>[0-9]+)',                                    # S03-03
-  '(?P<show>.*?)([^0-9]|^)(?P<season>[0-9]{1,2})[Xx](?P<ep>[0-9]+)(-[0-9]+[Xx](?P<secondEp>[0-9]+))?',   # 3x03
-  '(.*?)(^|[\._\- ])+(?P<season>sp)[\._ ]*(?P<ep>[0-9]{1,3})([\._\- ]|$)+',                              # SP01 (Special 01, equivalent to S00E01)
-]
-# BABS -- Removed the ".602." expresion from the list above. See default scanner to recover it.
-
-date_regexps = [
-  '(?P<year>[0-9]{4})[^0-9a-zA-Z]+(?P<month>[0-9]{2})[^0-9a-zA-Z]+(?P<day>[0-9]{2})([^0-9]|$)',        # 2009-02-10
-  '(?P<month>[0-9]{2})[^0-9a-zA-Z]+(?P<day>[0-9]{2})[^0-9a-zA-Z(]+(?P<year>[0-9]{4})([^0-9a-zA-Z]|$)', # 02-10-2009
-]
-
-standalone_episode_regexs = [
-  '(.*?)( \(([0-9]+)\))? - ([0-9]+)+x([0-9]+)(-[0-9]+[Xx]([0-9]+))?( - (.*))?',    # Newzbin style, no _UNPACK_
-  '(.*?)( \(([0-9]+)\))?[Ss]([0-9]+)+[Ee]([0-9]+)(-[0-9]+[Xx]([0-9]+))?( - (.*))?' # standard s00e00
-]
-
-season_regex = '.*?(?P<season>[0-9]{1,2})+(?![^\(\[]*[\)\]])' # folder for a season
-
-just_episode_regexs = [
-  '(?P<ep>[0-9]{1,3})[\. -_]*of[\. -_]*[0-9]{1,3}', # 01 of 08
-  '^(?P<ep>[0-9]{1,3})[^0-9]',                      # 01 - Foo
-  '(^|[ \.\-_])[eé](p{0,1}|(pisode){0,1})[ \.\-_]*(?P<ep>[0-9]{1,3})([^0-9c-uw-z%]|$)', # Blah Blah ep234
-  '.*?[ \.\-_](?P<ep>[0-9]{2,3})[^0-9c-uw-z%]+',    # Flah - 04 - Blah
-  '.*?[ \.\-_](?P<ep>[0-9]{2,3})$',                 # Flah - 04
-  '.*?[^0-9x](?P<ep>[0-9]{2,3})$',                  # Flah707
-  '^(?P<ep>[0-9]{1,3})$',                           # 01
-  '.*?[^s](?P<ep>[0-9]{1,3}).*$',                   # Fallback
-]
-# BABS -- modified "Blah Blah ep234" expression to only look for e, ep, or episode -- original scanner looked for e followed by 0 or more a-z
-
-special_episode_regex = 'special|spécial|oav|oad|ova|ncop|opening|nced|ending|trailer|promo|others|extra|film|movie|bonus'
-
-ends_with_number = '.*([0-9]{1,2})$'
-
-ends_with_episode = ['[ ]*[0-9]{1,2}x[0-9]{1,3}$', '[ ]*S[0-9]+E[0-9]+$']
-
-# Look for episodes.
-def Scan(path, files, mediaList, subdirs, language=None, root=None):
-
-  print "BABS: Scan"
-
-  # Scan for video files.
-  VideoFiles.Scan(path, files, mediaList, subdirs, root)
-
-  # Take top two as show/season, but require at least the top one.
-  paths = Utils.SplitPath(path)
-
-  if len(paths) == 1 and len(paths[0]) == 0:
-
-    print "BABS: len(paths) == 1 and len(paths[0]) == 0"
-
-    # Run the select regexps we allow at the top level.
-    for i in files:
-      file = os.path.basename(i)
-
-      print "BABS: tlrs os.path.basename, i = |", i, "| file = |", file, "|"
-
-      for rx in episode_regexps:
-        match = re.search(rx, file, re.IGNORECASE)
-        if match:
-
-          print "BABS: matched episode_regexps: ", rx
-
-          # Extract data.
-          show = match.group('show') if match.groupdict().has_key('show') else ''
-          season = match.group('season')
-          if season.lower() == 'sp':
-            season = 0
-          else:
-            season = int(season)
-          episode = int(match.group('ep'))
-          endEpisode = episode
-          if match.groupdict().has_key('secondEp') and match.group('secondEp'):
-            endEpisode = int(match.group('secondEp'))
-
-          # Clean title.
-          name, year = VideoFiles.CleanName(show)
-          if len(name) > 0:
-            for ep in range(episode, endEpisode+1):
-              tv_show = Media.Episode(name, season, ep, '', year)
-              tv_show.display_offset = (ep-episode)*100/(endEpisode-episode+1)
-              tv_show.parts.append(i)
-              mediaList.append(tv_show)
-
-  elif len(paths) > 0 and len(paths[0]) > 0:
-
-    print "BABS: len(paths) > 0 and len(paths[0]) > 0"
-
-    done = False
-
-    # If we're inside a Plex Versions directory, remove it and the quality directory from consideration.
-    if 'Plex Versions' in paths and len(paths) > 2:
-      versions_index = paths.index('Plex Versions')
-      del paths[versions_index:versions_index + 2]
-
-    # See if parent directory is a perfect match (e.g. a directory like "24 - 8x02 - Day 8_ 5_00P.M. - 6_00P.M")
-    if len(files) == 1:
-      for rx in standalone_episode_regexs:
-        res = re.findall(rx, paths[-1])
-        if len(res):
-
-          print "BABS: match on standalone_episode_regexs ", rx
-
-          show, junk, year, season, episode, junk, endEpisode, junk, title = res[0]
-
-          # If it didn't have a show, then grab it from the directory.
-          if len(show) == 0:
-            (show, year) = VideoFiles.CleanName(paths[0])
-          else:
-            (show, ignore) = VideoFiles.CleanName(show)
-
-            print "BABS: standalone, show from directory, show = |", show, "| year = |", year, "|"
-
-          episode = int(episode)
-          if len(endEpisode) > 0:
-            endEpisode = int(endEpisode)
-          else:
-            endEpisode = episode
-
-          for ep in range(episode, endEpisode+1):
-            tv_show = Media.Episode(show, season, ep, title, year)
-            tv_show.display_offset = (ep-episode)*100/(endEpisode-episode+1)
-            tv_show.parts.append(files[0])
-            mediaList.append(tv_show)
-
-          done = True
-          break
-
-    if done == False:
-
-      # Not a perfect standalone match, so get information from directories. (e.g. "Lost/Season 1/s0101.mkv")
-      season = None
-      seasonNumber = None
-
-      (show, year) = VideoFiles.CleanName(paths[0])
-
-      print "BABS: not perfect match: show = |", show, "| year = |", year, "|"
-
-      # Which component looks like season?
-      if len(paths) >= 2:
-
-        print "BABS: len(paths) >= 2"
-
-        season = paths[len(paths)-1]
-        match = re.match(season_regex, season, re.IGNORECASE)
-        if match:
-          seasonNumber = int(match.group('season'))
-
-          print "BABS: matched season_regex with season = |", season, "| seasonNumber =", seasonNumber
-
-      print "BABS: before ends_with_episode, show = |", show, "|"
-
-      # Make sure an episode name didn't make it into the show.
-      for rx in ends_with_episode:
-        show = re.sub(rx, '', show)
-
-      print "BABS: after ends_with_episode, show = |", show, "|"
-
-      for i in files:
-        done = False
-        file = os.path.basename(i)
-
-        print "BABS: os.path.basename, i = |", i, "| file = |", file, "|"
-
-        (file, ext) = os.path.splitext(file)
-
-        print "BABS: os.path.splitext, file = |", file, "| ext = |", ext, "|"
-
-        if ext.lower() in ['.mp4', '.m4v', '.mov']:
-
-          print "BABS: try mp4 tags"
-
-          m4season = m4ep = m4year = 0
-          m4show = title = ''
-          try:
-            mp4fileTags = mp4file.Mp4File(i)
-
-            # Show.
-            try: m4show = find_data(mp4fileTags, 'moov/udta/meta/ilst/tvshow').encode('utf-8')
-            except: pass
-
-            # Season.
-            try: m4season = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tvseason'))
-            except: pass
-
-            # Episode.
-            m4ep = None
-            try:
-              # tracknum (can be 101)
-              m4ep = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tracknum'))
-            except:
-              try:
-                # tvepisodenum (can be S2E16)
-                m4ep = find_data(mp4fileTags, 'moov/udta/meta/ilst/tvepisodenum')
-              except:
-                # TV Episode (can be 101)
-                m4ep = int(find_data(mp4fileTags, 'moov/udta/meta/ilst/tvepisode'))
-
-            if m4ep is not None:
-              found = False
-              try:
-                # See if it matches regular expression.
-                for rx in episode_regexps:
-                  match = re.search(rx, file, re.IGNORECASE)
-                  if match:
-                    m4season = int(match.group('season'))
-                    m4ep = int(match.group('ep'))
-                    found = True
-
-                if found == False and re.match('[0-9]+', str(m4ep)):
-                  # Carefully convert to episode number.
-                  m4ep = int(m4ep) % 100
-                elif found == False:
-                  m4ep = int(re.findall('[0-9]+', m4ep)[0])
-              except:
-                pass
-
-            # Title.
-            try: title = find_data(mp4fileTags, 'moov/udta/meta/ilst/title').encode('utf-8')
-            except: pass
-
-            # Note: Dates/years embedded in episode files tend to be air or "recorded on" dates, which can
-            # mislead the agent when doing series matching, so we will no longer pass those up as hints here.
-
-            # If we have all the data we need, add it.
-            if len(m4show) > 0 and m4season > 0 and m4ep > 0:
-              tv_show = Media.Episode(m4show, m4season, m4ep, title, year)
-              tv_show.parts.append(i)
-              mediaList.append(tv_show)
-              continue
-
-          except:
-            pass
-
-        # Check for date-based regexps first.
-        for rx in date_regexps:
-          match = re.search(rx, file)
-          if match:
-
-           # Make sure there's not a stronger season/ep match for the same file.
-            try:
-              for r in episode_regexps + standalone_episode_regexs:
-                if re.search(r, file):
-                  raise
-            except:
-              break
-
-            print "BABS: matched date_regexps ", rx
-
-            year = int(match.group('year'))
-            month = int(match.group('month'))
-            day = int(match.group('day'))
-
-            # Use the year as the season.
-            tv_show = Media.Episode(show, year, None, None, None)
-            tv_show.released_at = '%d-%02d-%02d' % (year, month, day)
-            tv_show.parts.append(i)
-            mediaList.append(tv_show)
-
-            done = True
-            break
-
-        if done == False:
-
-          # Take the year out, because it's not going to help at this point.
-          cleanName, cleanYear = VideoFiles.CleanName(file)
-          if not year and cleanYear:
-            year = cleanYear
-
-          print "BABS: after CleanName file = |", file, "| cleanName = |", cleanName, "| cleanYear = |", cleanYear, "|"
-
-          if cleanYear != None:
-            file = file.replace(str(cleanYear), 'XXXX')
-
-            print "BABS: replaced year, file = |", file, "|"
-
-          # Minor cleaning on the file to avoid false matches on H.264, 720p, etc.
-          whackRx = ['([hHx][\.]?264)[^0-9]', '[^[0-9](720[pP])', '[^[0-9](1080[pP])', '[^[0-9](480[pP])']
-          for rx in whackRx:
-            file = re.sub(rx, ' ', file)
-
-          print "BABS: after whackRx, file = |", file, "|"
-
-          for rx in episode_regexps:
-
-            match = re.search(rx, file, re.IGNORECASE)
-            if match:
-
-              print "BABS: matched episode_regexps ", rx
-
-              # Parse season and episode.
-              the_season = match.group('season')
-              if the_season.lower() == 'sp':
-                the_season = 0
-              else:
-                the_season = int(the_season)
-              episode = int(match.group('ep'))
-              endEpisode = episode
-              if match.groupdict().has_key('secondEp') and match.group('secondEp'):
-                endEpisode = int(match.group('secondEp'))
-
-              for ep in range(episode, endEpisode+1):
-                tv_show = Media.Episode(show, the_season, ep, None, year)
-                tv_show.display_offset = (ep-episode)*100/(endEpisode-episode+1)
-                tv_show.parts.append(i)
-                mediaList.append(tv_show)
-
-              done = True
-              break
-
-        if done == False:
-
-          print "BABS: dealing with episode? file = |", file, "|"
-
-          # BABS -- Before we do CleanName, which will remove any dashes, etc. in filename, attempt to remove series name found in directory
-          file = re.sub(show, 'X', file)
-          print "BABS: before CleanName, remove show, show = |", show, "| file = |", file, "|"
-
-          # OK, next let's see if we're dealing with something that looks like an episode.
-          # Begin by cleaning the filename to remove garbage like "h.264" that could throw
-          # things off.
-          #
-          (file, fileYear) = VideoFiles.CleanName(file)
-
-          # if don't have a good year from before (when checking the parent folders) AND we just got a good year, use it.
-          if not year and fileYear:
-            year = fileYear
-
-          print "BABS: episodes: CleanName: file = |", file, "| year = |", year, "|"
-
-          # BABS -- And do it again, just in case the directory is off by things CleanName handles
-          file = re.sub(show, 'X', file)
-          print "BABS: after CleanName, remove show, show = |", show, "| file = |", file, "|"
-
-          for rx in just_episode_regexs:
-            episode_match = re.search(rx, file, re.IGNORECASE | re.UNICODE)
-            if episode_match is not None:
-
-              print "BABS: matched just_episode_regexs ", rx
-
-              the_episode = int(episode_match.group('ep'))
-
-              # Now look for a season.
-              if re.search(special_episode_regex, file, re.IGNORECASE | re.UNICODE):
-                the_season = 0
-              elif seasonNumber is not None:
-                the_season = seasonNumber
-              else:
-                the_season = 1
-
-              print "BABS: the_season =", the_season
-              print "BABS: show = |", show, "| the_episode =", the_episode
-
-              tv_show = Media.Episode(show, the_season, the_episode, None, year)
-              tv_show.parts.append(i)
-              mediaList.append(tv_show)
-              done = True
-              break
-
-        if done == False:
-          print "Got nothing for:", file
-
-  # Stack the results.
-  Stack.Scan(path, files, mediaList, subdirs)
-
-def find_data(atom, name):
-  child = atomsearch.find_path(atom, name)
-  data_atom = child.find('data')
-  if data_atom and 'data' in data_atom.attrs:
-    return data_atom.attrs['data']
-
-import sys
-
-if __name__ == '__main__':
-  print "Hello, world!"
-  path = sys.argv[1]
-  files = [os.path.join(path, file) for file in os.listdir(path)]
-  media = []
-  Scan(path[1:], files, media, [])
-  print "Media:", media
diff --git a/commands/pyyg.py b/commands/pyyg.py
new file mode 100644
index 0000000..1e689b6
--- /dev/null
+++ b/commands/pyyg.py
@@ -0,0 +1,169 @@
+import argparse
+import json
+import re
+from urllib.parse import urlencode, urlparse
+
+from bs4 import BeautifulSoup
+from dns import rdatatype, resolver
+from requests import Session, adapters
+from urllib3.util.connection import HAS_IPV6
+
+BLACKLIST_WORDS = ["dvd", "iso"]  # a result whose name contains one is skipped
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-u", "--uploader", action="append")  # may be repeated
+parser.add_argument("-y", "--year", type=int)  # optional
+parser.add_argument("query")
+args = parser.parse_args()  # module-level: read as a global by the code below
+
+
+def parse_size(size):
+    """Convert a size string such as "1.5Go" into a byte count (decimal units)."""
+    factors = {"o": 1, "Ko": 10**3, "Mo": 10**6, "Go": 10**9, "To": 10**12}
+    found = re.search("([0-9.]+)([^0-9]+)", size)
+    amount, suffix = (part.strip() for part in found.groups())
+    return int(float(amount) * factors[suffix])
+
+
+DNS_RESOLVER = resolver.Resolver()  # one resolver shared by all DNSAdapter lookups
+DNS_RESOLVER.cache = resolver.LRUCache()  # type: ignore
+
+
+class DNSAdapter(adapters.HTTPAdapter):
+    """HTTPAdapter that resolves hostnames itself through the given nameservers."""
+
+    def __init__(self, nameservers):
+        self.nameservers = nameservers
+        super().__init__()
+
+    def resolve(self, host, nameservers):
+        """Return the first AAAA address in brackets (if HAS_IPV6), else the first A."""
+        DNS_RESOLVER.nameservers = nameservers
+        if HAS_IPV6:
+            try:
+                for rdata_v6 in DNS_RESOLVER.resolve(host, rdatatype.AAAA):
+                    return f"[{str(rdata_v6)}]"
+            except resolver.NoAnswer:
+                pass  # no AAAA record: fall back to the A lookup below
+        for rdata_v4 in DNS_RESOLVER.resolve(host, rdatatype.A):
+            return str(rdata_v4)
+
+    def send(self, request, **kwargs):
+        """Send `request` to the resolved IP while keeping Host/TLS on the real name."""
+        result = urlparse(request.url)
+        resolved_ip = self.resolve(result.hostname, self.nameservers)
+        # Replace only the first match (the authority), not the path or query.
+        request.url = request.url.replace(result.hostname, resolved_ip, 1)
+        request.headers["Host"] = result.hostname
+        request.headers[
+            "User-Agent"
+        ] = "Googlebot/2.1 (+http://www.google.com/bot.html)"
+        if result.scheme == "https":
+            # SNI and certificate checks must use the hostname, not the IP.
+            pool_kw = self.poolmanager.connection_pool_kw
+            pool_kw["server_hostname"] = result.hostname
+            pool_kw["assert_hostname"] = result.hostname
+        return super().send(request, **kwargs)
+
+
+session = Session()  # every request below is sent through a DNSAdapter
+session.mount("http://", DNSAdapter(["1.1.1.1"]))  # names resolved via 1.1.1.1
+session.mount("https://", DNSAdapter(["1.1.1.1"]))
+
+
+def get_files(id):
+    """Return how many <tr> rows the site's get_files fragment has for torrent `id`."""
+    response = session.get(
+        "https://www5.yggtorrent.fi/engine/get_files", params={"torrent": id}
+    )
+    # The endpoint answers with JSON whose "html" field holds the markup to count.
+    fragment = BeautifulSoup(json.loads(response.text)["html"], "html.parser")
+    return len(fragment.select("tr"))
+
+
+def search_ygg(query, multi):
+    """Search YggTorrent for `query` and exit the process on the first usable hit.
+
+    A row is accepted when it is at most 10 Go, its name contains no BLACKLIST_WORDS
+    entry and (if --uploader was given) one of those values, and get_files() reports
+    at most one row. The hit is printed and the script exits with status 0;
+    otherwise None is returned. `multi` adds the "option_langue" = ["4"] filter.
+    """
+    # --year is optional: only append it when given, never the literal "None".
+    year_suffix = f" {args.year}" if args.year else ""
+    ygg_params = {
+        "name": f"{query}{year_suffix}",
+        "description": "",
+        "file": "",
+        "uploader": "",
+        "category": "2145",
+        "sub_category": "2183",
+        "do": "search",
+        "order": "asc",
+        "sort": "publish_date",
+    }
+    if multi:
+        ygg_params["option_langue"] = ["4"]
+
+    req = session.get("https://www5.yggtorrent.fi/engine/search", params=ygg_params)
+    html = BeautifulSoup(req.text, "html.parser")
+    max_size = parse_size("10Go")
+
+    # The first row of the results table is skipped, as the original loop did.
+    for tr in html.select("table.table tr")[1:]:
+        tds = tr.find_all("td")
+        if len(tds) < 6 or tds[1].a is None:
+            continue  # not a result row: no size cell or no link to read
+        name = tds[1].get_text().lower().strip()
+
+        if parse_size(tds[5].get_text()) > max_size:
+            continue
+        if any(word.lower() in name for word in BLACKLIST_WORDS):
+            continue
+        if args.uploader and not any(u.lower() in name for u in args.uploader):
+            continue
+
+        link = tds[1].a["href"]
+        if get_files(link.split("/")[-1].split("-")[0]) > 1:
+            continue
+
+        print(f"{name}{year_suffix} {link}")
+        raise SystemExit(0)
+
+
+# Look the query up as a movie in the TVDB index served by Algolia.
+query_string = {"query": args.query, "filters": "type:movie"}
+
+if args.year:
+    query_string["filters"] += " AND year:" + str(args.year)
+
+tvdb = session.post(
+    "https://tvshowtime-dsn.algolia.net/1/indexes/TVDB/query",
+    params={
+        "x-algolia-application-id": "tvshowtime",
+        "x-algolia-api-key": "c9d5ec1316cec12f093754c69dd879d3",
+    },
+    json={"params": urlencode(query_string)},
+)
+
+tvdata = json.loads(tvdb.text)
+
+if not tvdata["nbHits"] > 0:
+    print("Can't find query on TheTVDB")
+    raise SystemExit(1)
+
+hit = tvdata["hits"][0]
+eng = hit["name"]
+
+# Fall back to the raw query when the best hit has no French translation.
+fra = (hit.get("translations") or {}).get("fra") or args.query
+
+# Try each distinct title once, language-filtered searches first; search_ygg
+# exits the process on the first acceptable result.
+# dict.fromkeys drops duplicates (e.g. fra == eng) while keeping the order.
+titles = list(dict.fromkeys([args.query, fra, eng]))
+
+for multi in (True, False):
+    for title in titles:
+        search_ygg(title, multi)