commit cba10eef5935036c8331588329f1a0a8e830d7c0
Author: Xéfir Destiny
Date:   Mon Nov 25 22:52:22 2019 +0100

    First version ever

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c4196de
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.vscode/
+.venv/
+.db/
+__pycache__/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3eb8dea
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,5 @@
+FROM debian
+
+RUN apt-get update ; apt-get -y upgrade ; \
+    apt-get -y install python3 python3-flask python3-flask-sqlalchemy python3-pymysql python3-requests python3-bs4 phantomjs ; \
+    rm -rf /var/lib/apt/lists/*
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..82765d2
--- /dev/null
+++ b/app.py
@@ -0,0 +1,37 @@
+from core import app
+from models import *
+from flask import Response, request
+from werkzeug.security import generate_password_hash, check_password_hash
+from functools import wraps
+from os import environ
+
+
+# init HTTP basic auth
+def check_auth(username, password):
+    # This function is called to check if a username / password combination is valid.
+    admin_username = environ.get('ADMIN_USERNAME', 'admin')
+    admin_password = environ.get('ADMIN_PASSWORD', 'secret')
+    return username == admin_username and password == admin_password
+
+
+def authenticate():
+    # Sends a 401 response that enables basic auth
+    return Response(
+        'Could not verify your access level for that URL.\n'
+        'You have to login with proper credentials', 401,
+        {'WWW-Authenticate': 'Basic realm="Login Required"'})
+
+
+def requires_auth(f):
+    @wraps(f)
+    def decorated(*args, **kwargs):
+        auth = request.authorization
+        if not auth or not check_auth(auth.username, auth.password):
+            return authenticate()
+        return f(*args, **kwargs)
+    return decorated
+
+
+@app.route('/')
+def hello_world():
+    return 'Hello World !'
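
app.py defines the requires_auth decorator but does not apply it to any route yet; only the public hello_world route exists in this commit. A minimal sketch of how a protected endpoint could look, assuming a hypothetical /admin path (the route path and function name are illustrative only, not part of this commit):

    from app import app, requires_auth

    @app.route('/admin')  # hypothetical route, not in this commit
    @requires_auth        # checks against ADMIN_USERNAME / ADMIN_PASSWORD from the environment
    def admin_panel():
        return 'Restricted area'
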
diff --git a/connectors.py b/connectors.py
new file mode 100644
index 0000000..1178c2d
--- /dev/null
+++ b/connectors.py
@@ -0,0 +1,385 @@
+from subprocess import run
+from bs4 import BeautifulSoup
+from abc import ABC, abstractmethod
+from datetime import datetime, timedelta
+from sys import platform
+import re
+import requests
+import locale
+
+
+class ConnectorException(Exception):
+    def __init__(self, connector_type):
+        super().__init__("Error, can't grab data from %s" % connector_type)
+
+
+class Connector(ABC):
+    blacklist_words = ['Chris44', 'Vol.']
+
+    def __init__(self, query):
+        self.query = query
+
+    @abstractmethod
+    def get_full_search_url(self, sort_type, page, category):
+        pass
+
+    @abstractmethod
+    def search(self, sort_type, page, category):
+        pass
+
+    @abstractmethod
+    def get_history(self, sort_type, page, category):
+        pass
+
+    def curl_content(self, url, params={}, ajax=False):
+        if isinstance(self, YggTorrent):
+            try:
+                qt_env = {'QT_QPA_PLATFORM': 'offscreen'} if platform == 'linux' else {}
+                qt_output = run('phantomjs --cookies-file=/tmp/cookies.json delay.js "%s" 5000' % url, env=qt_env, shell=True, check=True, capture_output=True, timeout=7000)
+                output = qt_output.stdout
+                http_code = 200
+            except Exception as e:
+                output = ''
+                http_code = 500
+        else:
+            if ajax:
+                headers = {'X-Requested-With': 'XMLHttpRequest'}
+            else:
+                headers = {}
+
+            if params:
+                response = requests.post(url, params, timeout=10, headers=headers)
+            else:
+                response = requests.get(url, timeout=10, headers=headers)
+
+            output = response.text
+            http_code = response.status_code
+
+        return {'http_code': http_code, 'output': output}
+
+    def get_instance(self, url):
+        if 'nyaa.si' in url:
+            return Nyaa(self.query)
+        elif 'nyaa.net' in url:
+            return Pantsu(self.query)
+        elif 'anime-ultime' in url:
+            return AnimeUltime(self.query)
+        elif 'ygg' in url:
+            return YggTorrent(self.query)
+        else:
+            return Other(self.query)
+
+    def get_lang(self, str_to_test):
+        if re.search('(vf|multi|french)', str_to_test, re.IGNORECASE):
+            return 'fr'
+        else:
+            return 'jp'
+
+    def boldify(self, str_to_replace):
+        if self.query:
+            return re.sub('(%s)' % self.query, r'<b>\1</b>', str_to_replace, flags=re.IGNORECASE)
+        else:
+            return str_to_replace
+
+
+class Nyaa(Connector):
+    color = 'is-link'
+    title = 'Nyaa'
+    favicon = 'nyaa.png'
+    base_url = 'https://nyaa.si'
+    default_sort = 'size'
+
+    def get_full_search_url(self, sort_type=default_sort, page=1, category=None):
+        to_query = '(%s vf)|(%s vostfr)|(%s multi)|(%s french)' % (self.query, self.query, self.query, self.query)
+        return '%s/?f=0&c=1_3&s=%s&o=desc&q=%s&p=%s' % (self.base_url, sort_type, to_query, page)
+
+    def get_history(self, sort_type=default_sort, page=1, category=None):
+        output = self.search(sort_type, page, category)
+        return output[0]
+
+    def search(self, sort_type=default_sort, page=1, category=None):
+        data = []
+        response = self.curl_content(self.get_full_search_url(sort_type, page))
+
+        if response['http_code'] == 200:
+            html = BeautifulSoup(response['output'], 'html.parser')
+            trs = html.select('table.torrent-list tr')
+            valid_trs = 0
+
+            for i, tr in enumerate(trs):
+                if not i:
+                    continue
+
+                tds = tr.findAll('td')
+                check_downloads = int(tds[7].string)
+                check_seeds = int(tds[5].string)
+
+                if check_downloads or check_seeds:
+                    urls = tds[1].findAll('a')
+
+                    if len(urls) > 1:
+                        url = urls[1]
+                        has_comment = True
+                    else:
+                        url = urls[0]
+                        has_comment = False
+
+                    if any(word in url.string for word in self.blacklist_words):
+                        continue
+
+                    valid_trs = valid_trs + 1
+
+                    data.append({
+                        'lang': self.get_lang(url.string),
+                        'href': '%s%s' % (self.base_url, url['href']),
+                        'name': self.boldify(url.string),
+                        'comment': str(urls[0]).replace('/view/', '%s%s' % (self.base_url, '/view/')) if has_comment else '',
+                        'link': tds[2].decode_contents().replace('/download/', '%s%s' % (self.base_url, '/download/')),
+                        'size': tds[3].string,
+                        'date': '%s:00' % tds[4].string,
+                        'seeds': check_seeds,
+                        'leechs': tds[6].string,
+                        'downloads': check_downloads,
+                        'class': 'is-%s' % tr['class'][0]
+                    })
+
+            return (data, valid_trs != len(trs))
+        else:
+            raise ConnectorException(self.title)
+        return (data, False)
+
+
+class Pantsu(Connector):
+    color = 'is-info'
+    title = 'Pantsu'
+    favicon = 'pantsu.png'
+    base_url = 'https://nyaa.net'
+    default_sort = 4
+
+    def get_full_search_url(self, sort_type=default_sort, page=1, category=None):
+        to_query = '(%s vf)|(%s vostfr)|(%s multi)|(%s french)' % (self.query, self.query, self.query, self.query)
+        return '%s/search/%s?c=3_13&order=false&q=%s&sort=%s' % (self.base_url, page, to_query, sort_type)
+
+    def get_history(self, sort_type=default_sort, page=1, category=None):
+        output = self.search(sort_type, page, category)
+        return output[0]
+
+    def search(self, sort_type=default_sort, page=1, category=None):
+        data = []
+        response = self.curl_content(self.get_full_search_url(sort_type, page))
+
+        if response['http_code'] == 200:
+            html = BeautifulSoup(response['output'], 'html.parser')
+            trs = html.select('div.results tr')
+            valid_trs = 0
+
+            for i, tr in enumerate(trs):
+                if not i:
+                    continue
+
+                tds = tr.findAll('td')
+                check_downloads = int(tds[6].string.replace('-', '0'))
+                check_seeds = int(tds[4].string.replace('-', '0'))
+
+                if check_downloads or check_seeds:
+                    url = tds[1].a
+
+                    if any(word in url.string for word in self.blacklist_words):
+                        continue
+
+                    valid_trs = valid_trs + 1
+
+                    data.append({
+                        'lang': self.get_lang(url.string),
+                        'href': '%s%s' % (self.base_url, url['href']),
+                        'name': self.boldify(url.string),
+                        'comment': '',
+                        'link': tds[2].decode_contents().replace('icon-magnet', 'fa fa-fw fa-magnet').replace('icon-floppy', 'fa fa-fw fa-download'),
+                        'size': tds[3].string,
+                        'date': datetime.strptime(tds[7]['title'], '%m/%d/%Y, %I:%M:%S %p %Z+0').strftime('%Y-%m-%d %H:%M:%S'),
+                        'seeds': check_seeds,
+                        'leechs': tds[5].string,
+                        'downloads': check_downloads,
+                        'class': 'is-%s' % tr['class'][0]
+                    })
+
+            return (data, valid_trs != len(trs))
+        else:
+            raise ConnectorException(self.title)
+        return (data, False)
+
+
+class YggTorrent(Connector):
+    color = 'is-success'
+    title = 'YggTorrent'
+    favicon = 'yggtorrent.png'
+    base_url = 'https://www2.yggtorrent.pe'
+    default_sort = 'size'
+
+    def get_full_search_url(self, sort_type=default_sort, page=1, category=None):
+        if category is None:
+            raise ConnectorException(self.title)
+
+        return '%s/engine/search?do=search&order=desc&sort=%s&category=2145&sub_category=%s&name=%s&page=%s' % (self.base_url, sort_type, category, self.query, page)
+
+    def get_history(self, sort_type=default_sort, page=1, category=None):
+        if category is None:
+            raise ConnectorException(self.title)
+
+        output = self.search(sort_type, page, category)
+        return output[0]
+
+    def search(self, sort_type=default_sort, page=1, category=None):
+        if category is None:
+            raise ConnectorException(self.title)
+
+        data = []
+        response = self.curl_content(self.get_full_search_url(sort_type, page, category))
+
+        if response['http_code'] == 200:
+            html = BeautifulSoup(response['output'], 'html.parser')
+            trs = html.select('table.table tr')
+            valid_trs = 0
+
+            for i, tr in enumerate(trs):
+                if not i:
+                    continue
+
+                tds = tr.findAll('td')
+                check_downloads = int(tds[6].string)
+                check_seeds = int(tds[7].string)
+
+                if check_downloads or check_seeds:
+                    url = tds[1].a
+
+                    if any(word in url.string for word in self.blacklist_words):
+                        continue
+
+                    valid_trs = valid_trs + 1
+
+                    data.append({
+                        'lang': self.get_lang(url.string),
+                        'href': url['href'],
+                        'name': self.boldify(url.string),
+                        'comment': '<a href="%s">%s</a>' % (url['href'], tds[3].string),
+                        'link': '<a href="%s/engine/download_torrent?id=%s">Download</a>' % (self.base_url, re.search(r'/(\d+)', url['href']).group(1)),
+                        'size': tds[5].string,
+                        'date': datetime.fromtimestamp(int(tds[4].div.string)).strftime('%Y-%m-%d %H:%M:%S'),
+                        'seeds': check_seeds,
+                        'leechs': tds[8].string,
+                        'downloads': check_downloads,
+                        'class': ''
+                    })
+
+            return (data, valid_trs == len(trs))
+        else:
+            raise ConnectorException(self.title)
+        return (data, False)
+
+
+class AnimeUltime(Connector):
+    color = 'is-warning'
+    title = 'Anime-Ultime'
+    favicon = 'animeultime.png'
+    base_url = 'http://www.anime-ultime.net'
+    default_sort = 'search'
+
+    def get_full_search_url(self, sort_type=default_sort, page=1, category=None):
+        if sort_type == 'history':
+            page_date = datetime.now() - timedelta((page-1)*365/12)
+            from_date = page_date.strftime('%m%Y')
+        else:
+            from_date = ''
+
+        return '%s/%s-0-1/%s' % (self.base_url, sort_type, from_date)
+
+    def search(self, sort_type=default_sort, page=1, category=None):
+        data = []
+        response = self.curl_content(self.get_full_search_url(sort_type, page), {'search': self.query})
+
+        if response['http_code'] == 200:
+            html = BeautifulSoup(response['output'], 'html.parser')
+            title = html.select('div.title')
+
+            if 'Recherche' in title[0].string:
+                trs = html.select('table.jtable tr')
+
+                for i, tr in enumerate(trs):
+                    if not i:
+                        continue
+
+                    tds = tr.findAll('td')
+
+                    if len(tds) < 2:
+                        continue
+
+                    url = tds[0].a
+
+                    data.append({
+                        'lang': 'jp',
+                        'href': '%s/%s' % (self.base_url, url['href']),
+                        'name': url.decode_contents(),
+                        'type': tds[1].string
+                    })
+            else:
+                player = html.select('div.AUVideoPlayer')
+                name = html.select('h1')
+                ani_type = html.select('div.titre')
+
+                data.append({
+                    'lang': 'jp',
+                    'href': '%s%s' % (self.get_full_search_url('file'), player[0]['data-serie']),
+                    'name': self.boldify(name[0].string),
+                    'type': ani_type[0].string.replace(':', '')
+                })
+        else:
+            raise ConnectorException(self.title)
+        return (data, False)
+
+    def get_history(self, sort_type=default_sort, page=1, category=None):
+        data = []
+        response = self.curl_content(self.get_full_search_url('history', page))
+
+        if response['http_code'] == 200:
+            html = BeautifulSoup(response['output'], 'html.parser')
+            tables = html.select('table.jtable')
+            h3s = html.findAll('h3')
+
+            for i, table in enumerate(tables):
+                for j, tr in enumerate(table.findAll('tr')):
+                    if not j:
+                        continue
+
+                    tds = tr.findAll('td')
+                    link = tds[0].a
+
+                    current_locale = locale.getlocale()
+                    locale.setlocale(locale.LC_ALL, ('fr_FR', 'UTF-8'))
+                    release_date = datetime.strptime(h3s[i].string, '%A %d %B %Y : ').strftime('%Y-%m-%d %H:%M:%S')
+                    locale.setlocale(locale.LC_ALL, current_locale)
+
+                    data.append({
+                        'lang': 'jp',
+                        'href': '%s%s' % (self.get_full_search_url('file'), link['href']),
+                        'name': link.string,
+                        'type': tds[4].string,
+                        'date': release_date
+                    })
+        else:
+            raise ConnectorException(self.title)
+        return data
+
+
+class Other(Connector):
+    color = 'is-danger'
+    title = 'Other'
+    favicon = 'blank.png'
+
+    def get_full_search_url(self, sort_type=None, page=1, category=None):
+        return ''
+
+    def search(self, sort_type=None, page=1, category=None):
+        return ([], False)
+
+    def get_history(self, sort_type, page, category):
+        return []
diff --git a/core.py b/core.py
new file mode 100644
index 0000000..63df234
--- /dev/null
+++ b/core.py
@@ -0,0 +1,21 @@
+from sys import modules
+from flask import Flask
+from os import environ
+from flask_sqlalchemy import SQLAlchemy
+import pymysql
+
+modules["MySQLdb"] = pymysql
+app = Flask(__name__)
+
+# init DB and migration
+db_user = environ.get('MYSQL_USER')
+db_password = environ.get('MYSQL_PASSWORD')
+db_name = environ.get('MYSQL_DATABASE')
+db_host = environ.get('MYSQL_HOSTNAME')
+if not db_host or not db_user or not db_password or not db_name:
+    print('Missing connection environment variables')
+    exit()
+
+app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql://%s:%s@%s/%s' % (db_user, db_password, db_host, db_name)
+app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
+db = SQLAlchemy(app)
diff --git a/delay.js b/delay.js
new file mode 100644
index 0000000..261466d
--- /dev/null
+++ b/delay.js
@@ -0,0 +1,53 @@
+// https://stackoverflow.com/a/41017165
+"use strict";
+var page = require('webpage').create(),
+    system = require('system'),
+    mustQuit = false,
+    canShow = false,
+    underAttack = false,
+    address, delay;
+
+if (system.args.length < 3 || system.args.length > 5) {
+    console.log('Usage: delay.js URL delay');
+    phantom.exit(1);
+} else {
+    address = system.args[1];
+    delay = system.args[2];
+
+    page.open(address, function (status) {
+        if (status !== 'success') {
+            phantom.exit(1);
+        } else {
+            window.setTimeout(function () {
+                if (underAttack && canShow) {
+                    console.log(page.content);
+                    phantom.exit();
+                } else {
+                    phantom.exit(503);
+                }
+            }, delay);
+            window.setTimeout(function () {
+                if (mustQuit) {
+                    phantom.exit(429);
+                } else if (!underAttack && canShow) {
+                    console.log(page.content);
+                    phantom.exit();
+                }
+            }, 1);
+        }
+    });
+
+    page.onResourceReceived = function (response) {
+        switch (response.status) {
+            case 200:
+                canShow = true;
+                break;
+            case 429:
+                mustQuit = true;
+                break;
+            case 503:
+                underAttack = true;
+                break;
+        }
+    };
+}
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..944ea90
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,30 @@
+version: "2.4"
+
+services:
+  app:
+    build: .
+    ports:
+      - "5000:5000"
+    command: flask run --host 0.0.0.0
+    working_dir: /app
+    environment:
+      FLASK_APP: app.py
+      FLASK_ENV: development
+      MYSQL_USER: root
+      MYSQL_PASSWORD: root
+      MYSQL_DATABASE: www
+      MYSQL_HOSTNAME: db
+      ADMIN_USERNAME: admin
+      ADMIN_PASSWORD: secret
+    volumes:
+      - .:/app
+
+  db:
+    image: mariadb
+    ports:
+      - "3306:3306"
+    environment:
+      MYSQL_ROOT_PASSWORD: root
+      MYSQL_DATABASE: www
+    volumes:
+      - ./.db:/var/lib/mysql
diff --git a/models.py b/models.py
new file mode 100644
index 0000000..5053f3b
--- /dev/null
+++ b/models.py
@@ -0,0 +1,26 @@
+from core import db
+
+
+class AnimeFolder(db.Model):
+    id = db.Column(db.Integer, primary_key=True)
+    name = db.Column(db.Text, unique=True, nullable=False)
+    titles = db.relationship("AnimeTitle", back_populates="folder")
+
+
+class AnimeTitle(db.Model):
+    id = db.Column(db.Integer, primary_key=True)
+    name = db.Column(db.Text, unique=True, nullable=False)
+    keyword = db.Column(db.Text, nullable=False)
+    folder_id = db.Column(db.Integer, db.ForeignKey('anime_folder.id'))
+    folder = db.relationship('AnimeFolder', back_populates="titles")
+    links = db.relationship('AnimeLink', back_populates="title")
+
+
+class AnimeLink(db.Model):
+    id = db.Column(db.Integer, primary_key=True)
+    link = db.Column(db.Text, nullable=False)
+    season = db.Column(db.Text, nullable=False)
+    comment = db.Column(db.Text)
+    vf = db.Column(db.Boolean, nullable=False)
+    title_id = db.Column(db.Integer, db.ForeignKey('anime_title.id'))
+    title = db.relationship('AnimeTitle', back_populates="links")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6e79b50
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+Flask==1.0.2
+Flask-SQLAlchemy==2.1
+PyMySQL==0.9.3
+requests==2.21.0
+beautifulsoup4==4.7.1
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..ff3e1c7
--- /dev/null
+++ b/test.py
@@ -0,0 +1,5 @@
+from connectors import AnimeUltime
+from pprint import pprint
+
+test = AnimeUltime('conan')
+pprint(test.search())
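
The commit ships the SQLAlchemy models but no migration or table-creation step. A minimal sketch of how the schema could be created and seeded from a Python shell inside the app container, assuming the MYSQL_* variables from docker-compose.yml are set (the folder and title values below are illustrative samples, not data from the commit):

    from core import app, db
    from models import AnimeFolder, AnimeTitle

    with app.app_context():
        db.create_all()  # creates the anime_folder, anime_title and anime_link tables
        folder = AnimeFolder(name='Example folder')  # sample value
        AnimeTitle(name='Example title', keyword='example', folder=folder)  # sample value
        db.session.add(folder)  # the attached title is persisted via the relationship cascade
        db.session.commit()

test.py already shows the connector side of the same workflow: instantiate a connector with a query and call search().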