# PyNyaaTa/connectors.py

import locale
import re
from abc import ABC, abstractmethod
from datetime import datetime, timedelta
from enum import Enum
from subprocess import run
from sys import platform

import requests
from bs4 import BeautifulSoup

class ConnectorReturn(Enum):
    SEARCH = 1
    HISTORY = 2

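
# Abstract base class for every tracker connector: subclasses supply
# get_full_search_url() and search(), while run() dispatches on return_type.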
class Connector(ABC):
    blacklist_words = ['Chris44', 'Vol.']

    def __init__(self, query, page=1, return_type=ConnectorReturn.SEARCH, category=None):
        self.query = query
        self.category = category
        self.data = []
        self.is_more = False
        self.on_error = True
        self.page = page
        self.return_type = return_type

    @abstractmethod
    def get_full_search_url(self):
        pass

    @abstractmethod
    def search(self):
        pass

    def get_history(self):
        pass

    def run(self):
        if self.on_error:
            if self.return_type is ConnectorReturn.SEARCH:
                self.search()
            elif self.return_type is ConnectorReturn.HISTORY:
                self.get_history()
        return self

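    # Note: on_error starts True and is cleared only after a successful fetch,
    # so run() also acts as a retry gate for connectors whose last call failed.
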
    def curl_content(self, url, params=None, ajax=False):
        if isinstance(self, YggTorrent):
            # YggTorrent sits behind a JavaScript check, so it is fetched
            # through PhantomJS rather than a plain HTTP request.
            try:
                qt_env = {'QT_QPA_PLATFORM': 'offscreen'} if platform == 'linux' else {}
                qt_output = run('phantomjs --cookies-file=/tmp/cookies.json delay.js "%s" 5000' % url, env=qt_env,
                                shell=True, check=True, capture_output=True, timeout=7000)
                output = qt_output.stdout
                http_code = 200
            except Exception as e:
                output = ''
                http_code = 500
                print(e)
        else:
            if ajax:
                headers = {'X-Requested-With': 'XMLHttpRequest'}
            else:
                headers = {}
            if params is not None:
                response = requests.post(url, params, timeout=10, headers=headers)
            else:
                response = requests.get(url, timeout=10, headers=headers)
            output = response.text
            http_code = response.status_code
        return {'http_code': http_code, 'output': output}

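    # Factory helper: maps a tracker URL to the matching connector class, so
    # callers never need to know the concrete classes defined below.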
    @staticmethod
    def get_instance(url, query):
        if 'nyaa.si' in url:
            return Nyaa(query)
        elif 'nyaa.net' in url:
            return Pantsu(query)
        elif 'anime-ultime' in url:
            return AnimeUltime(query)
        elif 'ygg' in url:
            return YggTorrent(query)
        else:
            return Other(query)

    @staticmethod
    def get_lang(str_to_test):
        if re.search('(vf|multi|french)', str_to_test, re.IGNORECASE):
            return 'fr'
        else:
            return 'jp'

    def boldify(self, str_to_replace):
        if self.query:
            # re.escape keeps regex metacharacters in the query from breaking the pattern
            return re.sub('(%s)' % re.escape(self.query), r'<b>\1</b>', str_to_replace, flags=re.IGNORECASE)
        else:
            return str_to_replace

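
# A minimal usage sketch (not part of the original module), assuming network
# access is available: the factory picks a connector from a tracker URL,
# run() fetches and parses, and normalized rows land in `.data`:
#
#     connector = Connector.get_instance('https://nyaa.si', 'ghibli')
#     connector.run()
#     for torrent in connector.data:
#         print(torrent['name'], torrent['seeds'])
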
class Nyaa(Connector):
    color = 'is-link'
    title = 'Nyaa'
    favicon = 'nyaa.png'
    base_url = 'https://nyaa.si'

    def get_full_search_url(self):
        sort_type = 'size'
        if self.return_type is ConnectorReturn.HISTORY:
            sort_type = 'date'
        to_query = '(%s vf)|(%s vostfr)|(%s multi)|(%s french)' % (self.query, self.query, self.query, self.query)
        return '%s/?f=0&c=1_3&s=%s&o=desc&q=%s&p=%s' % (self.base_url, sort_type, to_query, self.page)

    def get_history(self):
        self.search()

    def search(self):
        if self.on_error:
            response = self.curl_content(self.get_full_search_url())

            if response['http_code'] == 200:
                html = BeautifulSoup(response['output'], 'html.parser')
                trs = html.select('table.torrent-list tr')
                valid_trs = 0

                for i, tr in enumerate(trs):
                    # skip the table header row
                    if not i:
                        continue

                    tds = tr.findAll('td')
                    check_downloads = int(tds[7].string)
                    check_seeds = int(tds[5].string)

                    if check_downloads or check_seeds:
                        urls = tds[1].findAll('a')

                        if len(urls) > 1:
                            url = urls[1]
                            has_comment = True
                        else:
                            url = urls[0]
                            has_comment = False

                        # drop results whose title contains a blacklisted word
                        if any(word in url.string for word in self.blacklist_words):
                            continue

                        valid_trs = valid_trs + 1

                        self.data.append({
                            'lang': self.get_lang(url.string),
                            'href': '%s%s' % (self.base_url, url['href']),
                            'name': self.boldify(url.string),
                            'comment': str(urls[0]).replace(
                                '/view/', '%s%s' % (self.base_url, '/view/')) if has_comment else '',
                            'link': tds[2].decode_contents().replace(
                                '/download/', '%s%s' % (self.base_url, '/download/')),
                            'size': tds[3].string,
                            'date': '%s:00' % tds[4].string,
                            'seeds': check_seeds,
                            'leechs': tds[6].string,
                            'downloads': check_downloads,
                            'class': 'is-%s' % tr['class'][0]
                        })

                self.on_error = False
                self.is_more = valid_trs != len(trs)

class Pantsu(Connector):
    color = 'is-info'
    title = 'Pantsu'
    favicon = 'pantsu.png'
    base_url = 'https://nyaa.net'

    def get_full_search_url(self):
        sort_type = 4
        if self.return_type is ConnectorReturn.HISTORY:
            sort_type = 2
        to_query = '(%s vf)|(%s vostfr)|(%s multi)|(%s french)' % (self.query, self.query, self.query, self.query)
        return '%s/search/%s?c=3_13&order=false&q=%s&sort=%s' % (self.base_url, self.page, to_query, sort_type)

    def get_history(self):
        self.search()

    def search(self):
        if self.on_error:
            response = self.curl_content(self.get_full_search_url())

            if response['http_code'] == 200:
                html = BeautifulSoup(response['output'], 'html.parser')
                trs = html.select('div.results tr')
                valid_trs = 0

                for i, tr in enumerate(trs):
                    if not i:
                        continue

                    tds = tr.findAll('td')
                    check_downloads = int(tds[6].string.replace('-', '0'))
                    check_seeds = int(tds[4].string.replace('-', '0'))

                    if check_downloads or check_seeds:
                        url = tds[1].a

                        if any(word in url.string for word in self.blacklist_words):
                            continue

                        valid_trs = valid_trs + 1

                        self.data.append({
                            'lang': self.get_lang(url.string),
                            'href': '%s%s' % (self.base_url, url['href']),
                            'name': self.boldify(url.string),
                            'comment': '',
                            'link': tds[2].decode_contents()
                            .replace('icon-magnet', 'fa fa-fw fa-magnet')
                            .replace('icon-floppy', 'fa fa-fw fa-download'),
                            'size': tds[3].string,
                            'date': datetime
                            .strptime(tds[7]['title'], '%m/%d/%Y, %I:%M:%S %p %Z+0')
                            .strftime('%Y-%m-%d %H:%M:%S'),
                            'seeds': check_seeds,
                            'leechs': tds[5].string,
                            'downloads': check_downloads,
                            'class': 'is-%s' % tr['class'][0]
                        })

                self.on_error = False
                self.is_more = valid_trs != len(trs)

class YggTorrent(Connector):
    color = 'is-success'
    title = 'YggTorrent'
    favicon = 'yggtorrent.png'
    base_url = 'https://www2.yggtorrent.pe'

    def get_full_search_url(self):
        sort_type = 'size'
        if self.return_type is ConnectorReturn.HISTORY:
            sort_type = 'date'
        return '%s/engine/search?do=search&order=desc&sort=%s&category=2145&sub_category=%s&name=%s&page=%s' % (
            self.base_url, sort_type, self.category, self.query, self.page
        )

    def get_history(self):
        self.search()

    def search(self):
        if self.category and self.on_error:
            response = self.curl_content(self.get_full_search_url())

            if response['http_code'] == 200:
                html = BeautifulSoup(response['output'], 'html.parser')
                trs = html.select('table.table tr')
                valid_trs = 0

                for i, tr in enumerate(trs):
                    if not i:
                        continue

                    tds = tr.findAll('td')
                    check_downloads = int(tds[6].string)
                    check_seeds = int(tds[7].string)

                    if check_downloads or check_seeds:
                        url = tds[1].a

                        if any(word in url.string for word in self.blacklist_words):
                            continue

                        valid_trs = valid_trs + 1

                        self.data.append({
                            'lang': self.get_lang(url.string),
                            'href': url['href'],
                            'name': self.boldify(url.string),
                            'comment': '<a href="%s#comm" target="_blank"><i class="fa fa-comments-o"></i>%s</a>' %
                                       (url['href'], tds[3].string),
                            'link': '<a href="%s/engine/download_torrent?id=%s">'
                                    '<i class="fa fa-fw fa-download"></i>'
                                    '</a>' % (self.base_url, re.search(r'/(\d+)', url['href']).group(1)),
                            'size': tds[5].string,
                            'date': datetime.fromtimestamp(int(tds[4].div.string)).strftime('%Y-%m-%d %H:%M:%S'),
                            'seeds': check_seeds,
                            'leechs': tds[8].string,
                            'downloads': check_downloads,
                            'class': ''
                        })

                self.on_error = False
                self.is_more = valid_trs != len(trs)

class AnimeUltime(Connector):
    color = 'is-warning'
    title = 'Anime-Ultime'
    favicon = 'animeultime.png'
    base_url = 'http://www.anime-ultime.net'

    def get_full_search_url(self):
        from_date = ''
        sort_type = 'search'
        if self.return_type is ConnectorReturn.HISTORY:
            # step back roughly (page - 1) months for the history listing
            page_date = datetime.now() - timedelta((self.page - 1) * 365 / 12)
            from_date = page_date.strftime('%m%Y')
            sort_type = 'history'
        return '%s/%s-0-1/%s' % (self.base_url, sort_type, from_date)

    def search(self):
        if self.on_error:
            response = self.curl_content(self.get_full_search_url(), {'search': self.query})

            if response['http_code'] == 200:
                html = BeautifulSoup(response['output'], 'html.parser')
                title = html.select('div.title')

                if 'Recherche' in title[0].string:
                    # the site answered with a result list
                    trs = html.select('table.jtable tr')

                    for i, tr in enumerate(trs):
                        if not i:
                            continue

                        tds = tr.findAll('td')

                        if len(tds) < 2:
                            continue

                        url = tds[0].a
                        self.data.append({
                            'lang': 'jp',
                            'href': '%s/%s' % (self.base_url, url['href']),
                            'name': url.decode_contents(),
                            'type': tds[1].string
                        })
                else:
                    # the site redirected straight to a single show page
                    player = html.select('div.AUVideoPlayer')
                    name = html.select('h1')
                    ani_type = html.select('div.titre')

                    self.data.append({
                        'lang': 'jp',
                        'href': '%s/file-0-1/%s' % (self.base_url, player[0]['data-serie']),
                        'name': self.boldify(name[0].string),
                        'type': ani_type[0].string.replace(':', '')
                    })

                self.on_error = False

    def get_history(self):
        if self.on_error:
            response = self.curl_content(self.get_full_search_url())

            if response['http_code'] == 200:
                html = BeautifulSoup(response['output'], 'html.parser')
                tables = html.select('table.jtable')
                h3s = html.findAll('h3')

                for i, table in enumerate(tables):
                    for j, tr in enumerate(table.findAll('tr')):
                        if not j:
                            continue

                        tds = tr.findAll('td')
                        link = tds[0].a

                        # release headers are French dates, so parse them under
                        # a temporary fr_FR locale before restoring the old one
                        current_locale = locale.getlocale()
                        locale.setlocale(locale.LC_ALL, ('fr_FR', 'UTF-8'))
                        release_date = datetime.strptime(h3s[i].string, '%A %d %B %Y : ').strftime('%Y-%m-%d %H:%M:%S')
                        locale.setlocale(locale.LC_ALL, current_locale)

                        self.data.append({
                            'lang': 'jp',
                            'href': '%s/%s' % (self.base_url, link['href']),
                            'name': link.string,
                            'type': tds[4].string,
                            'date': release_date
                        })

                self.on_error = False

class Other(Connector):
    color = 'is-danger'
    title = 'Other'
    favicon = 'blank.png'

    def get_full_search_url(self):
        pass

    def search(self):
        pass

    def get_history(self):
        pass
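

if __name__ == '__main__':
    # Hypothetical smoke test, not part of the original file: fetch the first
    # page of Nyaa results for a query and report what was parsed. Requires
    # network access to nyaa.si.
    nyaa = Nyaa('evangelion').run()
    print('found %d result(s); more pages: %s' % (len(nyaa.data), nyaa.is_more))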