This repository has been archived on 2023-10-01. You can view files and clone it, but cannot push or open issues or pull requests.
PyNyaaTa/pynyaata/connectors/core.py

220 lines
6.7 KiB
Python
Raw Normal View History

2020-04-24 19:01:44 +00:00
import re
from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum
from functools import wraps
2020-10-21 20:13:57 +00:00
from json import dumps, loads
2020-04-24 19:01:44 +00:00
from logging import getLogger
2020-10-21 20:13:57 +00:00
from urllib.parse import urlencode
2020-04-24 19:01:44 +00:00
2020-10-21 20:13:57 +00:00
import requests
2020-04-24 19:01:44 +00:00
from requests import RequestException
2021-01-07 20:19:50 +00:00
from ..config import CACHE_TIMEOUT, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT
2020-04-24 19:01:44 +00:00
2020-10-21 20:54:37 +00:00
# Session id returned by the CloudProxy ``sessions.create`` command. It is set
# lazily by curl_content(), reused across calls, and reset to None after the
# session has been destroyed.
cloudproxy_session = None
2020-04-24 19:01:44 +00:00
class ConnectorReturn(Enum):
    """Kind of result a connector run is expected to produce."""

    # Run the connector's search() method.
    SEARCH = 1
    # Run the connector's get_history() method.
    HISTORY = 2
class ConnectorLang(Enum):
    """Language tag attached to a result; each value is a flag emoji."""

    # French flag.
    FR = '🇫🇷'
    # Japanese flag.
    JP = '🇯🇵'
class Cache:
    """In-memory, time-limited cache for connector results.

    Entries live in ``CACHE_DATA`` as nested dictionaries keyed, in order, by
    connector class name, decorated function name, query and page. Each leaf
    is a dict holding ``timeout`` (expiry as a POSIX timestamp) and, once a
    call has succeeded, ``data`` and ``is_more``.
    """

    # Class-level store, hence shared by every Cache instance.
    CACHE_DATA = {}

    def _purge_expired(self, timestamp):
        """Delete entries that expired before *timestamp*, then any emptied parent."""
        # Iterate over snapshots (list(...)) because the dicts are mutated
        # while walking them.
        for class_name, funcs in list(self.CACHE_DATA.items()):
            for func_name, queries in list(funcs.items()):
                for query, pages in list(queries.items()):
                    for page, entry in list(pages.items()):
                        if entry['timeout'] < timestamp:
                            del pages[page]
                    # Without this pruning every distinct query ever seen
                    # would leave an empty dict behind forever.
                    if not pages:
                        del queries[query]
                if not queries:
                    del funcs[func_name]
            if not funcs:
                del self.CACHE_DATA[class_name]

    def _pages_for(self, connector, func_name):
        """Return the page -> entry dict for this connector/function/query, creating it if needed."""
        return (
            self.CACHE_DATA
            .setdefault(connector.__class__.__name__, {})
            .setdefault(func_name, {})
            .setdefault(connector.query, {})
        )

    def cache_data(self, f):
        """Decorate connector method *f* so its results are cached.

        The connector is taken from the first positional argument. It must
        expose ``query`` and ``page`` (used as cache keys) and ``data``,
        ``is_more`` and ``on_error`` (the cached/restored state).

        On a cache hit the connector's ``data`` and ``is_more`` are restored,
        ``on_error`` is set to False, *f* is not called and the wrapper
        returns None. On a miss *f* is called and its return value is passed
        through; the result is stored for ``CACHE_TIMEOUT`` seconds only when
        ``connector.on_error`` is falsy afterwards.
        """
        @wraps(f)
        def wrapper(*args, **kwds):
            connector = args[0]
            timestamp = datetime.now().timestamp()

            self._purge_expired(timestamp)

            # A placeholder with timeout 0.0 can never count as a hit; it is
            # removed by the next purge if the call below does not replace it.
            cached_data = self._pages_for(connector, f.__name__).setdefault(
                connector.page, {'timeout': 0.0}
            )
            if cached_data['timeout'] > timestamp:
                connector.data = cached_data['data']
                connector.is_more = cached_data['is_more']
                connector.on_error = False
                return None

            ret = f(*args, **kwds)

            if not connector.on_error:
                # Resolve the bucket again: a nested cached call made by f()
                # may have pruned the dict looked up above.
                self._pages_for(connector, f.__name__)[connector.page] = {
                    'data': connector.data,
                    'timeout': timestamp + CACHE_TIMEOUT,
                    'is_more': connector.is_more,
                }
            return ret

        return wrapper


# Module-wide cache instance.
ConnectorCache = Cache()
2020-07-25 14:27:21 +00:00
def curl_content(url, params=None, ajax=False, debug=True):
    """Fetch *url* and return its status code and body without raising on I/O errors.

    The connector instance matching *url* decides the transport:

    * not behind Cloudflare: a direct ``requests`` call bounded by
      ``REQUESTS_TIMEOUT``;
    * behind Cloudflare with ``CLOUDPROXY_ENDPOINT`` configured: the request
      is relayed through CloudProxy, reusing the module-level
      ``cloudproxy_session`` (created on first use, destroyed when the proxy
      answers with HTTP 500);
    * behind Cloudflare without a proxy configured: nothing is fetched.

    :param url: address to fetch.
    :param params: form data; when not None the request is a POST, else a GET.
    :param ajax: when true, send the ``X-Requested-With: XMLHttpRequest`` header.
    :param debug: when true, log caught exceptions on the root logger.
    :return: ``{'http_code': int, 'output': str}``; ``http_code`` is 500 and
        ``output`` is ``''`` when nothing could be fetched.
    """
    # NOTE(review): imported here rather than at module level, presumably to
    # avoid a circular import with the package __init__ -- confirm.
    from . import get_instance

    global cloudproxy_session

    output = ''
    http_code = 500
    method = 'post' if (params is not None) else 'get'
    instance = get_instance(url, '')
    headers = {'X-Requested-With': 'XMLHttpRequest'} if ajax else {}

    try:
        if not instance.is_behind_cloudflare:
            if method == 'post':
                response = requests.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
            else:
                response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)

            output = response.text
            http_code = response.status_code
        elif CLOUDPROXY_ENDPOINT:
            if not cloudproxy_session:
                json_session = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
                    'cmd': 'sessions.create'
                }))
                response_session = loads(json_session.text)
                cloudproxy_session = response_session['session']

            headers['Content-Type'] = 'application/x-www-form-urlencoded' if (method == 'post') else 'application/json'
            post_data = urlencode(params) if (method == 'post') else ''

            json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
                'cmd': 'request.%s' % method,
                'url': url,
                'session': cloudproxy_session,
                'userAgent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:80.0) Gecko/20100101 Firefox/80.0',
                'postData': post_data
            }))

            http_code = json_response.status_code
            response = loads(json_response.text)
            if 'solution' in response:
                output = response['solution']['response']

            if http_code == 500:
                # Drop the session so the next call starts with a fresh one.
                requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
                    'cmd': 'sessions.destroy',
                    'session': cloudproxy_session,
                }))
                cloudproxy_session = None
    except RequestException as e:
        if debug:
            getLogger().exception(e)
    except (ValueError, KeyError) as e:
        # CloudProxy answered with something other than the expected JSON
        # payload (json.JSONDecodeError is a ValueError, a missing field is a
        # KeyError). Report it as a failure instead of letting it propagate.
        http_code = 500
        output = ''
        if debug:
            getLogger().exception(e)

    return {'http_code': http_code, 'output': output}
class ConnectorCore(ABC):
    """Abstract base shared by every connector.

    Subclasses must provide the descriptive attributes declared below as
    abstract properties, and implement ``get_full_search_url``, ``search``
    and ``get_history``.
    """

    @property
    @abstractmethod
    def color(self):
        """Colour identifier of the connector (a string such as ``'is-danger'``)."""

    @property
    @abstractmethod
    def title(self):
        """Name of the connector."""

    @property
    @abstractmethod
    def favicon(self):
        """Icon file name of the connector."""

    @property
    @abstractmethod
    def base_url(self):
        """Base URL of the connector's site."""

    @property
    @abstractmethod
    def is_light(self):
        """Boolean flag defined by each connector."""

    @property
    @abstractmethod
    def is_behind_cloudflare(self):
        """Whether requests to the site have to be relayed through CloudProxy."""

    def __init__(self, query, page=1, return_type=ConnectorReturn.SEARCH):
        """Store the request parameters and reset the result state.

        :param query: text to look up.
        :param page: page number of the results, 1 by default.
        :param return_type: ``ConnectorReturn`` member selecting what ``run`` does.
        """
        # Request parameters.
        self.query = query
        self.page = page
        self.return_type = return_type
        # Result state; on_error stays True until a fetch succeeds.
        self.data = []
        self.is_more = False
        self.on_error = True

    @abstractmethod
    def get_full_search_url(self):
        """To be implemented by each connector."""

    @abstractmethod
    def search(self):
        """To be implemented by each connector; called by ``run`` for SEARCH."""

    @abstractmethod
    def get_history(self):
        """To be implemented by each connector; called by ``run`` for HISTORY."""

    @staticmethod
    def get_lang(str_to_test):
        """Return ``ConnectorLang.FR`` when *str_to_test* contains "vf", "multi"
        or "french" (case-insensitive), ``ConnectorLang.JP`` otherwise.
        """
        is_french = re.search('(vf|multi|french)', str_to_test, re.IGNORECASE)
        return ConnectorLang.FR if is_french else ConnectorLang.JP

    async def run(self):
        """Run the action selected by ``return_type`` and return ``self``.

        Nothing is executed when ``on_error`` is already falsy.
        """
        if not self.on_error:
            return self

        requested = self.return_type
        if requested is ConnectorReturn.SEARCH:
            self.search()
        elif requested is ConnectorReturn.HISTORY:
            self.get_history()
        return self
class Other(ConnectorCore):
    """Concrete connector whose three operations all do nothing."""

    title = 'Other'
    base_url = ''
    favicon = 'blank.png'
    color = 'is-danger'
    is_light = True
    is_behind_cloudflare = False

    def get_full_search_url(self):
        """No search URL; returns None."""
        return None

    def search(self):
        """No-op; returns None."""
        return None

    def get_history(self):
        """No-op; returns None."""
        return None