Remove cloudscraper
continuous-integration/drone/push Build is passing Details

This commit is contained in:
Michel Roux 2021-01-07 21:19:50 +01:00
parent a769f7fddf
commit 4d6f7b1aba
11 changed files with 45 additions and 40 deletions

3
.dockerignore Normal file
View File

@ -0,0 +1,3 @@
.idea
.venv
.db

View File

@ -11,4 +11,4 @@ REQUESTS_TIMEOUT=5
CACHE_TIMEOUT=3600 CACHE_TIMEOUT=3600
MYSQL_ROOT_PASSWORD=root MYSQL_ROOT_PASSWORD=root
BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
CLOUDPROXY_ENDPOINT=http://cloudproxy:8191/v1 CLOUDPROXY_ENDPOINT=http://flaresolverr:8191/v1

View File

@ -44,16 +44,10 @@ You have to install MariaDB (or any MySQL server) to be able to access the admin
YggTorrent uses CloudFlare to protect itself from DDoS attacks. YggTorrent uses CloudFlare to protect itself from DDoS attacks.
This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...* This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...*
Over time, CloudFlare will ask you systematically to prouve yourself. Over time, CloudFlare will ask you systematically to prove yourself.
To be able to see YggTorrent results, you have to have a FlareSolverr instance running.
CloudFlare have three type of challenge to be completed (from the easiest to resolve to the hardest) : Please refer to their [documentation](https://github.com/FlareSolverr/FlareSolverr#installation).
- Pure Javascript done through [cloudscraper](https://github.com/VeNoMouS/cloudscraper) without any configurations After that, change the `CLOUDPROXY_ENDPOINT` environment variable to refer to your FlareSolverr instance.
- CAPTCHA *(not supported but maybe soon™)*
- JavaScript and browser actions done through [CloudProxy](https://github.com/NoahCardoza/CloudProxy)
For CloudProxy, you have to have an instance running.
Please refer to the [documentation](https://github.com/NoahCardoza/CloudProxy#installation) or install it via [docker](https://github.com/NoahCardoza/CloudProxy#docker).
After that, change the `CLOUDPROXY_ENDPOINT` environnement variable to refer to your CloudProxy instance.
If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do all this, it comes pre-installed. If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do all this, it comes pre-installed.

View File

@ -9,7 +9,7 @@ services:
working_dir: /app working_dir: /app
depends_on: depends_on:
- db - db
- cloudproxy - flaresolverr
env_file: env_file:
- .env.dist - .env.dist
- .env - .env
@ -26,8 +26,8 @@ services:
volumes: volumes:
- ./.db:/var/lib/mysql - ./.db:/var/lib/mysql
cloudproxy: flaresolverr:
image: jbouhd/cloudproxy image: flaresolverr/flaresolverr
ports: ports:
- "8191:8191" - "8191:8191"
env_file: env_file:

View File

@ -5,14 +5,19 @@ from .core import Other
from .nyaa import Nyaa from .nyaa import Nyaa
from .pantsu import Pantsu from .pantsu import Pantsu
from .yggtorrent import YggTorrent, YggAnimation from .yggtorrent import YggTorrent, YggAnimation
from ..config import CLOUDPROXY_ENDPOINT
async def run_all(*args, **kwargs): async def run_all(*args, **kwargs):
return list(await gather(Nyaa(*args, **kwargs).run(), coroutines = [Nyaa(*args, **kwargs).run(),
Pantsu(*args, **kwargs).run(), Pantsu(*args, **kwargs).run(),
YggTorrent(*args, **kwargs).run(), AnimeUltime(*args, **kwargs).run()]
YggAnimation(*args, **kwargs).run(),
AnimeUltime(*args, **kwargs).run())) if CLOUDPROXY_ENDPOINT:
coroutines.extend([YggTorrent(*args, **kwargs).run(),
YggAnimation(*args, **kwargs).run()])
return list(await gather(*coroutines))
def get_instance(url, query): def get_instance(url, query):

View File

@ -12,6 +12,7 @@ class AnimeUltime(ConnectorCore):
favicon = 'animeultime.png' favicon = 'animeultime.png'
base_url = 'http://www.anime-ultime.net' base_url = 'http://www.anime-ultime.net'
is_light = True is_light = True
is_behind_cloudflare = False
def get_full_search_url(self): def get_full_search_url(self):
from_date = '' from_date = ''

View File

@ -8,13 +8,10 @@ from logging import getLogger
from urllib.parse import urlencode from urllib.parse import urlencode
import requests import requests
from cloudscraper import create_scraper
from cloudscraper.exceptions import CloudflareException, CaptchaException
from requests import RequestException from requests import RequestException
from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT from ..config import CACHE_TIMEOUT, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT
scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG)
cloudproxy_session = None cloudproxy_session = None
@ -80,9 +77,11 @@ ConnectorCache = Cache()
def curl_content(url, params=None, ajax=False, debug=True): def curl_content(url, params=None, ajax=False, debug=True):
from . import get_instance
output = '' output = ''
http_code = 500 http_code = 500
method = 'post' if (params is not None) else 'get' method = 'post' if (params is not None) else 'get'
instance = get_instance(url, '')
if ajax: if ajax:
headers = {'X-Requested-With': 'XMLHttpRequest'} headers = {'X-Requested-With': 'XMLHttpRequest'}
@ -90,15 +89,15 @@ def curl_content(url, params=None, ajax=False, debug=True):
headers = {} headers = {}
try: try:
if method == 'post': if not instance.is_behind_cloudflare:
response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers) if method == 'post':
else: response = requests.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers) else:
response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
output = response.text output = response.text
http_code = response.status_code http_code = response.status_code
except CloudflareException as e: elif CLOUDPROXY_ENDPOINT:
if CLOUDPROXY_ENDPOINT:
global cloudproxy_session global cloudproxy_session
if not cloudproxy_session: if not cloudproxy_session:
json_session = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({ json_session = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
@ -123,15 +122,12 @@ def curl_content(url, params=None, ajax=False, debug=True):
output = response['solution']['response'] output = response['solution']['response']
if http_code == 500: if http_code == 500:
json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({ requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
'cmd': 'sessions.destroy', 'cmd': 'sessions.destroy',
'session': cloudproxy_session, 'session': cloudproxy_session,
})) }))
cloudproxy_session = None cloudproxy_session = None
except RequestException as e:
if debug and http_code != 200:
getLogger().exception('%s\n\n%s' % (str(e), json_response.text))
except (RequestException, CaptchaException) as e:
if debug: if debug:
getLogger().exception(e) getLogger().exception(e)
@ -164,6 +160,11 @@ class ConnectorCore(ABC):
def is_light(self): def is_light(self):
pass pass
@property
@abstractmethod
def is_behind_cloudflare(self):
pass
def __init__(self, query, page=1, return_type=ConnectorReturn.SEARCH): def __init__(self, query, page=1, return_type=ConnectorReturn.SEARCH):
self.query = query self.query = query
self.data = [] self.data = []
@ -206,6 +207,7 @@ class Other(ConnectorCore):
favicon = 'blank.png' favicon = 'blank.png'
base_url = '' base_url = ''
is_light = True is_light = True
is_behind_cloudflare = False
def get_full_search_url(self): def get_full_search_url(self):
pass pass

View File

@ -10,6 +10,7 @@ class Nyaa(ConnectorCore):
favicon = 'nyaa.png' favicon = 'nyaa.png'
base_url = 'https://nyaa.si' base_url = 'https://nyaa.si'
is_light = False is_light = False
is_behind_cloudflare = False
def get_full_search_url(self): def get_full_search_url(self):
sort_type = 'size' sort_type = 'size'

View File

@ -10,6 +10,7 @@ class Pantsu(ConnectorCore):
favicon = 'pantsu.png' favicon = 'pantsu.png'
base_url = 'https://nyaa.net' base_url = 'https://nyaa.net'
is_light = False is_light = False
is_behind_cloudflare = False
def get_full_search_url(self): def get_full_search_url(self):
sort_type = 4 sort_type = 4

View File

@ -15,6 +15,7 @@ class YggTorrent(ConnectorCore):
base_url = 'https://www2.yggtorrent.si' base_url = 'https://www2.yggtorrent.si'
is_light = False is_light = False
category = 2179 category = 2179
is_behind_cloudflare = True
def get_full_search_url(self): def get_full_search_url(self):
sort_type = 'size' sort_type = 'size'

View File

@ -3,11 +3,8 @@ Flask-SQLAlchemy==2.4.4
Flask-HTTPAuth==4.2.0 Flask-HTTPAuth==4.2.0
Flask-WTF==0.14.3 Flask-WTF==0.14.3
WTForms==2.3.3 WTForms==2.3.3
PyMySQL==0.10.1 PyMySQL==1.0.0
requests==2.25.1 requests==2.25.1
beautifulsoup4==4.9.3 beautifulsoup4==4.9.3
python-dotenv==0.15.0 python-dotenv==0.15.0
cloudscraper==1.2.50
Js2Py==0.70
polling2==0.4.6
dateparser==1.0.0 dateparser==1.0.0