From 4d6f7b1aba4fec9494b16973b10bd0c152aefffb Mon Sep 17 00:00:00 2001 From: Michel Roux Date: Thu, 7 Jan 2021 21:19:50 +0100 Subject: [PATCH] Remove cloudscrapper --- .dockerignore | 3 +++ .env.dist | 2 +- README.md | 14 ++++-------- docker-compose.yml | 6 ++--- pynyaata/connectors/__init__.py | 15 ++++++++----- pynyaata/connectors/animeultime.py | 1 + pynyaata/connectors/core.py | 36 ++++++++++++++++-------------- pynyaata/connectors/nyaa.py | 1 + pynyaata/connectors/pantsu.py | 1 + pynyaata/connectors/yggtorrent.py | 1 + requirements.txt | 5 +---- 11 files changed, 45 insertions(+), 40 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e5c0e11 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +.idea +.venv +.db diff --git a/.env.dist b/.env.dist index b0d60fb..365940f 100644 --- a/.env.dist +++ b/.env.dist @@ -11,4 +11,4 @@ REQUESTS_TIMEOUT=5 CACHE_TIMEOUT=3600 MYSQL_ROOT_PASSWORD=root BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain -CLOUDPROXY_ENDPOINT=http://cloudproxy:8191/v1 +CLOUDPROXY_ENDPOINT=http://flaresolverr:8191/v1 diff --git a/README.md b/README.md index bfa3940..b5923d7 100644 --- a/README.md +++ b/README.md @@ -44,16 +44,10 @@ You have to install MariaDB (or any MySQL server) to be able to access the admin YggTorrent use CloudFlare to protect them to DDoS attacks. This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...* -Over time, CloudFlare will ask you systematically to prouve yourself. 
- -CloudFlare have three type of challenge to be completed (from the easiest to resolve to the hardest) : -- Pure Javascript done through [cloudscraper](https://github.com/VeNoMouS/cloudscraper) without any configurations -- CAPTCHA *(not supported but maybe soon™)* -- JavaScript and browser actions done through [CloudProxy](https://github.com/NoahCardoza/CloudProxy) - -For CloudProxy, you have to have an instance running. -Please refer to the [documentation](https://github.com/NoahCardoza/CloudProxy#installation) or install it via [docker](https://github.com/NoahCardoza/CloudProxy#docker). -After that, change the `CLOUDPROXY_ENDPOINT` environnement variable to refer to your CloudProxy instance. +Over time, CloudFlare will ask you systematically to prove yourself. +To be able to see YggTorrent results, you have to have a FlareSolverr instance running. +Please refer to their [documentation](https://github.com/FlareSolverr/FlareSolverr#installation). +After that, change the `CLOUDPROXY_ENDPOINT` environment variable to refer to your FlareSolverr instance. If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do all this, it comes pre-installed. 
diff --git a/docker-compose.yml b/docker-compose.yml index e82c6e1..0c7c9c7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,7 +9,7 @@ services: working_dir: /app depends_on: - db - - cloudproxy + - flaresolverr env_file: - .env.dist - .env @@ -26,8 +26,8 @@ services: volumes: - ./.db:/var/lib/mysql - cloudproxy: - image: jbouhd/cloudproxy + flaresolverr: + image: flaresolverr/flaresolverr ports: - "8191:8191" env_file: diff --git a/pynyaata/connectors/__init__.py b/pynyaata/connectors/__init__.py index 9f8d1db..ad2d839 100644 --- a/pynyaata/connectors/__init__.py +++ b/pynyaata/connectors/__init__.py @@ -5,14 +5,19 @@ from .core import Other from .nyaa import Nyaa from .pantsu import Pantsu from .yggtorrent import YggTorrent, YggAnimation +from ..config import CLOUDPROXY_ENDPOINT async def run_all(*args, **kwargs): - return list(await gather(Nyaa(*args, **kwargs).run(), - Pantsu(*args, **kwargs).run(), - YggTorrent(*args, **kwargs).run(), - YggAnimation(*args, **kwargs).run(), - AnimeUltime(*args, **kwargs).run())) + coroutines = [Nyaa(*args, **kwargs).run(), + Pantsu(*args, **kwargs).run(), + AnimeUltime(*args, **kwargs).run()] + + if CLOUDPROXY_ENDPOINT: + coroutines.extend([YggTorrent(*args, **kwargs).run(), + YggAnimation(*args, **kwargs).run()]) + + return list(await gather(*coroutines)) def get_instance(url, query): diff --git a/pynyaata/connectors/animeultime.py b/pynyaata/connectors/animeultime.py index 28fc636..f7347df 100644 --- a/pynyaata/connectors/animeultime.py +++ b/pynyaata/connectors/animeultime.py @@ -12,6 +12,7 @@ class AnimeUltime(ConnectorCore): favicon = 'animeultime.png' base_url = 'http://www.anime-ultime.net' is_light = True + is_behind_cloudflare = False def get_full_search_url(self): from_date = '' diff --git a/pynyaata/connectors/core.py b/pynyaata/connectors/core.py index 02f1a1e..0c0f45f 100644 --- a/pynyaata/connectors/core.py +++ b/pynyaata/connectors/core.py @@ -8,13 +8,10 @@ from logging import getLogger from 
urllib.parse import urlencode import requests -from cloudscraper import create_scraper -from cloudscraper.exceptions import CloudflareException, CaptchaException from requests import RequestException -from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT +from ..config import CACHE_TIMEOUT, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT -scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG) cloudproxy_session = None @@ -80,9 +77,11 @@ ConnectorCache = Cache() def curl_content(url, params=None, ajax=False, debug=True): + from . import get_instance output = '' http_code = 500 method = 'post' if (params is not None) else 'get' + instance = get_instance(url, '') if ajax: headers = {'X-Requested-With': 'XMLHttpRequest'} @@ -90,15 +89,15 @@ def curl_content(url, params=None, ajax=False, debug=True): headers = {} try: - if method == 'post': - response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers) - else: - response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers) + if not instance.is_behind_cloudflare: + if method == 'post': + response = requests.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers) + else: + response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=headers) - output = response.text - http_code = response.status_code - except CloudflareException as e: - if CLOUDPROXY_ENDPOINT: + output = response.text + http_code = response.status_code + elif CLOUDPROXY_ENDPOINT: global cloudproxy_session if not cloudproxy_session: json_session = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({ @@ -123,15 +122,12 @@ def curl_content(url, params=None, ajax=False, debug=True): output = response['solution']['response'] if http_code == 500: - json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({ + requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({ 'cmd': 'sessions.destroy', 'session': cloudproxy_session, })) cloudproxy_session = None - - if 
debug and http_code != 200: - getLogger().exception('%s\n\n%s' % (str(e), json_response.text)) - except (RequestException, CaptchaException) as e: + except RequestException as e: if debug: getLogger().exception(e) @@ -164,6 +160,11 @@ class ConnectorCore(ABC): def is_light(self): pass + @property + @abstractmethod + def is_behind_cloudflare(self): + pass + def __init__(self, query, page=1, return_type=ConnectorReturn.SEARCH): self.query = query self.data = [] @@ -206,6 +207,7 @@ class Other(ConnectorCore): favicon = 'blank.png' base_url = '' is_light = True + is_behind_cloudflare = False def get_full_search_url(self): pass diff --git a/pynyaata/connectors/nyaa.py b/pynyaata/connectors/nyaa.py index ffd3775..c1ef269 100644 --- a/pynyaata/connectors/nyaa.py +++ b/pynyaata/connectors/nyaa.py @@ -10,6 +10,7 @@ class Nyaa(ConnectorCore): favicon = 'nyaa.png' base_url = 'https://nyaa.si' is_light = False + is_behind_cloudflare = False def get_full_search_url(self): sort_type = 'size' diff --git a/pynyaata/connectors/pantsu.py b/pynyaata/connectors/pantsu.py index 83f069d..64abd3d 100644 --- a/pynyaata/connectors/pantsu.py +++ b/pynyaata/connectors/pantsu.py @@ -10,6 +10,7 @@ class Pantsu(ConnectorCore): favicon = 'pantsu.png' base_url = 'https://nyaa.net' is_light = False + is_behind_cloudflare = False def get_full_search_url(self): sort_type = 4 diff --git a/pynyaata/connectors/yggtorrent.py b/pynyaata/connectors/yggtorrent.py index e727b2b..deef0b0 100644 --- a/pynyaata/connectors/yggtorrent.py +++ b/pynyaata/connectors/yggtorrent.py @@ -15,6 +15,7 @@ class YggTorrent(ConnectorCore): base_url = 'https://www2.yggtorrent.si' is_light = False category = 2179 + is_behind_cloudflare = True def get_full_search_url(self): sort_type = 'size' diff --git a/requirements.txt b/requirements.txt index 9760648..d1014f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,11 +3,8 @@ Flask-SQLAlchemy==2.4.4 Flask-HTTPAuth==4.2.0 Flask-WTF==0.14.3 WTForms==2.3.3 -PyMySQL==0.10.1 
+PyMySQL==1.0.0 requests==2.25.1 beautifulsoup4==4.9.3 python-dotenv==0.15.0 -cloudscraper==1.2.50 -Js2Py==0.70 -polling2==0.4.6 dateparser==1.0.0