From 72857d4d6d8a68d5dfae3b3155b46b820a035e00 Mon Sep 17 00:00:00 2001
From: Michel Roux
Date: Thu, 14 May 2020 18:54:06 +0200
Subject: [PATCH] Fix new cloudflare challenge

---
 .env.dist                           |  2 +
 pynyaata/config.py                  |  1 +
 pynyaata/connectors/cloudscraper.py | 64 +++++++++++++++++++++++++++++
 pynyaata/connectors/core.py         | 10 ++---
 4 files changed, 72 insertions(+), 5 deletions(-)
 create mode 100644 pynyaata/connectors/cloudscraper.py

diff --git a/.env.dist b/.env.dist
index 30c6ec1..b3d839d 100644
--- a/.env.dist
+++ b/.env.dist
@@ -7,5 +7,7 @@ MYSQL_DATABASE=nyaa
 MYSQL_SERVER=db
 ADMIN_USERNAME=admin
 ADMIN_PASSWORD=secret
+REQUESTS_TIMEOUT=5
+CACHE_TIMEOUT=3600
 MYSQL_ROOT_PASSWORD=root
 BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
diff --git a/pynyaata/config.py b/pynyaata/config.py
index 94d0eb9..3622c30 100644
--- a/pynyaata/config.py
+++ b/pynyaata/config.py
@@ -13,6 +13,7 @@ ADMIN_USERNAME = environ.get('ADMIN_USERNAME', 'admin')
 ADMIN_PASSWORD = generate_password_hash(environ.get('ADMIN_PASSWORD', 'secret'))
 APP_PORT = environ.get('FLASK_PORT', 5000)
 CACHE_TIMEOUT = environ.get('CACHE_TIMEOUT', 60 * 60)
+REQUESTS_TIMEOUT = environ.get('REQUESTS_TIMEOUT', 5)
 BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else []
 
 MYSQL_ENABLED = False
diff --git a/pynyaata/connectors/cloudscraper.py b/pynyaata/connectors/cloudscraper.py
new file mode 100644
index 0000000..18429f3
--- /dev/null
+++ b/pynyaata/connectors/cloudscraper.py
@@ -0,0 +1,64 @@
+import re
+from collections import OrderedDict
+from urllib.parse import urlparse
+
+from cloudscraper import CloudScraper, CloudflareIUAMError, JavaScriptInterpreter
+
+
+class CloudScraperWrapper(CloudScraper):
+
+    def IUAM_Challenge_Response(self, body, url, interpreter):
+        try:
+            formPayload = re.search(
+                r'<form (?P<form>.*?="challenge-form" '
+                r'action="(?P<challengeUUID>.*?'
+                r'__cf_chl_jschl_tk__=\S+)"(.*?)</form>)',
+                body,
+                re.M | re.DOTALL
+            ).groupdict()
+
+            if not all(key in formPayload for key in ['form', 'challengeUUID']):
+                self.simpleException(
+                    CloudflareIUAMError,
+                    "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
+                )
+
+            payload = OrderedDict()
+            for challengeParam in re.findall(r'<input\s(.*?)/>', formPayload['form']):
+                inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam))
+
+                if inputPayload.get('name') in ['r', 'jschl_vc', 'pass']:
+                    if inputPayload.get('name') != "jschl_vc":
+                        payload.update({inputPayload['name']: inputPayload['value']})
+                    elif inputPayload.get('name') == "jschl_vc" and "jschl_vc" not in payload:
+                        payload.update({inputPayload['name']: inputPayload['value']})
+
+        except AttributeError:
+            self.simpleException(
+                CloudflareIUAMError,
+                "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
+            )
+
+        hostParsed = urlparse(url)
+
+        try:
+            payload['jschl_answer'] = JavaScriptInterpreter.dynamicImport(
+                interpreter
+            ).solveChallenge(body, hostParsed.netloc)
+        except Exception as e:
+            self.simpleException(
+                CloudflareIUAMError,
+                'Unable to parse Cloudflare anti-bots page: {}'.format(
+                    getattr(e, 'message', e)
+                )
+            )
+
+        return {
+            'url': '{}://{}{}'.format(
+                hostParsed.scheme,
+                hostParsed.netloc,
+                self.unescape(formPayload['challengeUUID'])
+            ),
+            'data': payload
+        }
diff --git a/pynyaata/connectors/core.py b/pynyaata/connectors/core.py
index 7376a4e..22b95af 100644
--- a/pynyaata/connectors/core.py
+++ b/pynyaata/connectors/core.py
@@ -5,13 +5,13 @@ from enum import Enum
 from functools import wraps
 from logging import getLogger
 
-from cloudscraper import create_scraper
 from cloudscraper.exceptions import CloudflareException
 from requests import RequestException
 
-from ..config import CACHE_TIMEOUT, IS_DEBUG
+from .cloudscraper import CloudScraperWrapper
+from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT
 
-scraper = create_scraper(browser={
+scraper = CloudScraperWrapper.create_scraper(browser={
     'custom': 'ScraperBot/1.0'
 })
 
@@ -85,9 +85,9 @@ def curl_content(url, params=None, ajax=False):
 
     try:
         if params is not None:
-            response = scraper.post(url, params, timeout=5, headers=headers)
+            response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
         else:
-            response = scraper.get(url, timeout=5, headers=headers)
+            response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
 
         output = response.text
         http_code = response.status_code