Fix new Cloudflare challenge
continuous-integration/drone/push Build is passing Details

This commit is contained in:
Michel Roux 2020-05-14 18:54:06 +02:00
parent 38952ae747
commit 72857d4d6d
4 changed files with 72 additions and 5 deletions

View File

@ -7,5 +7,7 @@ MYSQL_DATABASE=nyaa
MYSQL_SERVER=db
ADMIN_USERNAME=admin
ADMIN_PASSWORD=secret
REQUESTS_TIMEOUT=5
CACHE_TIMEOUT=3600
MYSQL_ROOT_PASSWORD=root
BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain

View File

@ -13,6 +13,7 @@ ADMIN_USERNAME = environ.get('ADMIN_USERNAME', 'admin')
ADMIN_PASSWORD = generate_password_hash(environ.get('ADMIN_PASSWORD', 'secret'))
# NOTE(review): like the timeouts below, this is a str when FLASK_PORT is set
# and an int otherwise — confirm the consumer accepts both.
APP_PORT = environ.get('FLASK_PORT', 5000)
# environ.get() returns a str whenever the variable is set, so convert here to
# keep the value numeric regardless of whether it came from the environment or
# from the default.
CACHE_TIMEOUT = int(environ.get('CACHE_TIMEOUT', 60 * 60))
# Used as the `timeout` argument of the scraper's HTTP calls, which must be a
# number; a raw environment string such as "5" is not accepted there.
REQUESTS_TIMEOUT = float(environ.get('REQUESTS_TIMEOUT', 5))
BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else []
MYSQL_ENABLED = False

View File

@ -0,0 +1,64 @@
import re
from collections import OrderedDict
from urllib.parse import urlparse
from cloudscraper import CloudScraper, CloudflareIUAMError, JavaScriptInterpreter
class CloudScraperWrapper(CloudScraper):
    """CloudScraper subclass that overrides the IUAM challenge form parsing."""

    def IUAM_Challenge_Response(self, body, url, interpreter):
        """Build the request that answers a Cloudflare IUAM challenge page.

        Args:
            body: HTML text of the challenge page.
            url: URL that served the challenge; its scheme and host are reused
                for the submission URL.
            interpreter: Value handed to ``JavaScriptInterpreter.dynamicImport``
                to select the JavaScript solver.

        Returns:
            A dict with ``url`` (the absolute form action to submit to) and
            ``data`` (an ``OrderedDict`` holding the ``r``, ``jschl_vc`` and
            ``pass`` form fields found in the page plus ``jschl_answer``).

        Raises:
            CloudflareIUAMError: reported through ``self.simpleException`` when
                the challenge form or one of its field values cannot be
                extracted, or when solving the JavaScript challenge fails.
        """
        extraction_error = (
            "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
        )

        try:
            # re.search() returns None when the form is absent, so .groupdict()
            # raises AttributeError, which is handled below. On a match,
            # groupdict() always contains every named group, so no separate
            # key-presence check is needed.
            formPayload = re.search(
                r'<form (?P<form>.*?="challenge-form" '
                r'action="(?P<challengeUUID>.*?'
                r'__cf_chl_jschl_tk__=\S+)"(.*?)</form>)',
                body,
                re.M | re.DOTALL
            ).groupdict()

            payload = OrderedDict()
            for challengeParam in re.findall(r'<input\s(.*?)>', formPayload['form']):
                inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam))
                name = inputPayload.get('name')
                if name not in ('r', 'jschl_vc', 'pass'):
                    continue
                # Only the first jschl_vc is kept; for the other fields a later
                # occurrence overwrites an earlier one.
                if name == 'jschl_vc' and name in payload:
                    continue
                # A wanted field without a parseable value (e.g. value="")
                # raises KeyError here; it is reported as an extraction
                # failure below instead of escaping as a bare KeyError.
                payload[name] = inputPayload['value']
        except (AttributeError, KeyError):
            self.simpleException(CloudflareIUAMError, extraction_error)

        hostParsed = urlparse(url)

        try:
            payload['jschl_answer'] = JavaScriptInterpreter.dynamicImport(
                interpreter
            ).solveChallenge(body, hostParsed.netloc)
        except Exception as e:
            # Any solver failure is surfaced as a Cloudflare error.
            self.simpleException(
                CloudflareIUAMError,
                'Unable to parse Cloudflare anti-bots page: {}'.format(
                    getattr(e, 'message', e)
                )
            )

        return {
            'url': '{}://{}{}'.format(
                hostParsed.scheme,
                hostParsed.netloc,
                self.unescape(formPayload['challengeUUID'])
            ),
            'data': payload
        }

View File

@ -5,13 +5,13 @@ from enum import Enum
from functools import wraps
from logging import getLogger
from cloudscraper import create_scraper
from cloudscraper.exceptions import CloudflareException
from requests import RequestException
from ..config import CACHE_TIMEOUT, IS_DEBUG
from .cloudscraper import CloudScraperWrapper
from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT
scraper = create_scraper(browser={
scraper = CloudScraperWrapper.create_scraper(browser={
'custom': 'ScraperBot/1.0'
})
@ -85,9 +85,9 @@ def curl_content(url, params=None, ajax=False):
try:
if params is not None:
response = scraper.post(url, params, timeout=5, headers=headers)
response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
else:
response = scraper.get(url, timeout=5, headers=headers)
response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
output = response.text
http_code = response.status_code