Fix new Cloudflare challenge
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
38952ae747
commit
72857d4d6d
@ -7,5 +7,7 @@ MYSQL_DATABASE=nyaa
|
|||||||
MYSQL_SERVER=db
|
MYSQL_SERVER=db
|
||||||
ADMIN_USERNAME=admin
|
ADMIN_USERNAME=admin
|
||||||
ADMIN_PASSWORD=secret
|
ADMIN_PASSWORD=secret
|
||||||
|
REQUESTS_TIMEOUT=5
|
||||||
|
CACHE_TIMEOUT=3600
|
||||||
MYSQL_ROOT_PASSWORD=root
|
MYSQL_ROOT_PASSWORD=root
|
||||||
BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
|
BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
|
||||||
|
@ -13,6 +13,7 @@ ADMIN_USERNAME = environ.get('ADMIN_USERNAME', 'admin')
|
|||||||
ADMIN_PASSWORD = generate_password_hash(environ.get('ADMIN_PASSWORD', 'secret'))
|
ADMIN_PASSWORD = generate_password_hash(environ.get('ADMIN_PASSWORD', 'secret'))
|
||||||
APP_PORT = environ.get('FLASK_PORT', 5000)
|
APP_PORT = environ.get('FLASK_PORT', 5000)
|
||||||
CACHE_TIMEOUT = environ.get('CACHE_TIMEOUT', 60 * 60)
|
CACHE_TIMEOUT = environ.get('CACHE_TIMEOUT', 60 * 60)
|
||||||
|
# Timeout passed to outgoing HTTP requests (used as the `timeout=` argument).
# Environment variables are always strings, and requests/urllib3 reject a
# string timeout, so the value is converted to a number here. An unset or
# empty variable falls back to the default of 5.
REQUESTS_TIMEOUT = float(environ.get('REQUESTS_TIMEOUT') or 5)
|
||||||
BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else []
|
BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else []
|
||||||
MYSQL_ENABLED = False
|
MYSQL_ENABLED = False
|
||||||
|
|
||||||
|
64
pynyaata/connectors/cloudscraper.py
Normal file
64
pynyaata/connectors/cloudscraper.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import re
|
||||||
|
from collections import OrderedDict
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from cloudscraper import CloudScraper, CloudflareIUAMError, JavaScriptInterpreter
|
||||||
|
|
||||||
|
|
||||||
|
class CloudScraperWrapper(CloudScraper):
    """CloudScraper subclass with a custom IUAM challenge-form parser."""

    def IUAM_Challenge_Response(self, body, url, interpreter):
        """Build the request that answers a Cloudflare IUAM challenge page.

        The challenge ``<form>`` (identified by ``="challenge-form"`` and an
        ``action`` containing ``__cf_chl_jschl_tk__=``) is located in *body*,
        its ``r``, ``jschl_vc`` and ``pass`` inputs are collected in document
        order, and the JavaScript answer is computed by the interpreter named
        by *interpreter*.

        :param body: HTML of the challenge page.
        :param url: URL the challenge page was served from; its scheme and
            host are reused for the submission URL.
        :param interpreter: interpreter name handed to
            ``JavaScriptInterpreter.dynamicImport``.
        :return: dict with ``'url'`` (where to submit the form) and
            ``'data'`` (ordered form payload including ``jschl_answer``).

        Failures are reported through ``self.simpleException`` with
        ``CloudflareIUAMError`` (presumably raising it -- confirm against the
        base class).
        """
        extract_error = (
            "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
        )
        wanted_inputs = ('r', 'jschl_vc', 'pass')

        try:
            # A failed search returns None; the resulting AttributeError on
            # ``.groupdict()`` is what the handler below reacts to.
            challenge_form = re.search(
                r'<form (?P<form>.*?="challenge-form" '
                r'action="(?P<challengeUUID>.*?'
                r'__cf_chl_jschl_tk__=\S+)"(.*?)</form>)',
                body,
                re.M | re.DOTALL
            ).groupdict()

            if any(key not in challenge_form for key in ['form', 'challengeUUID']):
                self.simpleException(CloudflareIUAMError, extract_error)

            payload = OrderedDict()
            for raw_attributes in re.findall(r'<input\s(.*?)>', challenge_form['form']):
                attributes = dict(re.findall(r'(\S+)="(\S+)"', raw_attributes))
                field_name = attributes.get('name')

                if field_name not in wanted_inputs:
                    continue
                # Only the first ``jschl_vc`` input is kept; the other
                # fields are overwritten by later occurrences.
                if field_name == "jschl_vc" and "jschl_vc" in payload:
                    continue

                payload[field_name] = attributes['value']

        except AttributeError:
            self.simpleException(CloudflareIUAMError, extract_error)

        hostParsed = urlparse(url)

        try:
            solver = JavaScriptInterpreter.dynamicImport(interpreter)
            payload['jschl_answer'] = solver.solveChallenge(body, hostParsed.netloc)
        except Exception as e:
            self.simpleException(
                CloudflareIUAMError,
                'Unable to parse Cloudflare anti-bots page: {}'.format(
                    getattr(e, 'message', e)
                )
            )

        submit_path = self.unescape(challenge_form['challengeUUID'])
        submit_url = '{}://{}{}'.format(hostParsed.scheme, hostParsed.netloc, submit_path)

        return {
            'url': submit_url,
            'data': payload
        }
|
@ -5,13 +5,13 @@ from enum import Enum
|
|||||||
from functools import wraps
|
from functools import wraps
|
||||||
from logging import getLogger
|
from logging import getLogger
|
||||||
|
|
||||||
from cloudscraper import create_scraper
|
|
||||||
from cloudscraper.exceptions import CloudflareException
|
from cloudscraper.exceptions import CloudflareException
|
||||||
from requests import RequestException
|
from requests import RequestException
|
||||||
|
|
||||||
from ..config import CACHE_TIMEOUT, IS_DEBUG
|
from .cloudscraper import CloudScraperWrapper
|
||||||
|
from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT
|
||||||
|
|
||||||
scraper = create_scraper(browser={
|
scraper = CloudScraperWrapper.create_scraper(browser={
|
||||||
'custom': 'ScraperBot/1.0'
|
'custom': 'ScraperBot/1.0'
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -85,9 +85,9 @@ def curl_content(url, params=None, ajax=False):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if params is not None:
|
if params is not None:
|
||||||
response = scraper.post(url, params, timeout=5, headers=headers)
|
response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
|
||||||
else:
|
else:
|
||||||
response = scraper.get(url, timeout=5, headers=headers)
|
response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
|
||||||
|
|
||||||
output = response.text
|
output = response.text
|
||||||
http_code = response.status_code
|
http_code = response.status_code
|
||||||
|
Reference in New Issue
Block a user