Add CloudProxy to bypass CloudFlare
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Michel Roux 2020-10-21 22:13:57 +02:00
parent df6c318322
commit 2d9427407d
5 changed files with 57 additions and 7 deletions

View File

@ -11,3 +11,4 @@ REQUESTS_TIMEOUT=5
CACHE_TIMEOUT=3600 CACHE_TIMEOUT=3600
MYSQL_ROOT_PASSWORD=root MYSQL_ROOT_PASSWORD=root
BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
CLOUDPROXY_ENDPOINT=http://cloudproxy:8191/v1

View File

@ -3,7 +3,7 @@
[![Build Status](https://ci.crystalyx.net/api/badges/Xefir/PyNyaaTa/status.svg)](https://ci.crystalyx.net/Xefir/PyNyaaTa) [![Build Status](https://ci.crystalyx.net/api/badges/Xefir/PyNyaaTa/status.svg)](https://ci.crystalyx.net/Xefir/PyNyaaTa)
I'm lazy and I want to search across severall VF and VOSTFR torrents databases in one click. I'm lazy, and I want to search across several VF and VOSTFR torrents databases in one click.
That's the starting point that build this app. That's the starting point that build this app.
At first, it was a crappy PHP project without any good future. At first, it was a crappy PHP project without any good future.
After a good rewrite in Python, it's time to show it to the public, and here it is! After a good rewrite in Python, it's time to show it to the public, and here it is!
@ -39,6 +39,23 @@ All is managed by environment variables.
Please look into the `.env.dist` file to list all possible environment variables. Please look into the `.env.dist` file to list all possible environment variables.
You have to install MariaDB (or any MySQL server) to be able to access the admin panel. You have to install MariaDB (or any MySQL server) to be able to access the admin panel.
### Bypassing CloudFlare for YggTorrent
YggTorrent uses CloudFlare to protect itself from DDoS attacks.
This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...*
Over time, CloudFlare will systematically ask you to prove that you are human.
CloudFlare has three types of challenges to complete (from the easiest to the hardest to solve):
- Pure JavaScript, handled by [cloudscraper](https://github.com/VeNoMouS/cloudscraper) without any configuration
- CAPTCHA *(not supported but maybe soon™)*
- JavaScript and browser actions done through [CloudProxy](https://github.com/NoahCardoza/CloudProxy)
To use CloudProxy, you need a running instance.
Please refer to the [documentation](https://github.com/NoahCardoza/CloudProxy#installation) or install it via [docker](https://github.com/NoahCardoza/CloudProxy#docker).
After that, change the `CLOUDPROXY_ENDPOINT` environment variable to point to your CloudProxy instance.
If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do any of this; it comes pre-installed.
## Links ## Links
- Project homepage: https://nyaa.crystalyx.net/ - Project homepage: https://nyaa.crystalyx.net/

View File

@ -9,6 +9,7 @@ services:
working_dir: /app working_dir: /app
depends_on: depends_on:
- db - db
- cloudproxy
env_file: env_file:
- .env.dist - .env.dist
- .env - .env
@ -24,3 +25,11 @@ services:
- .env - .env
volumes: volumes:
- ./.db:/var/lib/mysql - ./.db:/var/lib/mysql
cloudproxy:
image: jbouhd/cloudproxy
ports:
- "8191:8191"
env_file:
- .env.dist
- .env

View File

@ -15,6 +15,7 @@ APP_PORT = int(environ.get('FLASK_PORT', 5000))
CACHE_TIMEOUT = int(environ.get('CACHE_TIMEOUT', 60 * 60)) CACHE_TIMEOUT = int(environ.get('CACHE_TIMEOUT', 60 * 60))
REQUESTS_TIMEOUT = int(environ.get('REQUESTS_TIMEOUT', 5)) REQUESTS_TIMEOUT = int(environ.get('REQUESTS_TIMEOUT', 5))
BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else [] BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else []
CLOUDPROXY_ENDPOINT = environ.get('CLOUDPROXY_ENDPOINT')
MYSQL_ENABLED = False MYSQL_ENABLED = False
app = Flask(__name__) app = Flask(__name__)
@ -36,7 +37,7 @@ if db_host:
db_user, db_password, db_host, db_name db_user, db_password, db_host, db_name
) )
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
app.config['SQLALCHEMY_ECHO'] = IS_DEBUG app.config['SQLALCHEMY_ECHO'] = False
app.config['SQLALCHEMY_ENGINE_OPTIONS'] = { app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
'pool_recycle': 200 'pool_recycle': 200
} }

View File

@ -3,13 +3,16 @@ from abc import ABC, abstractmethod
from datetime import datetime from datetime import datetime
from enum import Enum from enum import Enum
from functools import wraps from functools import wraps
from json import dumps, loads
from logging import getLogger from logging import getLogger
from urllib.parse import urlencode
import requests
from cloudscraper import create_scraper from cloudscraper import create_scraper
from cloudscraper.exceptions import CloudflareException, CaptchaException from cloudscraper.exceptions import CloudflareException, CaptchaException
from requests import RequestException from requests import RequestException
from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT
scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG) scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG)
@ -76,22 +79,41 @@ ConnectorCache = Cache()
def curl_content(url, params=None, ajax=False, debug=True): def curl_content(url, params=None, ajax=False, debug=True):
output = ''
http_code = 500
method = 'post' if (params is not None) else 'get'
if ajax: if ajax:
headers = {'X-Requested-With': 'XMLHttpRequest'} headers = {'X-Requested-With': 'XMLHttpRequest'}
else: else:
headers = {} headers = {}
try: try:
if params is not None: if method == 'post':
response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers) response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
else: else:
response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers) response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
output = response.text output = response.text
http_code = response.status_code http_code = response.status_code
except (RequestException, CloudflareException, CaptchaException) as e: except CloudflareException as e:
output = '' if CLOUDPROXY_ENDPOINT:
http_code = 500 headers['Content-Type'] = 'application/x-www-form-urlencoded' if (method == 'post') else 'application/json'
json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
'cmd': 'request.%s' % method,
'url': url,
'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.0 Safari/537.36',
'postData': '%s' % urlencode(params) if (method == 'post') else ''
}))
response = loads(json_response.text)
output = response['solution']['response']
http_code = json_response.status_code
if debug and http_code != 200:
getLogger().exception('%s\n\n%s' % (str(e), json_response.text))
except (RequestException, CaptchaException) as e:
if debug: if debug:
getLogger().exception(e) getLogger().exception(e)