diff --git a/.env.dist b/.env.dist index b3d839d..b0d60fb 100644 --- a/.env.dist +++ b/.env.dist @@ -11,3 +11,4 @@ REQUESTS_TIMEOUT=5 CACHE_TIMEOUT=3600 MYSQL_ROOT_PASSWORD=root BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain +CLOUDPROXY_ENDPOINT=http://cloudproxy:8191/v1 diff --git a/README.md b/README.md index e851e68..41bfa1c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Build Status](https://ci.crystalyx.net/api/badges/Xefir/PyNyaaTa/status.svg)](https://ci.crystalyx.net/Xefir/PyNyaaTa) -I'm lazy and I want to search across severall VF and VOSTFR torrents databases in one click. +I'm lazy, and I want to search across several VF and VOSTFR torrents databases in one click. That's the starting point that build this app. At first, it was a crappy PHP project without any good future. After a good rewrite in Python, it's time to show it to the public, and here it is! @@ -39,6 +39,23 @@ All is managed by environment variables. Please look into the `.env.dist` file to list all possible environment variables. You have to install MariaDB (or any MySQL server) to be able to access the admin panel. +### Bypassing CloudFlare for YggTorrent + +YggTorrent uses CloudFlare to protect itself from DDoS attacks. +This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...* +Over time, CloudFlare will systematically ask you to prove yourself. + +CloudFlare has three types of challenges to be completed (from the easiest to resolve to the hardest): +- Pure JavaScript done through [cloudscraper](https://github.com/VeNoMouS/cloudscraper) without any configuration +- CAPTCHA *(not supported but maybe soon™)* +- JavaScript and browser actions done through [CloudProxy](https://github.com/NoahCardoza/CloudProxy) + +For CloudProxy, you have to have an instance running. 
+Please refer to the [documentation](https://github.com/NoahCardoza/CloudProxy#installation) or install it via [docker](https://github.com/NoahCardoza/CloudProxy#docker). +After that, change the `CLOUDPROXY_ENDPOINT` environment variable to refer to your CloudProxy instance. + +If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do all this: it comes pre-installed. + ## Links - Project homepage: https://nyaa.crystalyx.net/ diff --git a/docker-compose.yml b/docker-compose.yml index 55dedfd..e82c6e1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,6 +9,7 @@ services: working_dir: /app depends_on: - db + - cloudproxy env_file: - .env.dist - .env @@ -24,3 +25,11 @@ services: - .env volumes: - ./.db:/var/lib/mysql + + cloudproxy: + image: jbouhd/cloudproxy + ports: + - "8191:8191" + env_file: + - .env.dist + - .env diff --git a/pynyaata/config.py b/pynyaata/config.py index 3bd66a3..4f174e4 100644 --- a/pynyaata/config.py +++ b/pynyaata/config.py @@ -15,6 +15,7 @@ APP_PORT = int(environ.get('FLASK_PORT', 5000)) CACHE_TIMEOUT = int(environ.get('CACHE_TIMEOUT', 60 * 60)) REQUESTS_TIMEOUT = int(environ.get('REQUESTS_TIMEOUT', 5)) BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else [] +CLOUDPROXY_ENDPOINT = environ.get('CLOUDPROXY_ENDPOINT') MYSQL_ENABLED = False app = Flask(__name__) @@ -36,7 +37,7 @@ if db_host: db_user, db_password, db_host, db_name ) app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True - app.config['SQLALCHEMY_ECHO'] = IS_DEBUG + app.config['SQLALCHEMY_ECHO'] = False app.config['SQLALCHEMY_ENGINE_OPTIONS'] = { 'pool_recycle': 200 } diff --git a/pynyaata/connectors/core.py b/pynyaata/connectors/core.py index b29c1c1..1cbe247 100644 --- a/pynyaata/connectors/core.py +++ b/pynyaata/connectors/core.py @@ -3,13 +3,16 @@ from abc import ABC, abstractmethod from datetime import datetime from enum import Enum from functools import wraps +from 
json import dumps, loads from logging import getLogger +from urllib.parse import urlencode +import requests from cloudscraper import create_scraper from cloudscraper.exceptions import CloudflareException, CaptchaException from requests import RequestException -from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT +from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG) @@ -76,22 +79,41 @@ ConnectorCache = Cache() def curl_content(url, params=None, ajax=False, debug=True): + output = '' + http_code = 500 + method = 'post' if (params is not None) else 'get' + if ajax: headers = {'X-Requested-With': 'XMLHttpRequest'} else: headers = {} try: - if params is not None: + if method == 'post': response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers) else: response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers) output = response.text http_code = response.status_code - except (RequestException, CloudflareException, CaptchaException) as e: - output = '' - http_code = 500 + except CloudflareException as e: + if CLOUDPROXY_ENDPOINT: + headers['Content-Type'] = 'application/x-www-form-urlencoded' if (method == 'post') else 'application/json' + + json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({ + 'cmd': 'request.%s' % method, + 'url': url, + 'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.0 Safari/537.36', + 'postData': '%s' % urlencode(params) if (method == 'post') else '' + })) + + response = loads(json_response.text) + output = response['solution']['response'] + http_code = json_response.status_code + + if debug and http_code != 200: + getLogger().exception('%s\n\n%s' % (str(e), json_response.text)) + except (RequestException, CaptchaException) as e: if debug: getLogger().exception(e)