Add CloudProxy to bypass CloudFlare

2020-10-21 22:13:57 +02:00 · 2020-10-21 22:13:57 +02:00 · 2d9427407d
commit 2d9427407d
parent df6c318322
5 changed files with 57 additions and 7 deletions
--- a/.env.dist
+++ b/.env.dist
@ -11,3 +11,4 @@ REQUESTS_TIMEOUT=5
 CACHE_TIMEOUT=3600
 MYSQL_ROOT_PASSWORD=root
 BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
 CLOUDPROXY_ENDPOINT=http://cloudproxy:8191/v1
--- a/README.md
+++ b/README.md
@ -3,7 +3,7 @@
 [![Build Status](https://ci.crystalyx.net/api/badges/Xefir/PyNyaaTa/status.svg)](https://ci.crystalyx.net/Xefir/PyNyaaTa)
-I'm lazy and I want to search across severall VF and VOSTFR torrents databases in one click.
+I'm lazy, and I want to search across several VF and VOSTFR torrents databases in one click.
 That's the starting point that built this app.
 At first, it was a crappy PHP project without any good future.
 After a good rewrite in Python, it's time to show it to the public, and here it is!
@ -39,6 +39,23 @@ All is managed by environment variables.
 Please look into the `.env.dist` file to list all possible environment variables.
 You have to install MariaDB (or any MySQL server) to be able to access the admin panel.
 ### Bypassing CloudFlare for YggTorrent
 YggTorrent uses CloudFlare to protect itself from DDoS attacks.
 This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...*
 Over time, CloudFlare will systematically ask you to prove yourself.
 CloudFlare has three types of challenges to be completed (from the easiest to solve to the hardest):
 - Pure Javascript done through [cloudscraper](https://github.com/VeNoMouS/cloudscraper) without any configurations
 - CAPTCHA *(not supported but maybe soon™)*
 - JavaScript and browser actions done through [CloudProxy](https://github.com/NoahCardoza/CloudProxy)
 For CloudProxy, you have to have an instance running.
 Please refer to the [documentation](https://github.com/NoahCardoza/CloudProxy#installation) or install it via [docker](https://github.com/NoahCardoza/CloudProxy#docker).
 After that, change the `CLOUDPROXY_ENDPOINT` environment variable to refer to your CloudProxy instance.
 If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do all this, it comes pre-installed.
 ## Links
 - Project homepage: https://nyaa.crystalyx.net/
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -9,6 +9,7 @@ services:
    working_dir: /app
    depends_on:
      - db
      - cloudproxy
    env_file:
      - .env.dist
      - .env
@ -24,3 +25,11 @@ services:
      - .env
    volumes:
      - ./.db:/var/lib/mysql
  cloudproxy:
    image: jbouhd/cloudproxy
    ports:
      - "8191:8191"
    env_file:
      - .env.dist
      - .env
--- a/pynyaata/config.py
+++ b/pynyaata/config.py
@ -15,6 +15,7 @@ APP_PORT = int(environ.get('FLASK_PORT', 5000))
 CACHE_TIMEOUT = int(environ.get('CACHE_TIMEOUT', 60 * 60))
 REQUESTS_TIMEOUT = int(environ.get('REQUESTS_TIMEOUT', 5))
 BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else []
 CLOUDPROXY_ENDPOINT = environ.get('CLOUDPROXY_ENDPOINT')
 MYSQL_ENABLED = False
 app = Flask(__name__)
@ -36,7 +37,7 @@ if db_host:
        db_user, db_password, db_host, db_name
    )
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
-    app.config['SQLALCHEMY_ECHO'] = IS_DEBUG
+    app.config['SQLALCHEMY_ECHO'] = False
    app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
        'pool_recycle': 200
    }
--- a/pynyaata/connectors/core.py
+++ b/pynyaata/connectors/core.py
@ -3,13 +3,16 @@ from abc import ABC, abstractmethod
 from datetime import datetime
 from enum import Enum
 from functools import wraps
 from json import dumps, loads
 from logging import getLogger
 from urllib.parse import urlencode
 import requests
 from cloudscraper import create_scraper
 from cloudscraper.exceptions import CloudflareException, CaptchaException
 from requests import RequestException
-from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT
+from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT
 scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG)
@ -76,22 +79,41 @@ ConnectorCache = Cache()
 def curl_content(url, params=None, ajax=False, debug=True):
    output = ''
    http_code = 500
    method = 'post' if (params is not None) else 'get'
    if ajax:
        headers = {'X-Requested-With': 'XMLHttpRequest'}
    else:
        headers = {}
    try:
-        if params is not None:
+        if method == 'post':
            response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
        else:
            response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
        output = response.text
        http_code = response.status_code
-    except (RequestException, CloudflareException, CaptchaException) as e:
+    except CloudflareException as e:
-        output = ''
+        if CLOUDPROXY_ENDPOINT:
-        http_code = 500
+            headers['Content-Type'] = 'application/x-www-form-urlencoded' if (method == 'post') else 'application/json'
            json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
                'cmd': 'request.%s' % method,
                'url': url,
                'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.0 Safari/537.36',
                'postData': '%s' % urlencode(params) if (method == 'post') else ''
            }))
            response = loads(json_response.text)
            output = response['solution']['response']
            http_code = json_response.status_code
            if debug and http_code != 200:
                getLogger().exception('%s\n\n%s' % (str(e), json_response.text))
    except (RequestException, CaptchaException) as e:
        if debug:
            getLogger().exception(e)