Add CloudProxy to bypass CloudFlare

2020-10-21 22:13:57 +02:00 · 2020-10-21 22:13:57 +02:00 · 2d9427407d
commit 2d9427407d
parent df6c318322
5 changed files with 57 additions and 7 deletions
--- a/.env.dist
+++ b/.env.dist
@ -11,3 +11,4 @@ REQUESTS_TIMEOUT=5
 CACHE_TIMEOUT=3600
 MYSQL_ROOT_PASSWORD=root
 BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
+CLOUDPROXY_ENDPOINT=http://cloudproxy:8191/v1
--- a/README.md
+++ b/README.md
@ -3,7 +3,7 @@

 [![Build Status](https://ci.crystalyx.net/api/badges/Xefir/PyNyaaTa/status.svg)](https://ci.crystalyx.net/Xefir/PyNyaaTa)

-I'm lazy and I want to search across severall VF and VOSTFR torrents databases in one click.
+I'm lazy, and I want to search across several VF and VOSTFR torrents databases in one click.
 That's the starting point that build this app.
 At first, it was a crappy PHP project without any good future.
 After a good rewrite in Python, it's time to show it to the public, and here it is!
@ -39,6 +39,23 @@ All is managed by environment variables.
 Please look into the `.env.dist` file to list all possible environment variables.
 You have to install MariaDB (or any MySQL server) to be able to access the admin panel.

+### Bypassing CloudFlare for YggTorrent
+
+YggTorrent uses CloudFlare to protect itself from DDoS attacks.
+This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...*
+Over time, CloudFlare will systematically ask you to prove that you are human.
+
+CloudFlare has three types of challenges to complete (from the easiest to solve to the hardest):
+- Pure Javascript done through [cloudscraper](https://github.com/VeNoMouS/cloudscraper) without any configurations
+- CAPTCHA *(not supported but maybe soon™)*
+- JavaScript and browser actions done through [CloudProxy](https://github.com/NoahCardoza/CloudProxy)
+
+For CloudProxy, you have to have an instance running.
+Please refer to the [documentation](https://github.com/NoahCardoza/CloudProxy#installation) or install it via [docker](https://github.com/NoahCardoza/CloudProxy#docker).
+After that, change the `CLOUDPROXY_ENDPOINT` environment variable to point to your CloudProxy instance.
+
+If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do any of this; CloudProxy comes pre-installed.
+
 ## Links

 - Project homepage: https://nyaa.crystalyx.net/
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -9,6 +9,7 @@ services:
    working_dir: /app
    depends_on:
      - db
+      - cloudproxy
    env_file:
      - .env.dist
      - .env
@ -24,3 +25,11 @@ services:
      - .env
    volumes:
      - ./.db:/var/lib/mysql
+
+  cloudproxy:
+    image: jbouhd/cloudproxy
+    ports:
+      - "8191:8191"
+    env_file:
+      - .env.dist
+      - .env
--- a/pynyaata/config.py
+++ b/pynyaata/config.py
@ -15,6 +15,7 @@ APP_PORT = int(environ.get('FLASK_PORT', 5000))
 CACHE_TIMEOUT = int(environ.get('CACHE_TIMEOUT', 60 * 60))
 REQUESTS_TIMEOUT = int(environ.get('REQUESTS_TIMEOUT', 5))
 BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else []
+CLOUDPROXY_ENDPOINT = environ.get('CLOUDPROXY_ENDPOINT')
 MYSQL_ENABLED = False

 app = Flask(__name__)
@ -36,7 +37,7 @@ if db_host:
        db_user, db_password, db_host, db_name
    )
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
-    app.config['SQLALCHEMY_ECHO'] = IS_DEBUG
+    app.config['SQLALCHEMY_ECHO'] = False
    app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
        'pool_recycle': 200
    }
--- a/pynyaata/connectors/core.py
+++ b/pynyaata/connectors/core.py
@ -3,13 +3,16 @@ from abc import ABC, abstractmethod
 from datetime import datetime
 from enum import Enum
 from functools import wraps
+from json import dumps, loads
 from logging import getLogger
+from urllib.parse import urlencode

+import requests
 from cloudscraper import create_scraper
 from cloudscraper.exceptions import CloudflareException, CaptchaException
 from requests import RequestException

-from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT
+from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT

 scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG)

@ -76,22 +79,41 @@ ConnectorCache = Cache()


 def curl_content(url, params=None, ajax=False, debug=True):
+    output = ''
+    http_code = 500
+    method = 'post' if (params is not None) else 'get'
+
    if ajax:
        headers = {'X-Requested-With': 'XMLHttpRequest'}
    else:
        headers = {}

    try:
-        if params is not None:
+        if method == 'post':
            response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
        else:
            response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)

        output = response.text
        http_code = response.status_code
-    except (RequestException, CloudflareException, CaptchaException) as e:
-        output = ''
-        http_code = 500
+    except CloudflareException as e:
+        if CLOUDPROXY_ENDPOINT:
+            headers['Content-Type'] = 'application/x-www-form-urlencoded' if (method == 'post') else 'application/json'
+
+            json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
+                'cmd': 'request.%s' % method,
+                'url': url,
+                'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.0 Safari/537.36',
+                'postData': '%s' % urlencode(params) if (method == 'post') else ''
+            }))
+
+            response = loads(json_response.text)
+            output = response['solution']['response']
+            http_code = json_response.status_code
+
+            if debug and http_code != 200:
+                getLogger().exception('%s\n\n%s' % (str(e), json_response.text))
+    except (RequestException, CaptchaException) as e:
        if debug:
            getLogger().exception(e)