Remove cloudscraper

2021-01-07 21:19:50 +01:00 · 2021-01-07 21:19:50 +01:00 · 4d6f7b1aba
commit 4d6f7b1aba
parent a769f7fddf
11 changed files with 45 additions and 40 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,3 @@
+.idea
+.venv
+.db
--- a/.env.dist
+++ b/.env.dist
@ -11,4 +11,4 @@ REQUESTS_TIMEOUT=5
 CACHE_TIMEOUT=3600
 MYSQL_ROOT_PASSWORD=root
 BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
-CLOUDPROXY_ENDPOINT=http://cloudproxy:8191/v1
+CLOUDPROXY_ENDPOINT=http://flaresolverr:8191/v1
--- a/README.md
+++ b/README.md
@ -44,16 +44,10 @@ You have to install MariaDB (or any MySQL server) to be able to access the admin

 YggTorrent use CloudFlare to protect them to DDoS attacks.
 This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...*
-Over time, CloudFlare will ask you systematically to prouve yourself.
-
-CloudFlare have three type of challenge to be completed (from the easiest to resolve to the hardest) :
- Pure Javascript done through [cloudscraper](https://github.com/VeNoMouS/cloudscraper) without any configurations
- CAPTCHA *(not supported but maybe soon™)*
- JavaScript and browser actions done through [CloudProxy](https://github.com/NoahCardoza/CloudProxy)
-
-For CloudProxy, you have to have an instance running.
-Please refer to the [documentation](https://github.com/NoahCardoza/CloudProxy#installation) or install it via [docker](https://github.com/NoahCardoza/CloudProxy#docker).
-After that, change the `CLOUDPROXY_ENDPOINT` environnement variable to refer to your CloudProxy instance.
+Over time, CloudFlare will systematically ask you to prove that you are human.
+To be able to see YggTorrent results, you need a running FlareSolverr instance.
+Please refer to their [documentation](https://github.com/FlareSolverr/FlareSolverr#installation).
+After that, change the `CLOUDPROXY_ENDPOINT` environment variable to refer to your FlareSolverr instance.

 If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do all this, it comes pre-installed.

--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -9,7 +9,7 @@ services:
    working_dir: /app
    depends_on:
      - db
-      - cloudproxy
+      - flaresolverr
    env_file:
      - .env.dist
      - .env
@ -26,8 +26,8 @@ services:
    volumes:
      - ./.db:/var/lib/mysql

-  cloudproxy:
-    image: jbouhd/cloudproxy
+  flaresolverr:
+    image: flaresolverr/flaresolverr
    ports:
      - "8191:8191"
    env_file:
--- a/pynyaata/connectors/init.py
+++ b/pynyaata/connectors/init.py
@ -5,14 +5,19 @@ from .core import Other
 from .nyaa import Nyaa
 from .pantsu import Pantsu
 from .yggtorrent import YggTorrent, YggAnimation
+from ..config import CLOUDPROXY_ENDPOINT


 async def run_all(*args, **kwargs):
-    return list(await gather(Nyaa(*args, **kwargs).run(),
+    coroutines = [Nyaa(*args, **kwargs).run(),
                  Pantsu(*args, **kwargs).run(),
-                             YggTorrent(*args, **kwargs).run(),
-                             YggAnimation(*args, **kwargs).run(),
-                             AnimeUltime(*args, **kwargs).run()))
+                  AnimeUltime(*args, **kwargs).run()]
+
+    if CLOUDPROXY_ENDPOINT:
+        coroutines.extend([YggTorrent(*args, **kwargs).run(),
+                           YggAnimation(*args, **kwargs).run()])
+
+    return list(await gather(*coroutines))


 def get_instance(url, query):
--- a/pynyaata/connectors/animeultime.py
+++ b/pynyaata/connectors/animeultime.py
@ -12,6 +12,7 @@ class AnimeUltime(ConnectorCore):
    favicon = 'animeultime.png'
    base_url = 'http://www.anime-ultime.net'
    is_light = True
+    is_behind_cloudflare = False

    def get_full_search_url(self):
        from_date = ''
--- a/pynyaata/connectors/core.py
+++ b/pynyaata/connectors/core.py
@ -8,13 +8,10 @@ from logging import getLogger
 from urllib.parse import urlencode

 import requests
-from cloudscraper import create_scraper
-from cloudscraper.exceptions import CloudflareException, CaptchaException
 from requests import RequestException

-from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT
+from ..config import CACHE_TIMEOUT, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT

-scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG)
 cloudproxy_session = None


@ -80,9 +77,11 @@ ConnectorCache = Cache()


 def curl_content(url, params=None, ajax=False, debug=True):
+    from . import get_instance
    output = ''
    http_code = 500
    method = 'post' if (params is not None) else 'get'
+    instance = get_instance(url, '')

    if ajax:
        headers = {'X-Requested-With': 'XMLHttpRequest'}
@ -90,15 +89,15 @@ def curl_content(url, params=None, ajax=False, debug=True):
        headers = {}

    try:
+        if not instance.is_behind_cloudflare:
            if method == 'post':
-            response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
+                response = requests.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
            else:
-            response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
+                response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)

            output = response.text
            http_code = response.status_code
-    except CloudflareException as e:
-        if CLOUDPROXY_ENDPOINT:
+        elif CLOUDPROXY_ENDPOINT:
            global cloudproxy_session
            if not cloudproxy_session:
                json_session = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
@ -123,15 +122,12 @@ def curl_content(url, params=None, ajax=False, debug=True):
                output = response['solution']['response']

            if http_code == 500:
-                json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
+                requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
                    'cmd': 'sessions.destroy',
                    'session': cloudproxy_session,
                }))
                cloudproxy_session = None
-
-            if debug and http_code != 200:
-                getLogger().exception('%s\n\n%s' % (str(e), json_response.text))
-    except (RequestException, CaptchaException) as e:
+    except RequestException as e:
        if debug:
            getLogger().exception(e)

@ -164,6 +160,11 @@ class ConnectorCore(ABC):
    def is_light(self):
        pass

+    @property
+    @abstractmethod
+    def is_behind_cloudflare(self):
+        pass
+
    def __init__(self, query, page=1, return_type=ConnectorReturn.SEARCH):
        self.query = query
        self.data = []
@ -206,6 +207,7 @@ class Other(ConnectorCore):
    favicon = 'blank.png'
    base_url = ''
    is_light = True
+    is_behind_cloudflare = False

    def get_full_search_url(self):
        pass
--- a/pynyaata/connectors/nyaa.py
+++ b/pynyaata/connectors/nyaa.py
@ -10,6 +10,7 @@ class Nyaa(ConnectorCore):
    favicon = 'nyaa.png'
    base_url = 'https://nyaa.si'
    is_light = False
+    is_behind_cloudflare = False

    def get_full_search_url(self):
        sort_type = 'size'
--- a/pynyaata/connectors/pantsu.py
+++ b/pynyaata/connectors/pantsu.py
@ -10,6 +10,7 @@ class Pantsu(ConnectorCore):
    favicon = 'pantsu.png'
    base_url = 'https://nyaa.net'
    is_light = False
+    is_behind_cloudflare = False

    def get_full_search_url(self):
        sort_type = 4
--- a/pynyaata/connectors/yggtorrent.py
+++ b/pynyaata/connectors/yggtorrent.py
@ -15,6 +15,7 @@ class YggTorrent(ConnectorCore):
    base_url = 'https://www2.yggtorrent.si'
    is_light = False
    category = 2179
+    is_behind_cloudflare = True

    def get_full_search_url(self):
        sort_type = 'size'
--- a/requirements.txt
+++ b/requirements.txt
@ -3,11 +3,8 @@ Flask-SQLAlchemy==2.4.4
 Flask-HTTPAuth==4.2.0
 Flask-WTF==0.14.3
 WTForms==2.3.3
-PyMySQL==0.10.1
+PyMySQL==1.0.0
 requests==2.25.1
 beautifulsoup4==4.9.3
 python-dotenv==0.15.0
-cloudscraper==1.2.50
-Js2Py==0.70
-polling2==0.4.6
 dateparser==1.0.0