Remove cloudscraper

2021-01-07 21:19:50 +01:00 · 2021-01-07 21:19:50 +01:00 · 4d6f7b1aba
parent a769f7fddf
commit 4d6f7b1aba
11 changed files with 45 additions and 40 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,3 @@
 .idea
 .venv
 .db
--- a/.env.dist
+++ b/.env.dist
@ -11,4 +11,4 @@ REQUESTS_TIMEOUT=5
 CACHE_TIMEOUT=3600
 MYSQL_ROOT_PASSWORD=root
 BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
-CLOUDPROXY_ENDPOINT=http://cloudproxy:8191/v1
+CLOUDPROXY_ENDPOINT=http://flaresolverr:8191/v1
--- a/README.md
+++ b/README.md
@ -44,16 +44,10 @@ You have to install MariaDB (or any MySQL server) to be able to access the admin
 YggTorrent uses CloudFlare to protect itself from DDoS attacks.
 This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...*
-Over time, CloudFlare will ask you systematically to prouve yourself.
+Over time, CloudFlare will ask you systematically to prove yourself.
-
+To be able to see YggTorrent results, you have to have a FlareSolverr instance running.
-CloudFlare have three type of challenge to be completed (from the easiest to resolve to the hardest) :
+Please refer to their [documentation](https://github.com/FlareSolverr/FlareSolverr#installation).
- Pure Javascript done through [cloudscraper](https://github.com/VeNoMouS/cloudscraper) without any configurations
+After that, change the `CLOUDPROXY_ENDPOINT` environment variable to refer to your FlareSolverr instance.
 - CAPTCHA *(not supported but maybe soon™)*
 - JavaScript and browser actions done through [CloudProxy](https://github.com/NoahCardoza/CloudProxy)
 For CloudProxy, you have to have an instance running.
 Please refer to the [documentation](https://github.com/NoahCardoza/CloudProxy#installation) or install it via [docker](https://github.com/NoahCardoza/CloudProxy#docker).
 After that, change the `CLOUDPROXY_ENDPOINT` environment variable to refer to your CloudProxy instance.
 If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do all this, it comes pre-installed.
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -9,7 +9,7 @@ services:
    working_dir: /app
    depends_on:
      - db
-      - cloudproxy
+      - flaresolverr
    env_file:
      - .env.dist
      - .env
@ -26,8 +26,8 @@ services:
    volumes:
      - ./.db:/var/lib/mysql
-  cloudproxy:
+  flaresolverr:
-    image: jbouhd/cloudproxy
+    image: flaresolverr/flaresolverr
    ports:
      - "8191:8191"
    env_file:
--- a/pynyaata/connectors/init.py
+++ b/pynyaata/connectors/init.py
@ -5,14 +5,19 @@ from .core import Other
 from .nyaa import Nyaa
 from .pantsu import Pantsu
 from .yggtorrent import YggTorrent, YggAnimation
 from ..config import CLOUDPROXY_ENDPOINT
 async def run_all(*args, **kwargs):
-    return list(await gather(Nyaa(*args, **kwargs).run(),
+    coroutines = [Nyaa(*args, **kwargs).run(),
-                             Pantsu(*args, **kwargs).run(),
+                  Pantsu(*args, **kwargs).run(),
-                             YggTorrent(*args, **kwargs).run(),
+                  AnimeUltime(*args, **kwargs).run()]
-                             YggAnimation(*args, **kwargs).run(),
+
-                             AnimeUltime(*args, **kwargs).run()))
+    if CLOUDPROXY_ENDPOINT:
        coroutines.extend([YggTorrent(*args, **kwargs).run(),
                           YggAnimation(*args, **kwargs).run()])
    return list(await gather(*coroutines))
 def get_instance(url, query):
--- a/pynyaata/connectors/animeultime.py
+++ b/pynyaata/connectors/animeultime.py
@ -12,6 +12,7 @@ class AnimeUltime(ConnectorCore):
    favicon = 'animeultime.png'
    base_url = 'http://www.anime-ultime.net'
    is_light = True
    is_behind_cloudflare = False
    def get_full_search_url(self):
        from_date = ''
--- a/pynyaata/connectors/core.py
+++ b/pynyaata/connectors/core.py
@ -8,13 +8,10 @@ from logging import getLogger
 from urllib.parse import urlencode
 import requests
 from cloudscraper import create_scraper
 from cloudscraper.exceptions import CloudflareException, CaptchaException
 from requests import RequestException
-from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT
+from ..config import CACHE_TIMEOUT, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT
 scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG)
 cloudproxy_session = None
@ -80,9 +77,11 @@ ConnectorCache = Cache()
 def curl_content(url, params=None, ajax=False, debug=True):
    from . import get_instance
    output = ''
    http_code = 500
    method = 'post' if (params is not None) else 'get'
    instance = get_instance(url, '')
    if ajax:
        headers = {'X-Requested-With': 'XMLHttpRequest'}
@ -90,15 +89,15 @@ def curl_content(url, params=None, ajax=False, debug=True):
        headers = {}
    try:
-        if method == 'post':
+        if not instance.is_behind_cloudflare:
-            response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
+            if method == 'post':
-        else:
+                response = requests.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
-            response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
+            else:
                response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
-        output = response.text
+            output = response.text
-        http_code = response.status_code
+            http_code = response.status_code
-    except CloudflareException as e:
+        elif CLOUDPROXY_ENDPOINT:
        if CLOUDPROXY_ENDPOINT:
            global cloudproxy_session
            if not cloudproxy_session:
                json_session = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
@ -123,15 +122,12 @@ def curl_content(url, params=None, ajax=False, debug=True):
                output = response['solution']['response']
            if http_code == 500:
-                json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
+                requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
                    'cmd': 'sessions.destroy',
                    'session': cloudproxy_session,
                }))
                cloudproxy_session = None
-
+    except RequestException as e:
            if debug and http_code != 200:
                getLogger().exception('%s\n\n%s' % (str(e), json_response.text))
    except (RequestException, CaptchaException) as e:
        if debug:
            getLogger().exception(e)
@ -164,6 +160,11 @@ class ConnectorCore(ABC):
    def is_light(self):
        pass
    @property
    @abstractmethod
    def is_behind_cloudflare(self):
        pass
    def __init__(self, query, page=1, return_type=ConnectorReturn.SEARCH):
        self.query = query
        self.data = []
@ -206,6 +207,7 @@ class Other(ConnectorCore):
    favicon = 'blank.png'
    base_url = ''
    is_light = True
    is_behind_cloudflare = False
    def get_full_search_url(self):
        pass
--- a/pynyaata/connectors/nyaa.py
+++ b/pynyaata/connectors/nyaa.py
@ -10,6 +10,7 @@ class Nyaa(ConnectorCore):
    favicon = 'nyaa.png'
    base_url = 'https://nyaa.si'
    is_light = False
    is_behind_cloudflare = False
    def get_full_search_url(self):
        sort_type = 'size'
--- a/pynyaata/connectors/pantsu.py
+++ b/pynyaata/connectors/pantsu.py
@ -10,6 +10,7 @@ class Pantsu(ConnectorCore):
    favicon = 'pantsu.png'
    base_url = 'https://nyaa.net'
    is_light = False
    is_behind_cloudflare = False
    def get_full_search_url(self):
        sort_type = 4
--- a/pynyaata/connectors/yggtorrent.py
+++ b/pynyaata/connectors/yggtorrent.py
@ -15,6 +15,7 @@ class YggTorrent(ConnectorCore):
    base_url = 'https://www2.yggtorrent.si'
    is_light = False
    category = 2179
    is_behind_cloudflare = True
    def get_full_search_url(self):
        sort_type = 'size'
--- a/requirements.txt
+++ b/requirements.txt
@ -3,11 +3,8 @@ Flask-SQLAlchemy==2.4.4
 Flask-HTTPAuth==4.2.0
 Flask-WTF==0.14.3
 WTForms==2.3.3
-PyMySQL==0.10.1
+PyMySQL==1.0.0
 requests==2.25.1
 beautifulsoup4==4.9.3
 python-dotenv==0.15.0
 cloudscraper==1.2.50
 Js2Py==0.70
 polling2==0.4.6
 dateparser==1.0.0