Remove cloudscraper
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Michel Roux 2021-01-07 21:19:50 +01:00
parent a769f7fddf
commit 4d6f7b1aba
11 changed files with 45 additions and 40 deletions

3
.dockerignore Normal file
View File

@ -0,0 +1,3 @@
.idea
.venv
.db

View File

@ -11,4 +11,4 @@ REQUESTS_TIMEOUT=5
CACHE_TIMEOUT=3600
MYSQL_ROOT_PASSWORD=root
BLACKLIST_WORDS=Chris44,Vol.,[zza],.ssa,Ref:rain
CLOUDPROXY_ENDPOINT=http://cloudproxy:8191/v1
CLOUDPROXY_ENDPOINT=http://flaresolverr:8191/v1

View File

@ -44,16 +44,10 @@ You have to install MariaDB (or any MySQL server) to be able to access the admin
YggTorrent uses CloudFlare to protect itself from DDoS attacks.
This app will make abusive requests to their servers, and CloudFlare will try to detect if PyNyaaTa is a real human or not. *I think you have the answer to the question ...*
Over time, CloudFlare will ask you systematically to prouve yourself.
CloudFlare have three type of challenge to be completed (from the easiest to resolve to the hardest) :
- Pure Javascript done through [cloudscraper](https://github.com/VeNoMouS/cloudscraper) without any configurations
- CAPTCHA *(not supported but maybe soon™)*
- JavaScript and browser actions done through [CloudProxy](https://github.com/NoahCardoza/CloudProxy)
For CloudProxy, you have to have an instance running.
Please refer to the [documentation](https://github.com/NoahCardoza/CloudProxy#installation) or install it via [docker](https://github.com/NoahCardoza/CloudProxy#docker).
After that, change the `CLOUDPROXY_ENDPOINT` environnement variable to refer to your CloudProxy instance.
Over time, CloudFlare will ask you systematically to prove yourself.
To be able to see YggTorrent results, you have to have a FlareSolverr instance running.
Please refer to their [documentation](https://github.com/FlareSolverr/FlareSolverr#installation).
After that, change the `CLOUDPROXY_ENDPOINT` environment variable to refer to your FlareSolverr instance.
If you use PyNyaaTa with Docker and the `docker-compose.yml` from this repository, you don't have to do any of this; it comes pre-installed.

View File

@ -9,7 +9,7 @@ services:
working_dir: /app
depends_on:
- db
- cloudproxy
- flaresolverr
env_file:
- .env.dist
- .env
@ -26,8 +26,8 @@ services:
volumes:
- ./.db:/var/lib/mysql
cloudproxy:
image: jbouhd/cloudproxy
flaresolverr:
image: flaresolverr/flaresolverr
ports:
- "8191:8191"
env_file:

View File

@ -5,14 +5,19 @@ from .core import Other
from .nyaa import Nyaa
from .pantsu import Pantsu
from .yggtorrent import YggTorrent, YggAnimation
from ..config import CLOUDPROXY_ENDPOINT
async def run_all(*args, **kwargs):
return list(await gather(Nyaa(*args, **kwargs).run(),
coroutines = [Nyaa(*args, **kwargs).run(),
Pantsu(*args, **kwargs).run(),
YggTorrent(*args, **kwargs).run(),
YggAnimation(*args, **kwargs).run(),
AnimeUltime(*args, **kwargs).run()))
AnimeUltime(*args, **kwargs).run()]
if CLOUDPROXY_ENDPOINT:
coroutines.extend([YggTorrent(*args, **kwargs).run(),
YggAnimation(*args, **kwargs).run()])
return list(await gather(*coroutines))
def get_instance(url, query):

View File

@ -12,6 +12,7 @@ class AnimeUltime(ConnectorCore):
favicon = 'animeultime.png'
base_url = 'http://www.anime-ultime.net'
is_light = True
is_behind_cloudflare = False
def get_full_search_url(self):
from_date = ''

View File

@ -8,13 +8,10 @@ from logging import getLogger
from urllib.parse import urlencode
import requests
from cloudscraper import create_scraper
from cloudscraper.exceptions import CloudflareException, CaptchaException
from requests import RequestException
from ..config import CACHE_TIMEOUT, IS_DEBUG, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT
from ..config import CACHE_TIMEOUT, REQUESTS_TIMEOUT, CLOUDPROXY_ENDPOINT
scraper = create_scraper(interpreter='js2py', debug=IS_DEBUG)
cloudproxy_session = None
@ -80,9 +77,11 @@ ConnectorCache = Cache()
def curl_content(url, params=None, ajax=False, debug=True):
from . import get_instance
output = ''
http_code = 500
method = 'post' if (params is not None) else 'get'
instance = get_instance(url, '')
if ajax:
headers = {'X-Requested-With': 'XMLHttpRequest'}
@ -90,15 +89,15 @@ def curl_content(url, params=None, ajax=False, debug=True):
headers = {}
try:
if not instance.is_behind_cloudflare:
if method == 'post':
response = scraper.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
response = requests.post(url, params, timeout=REQUESTS_TIMEOUT, headers=headers)
else:
response = scraper.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
response = requests.get(url, timeout=REQUESTS_TIMEOUT, headers=headers)
output = response.text
http_code = response.status_code
except CloudflareException as e:
if CLOUDPROXY_ENDPOINT:
elif CLOUDPROXY_ENDPOINT:
global cloudproxy_session
if not cloudproxy_session:
json_session = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
@ -123,15 +122,12 @@ def curl_content(url, params=None, ajax=False, debug=True):
output = response['solution']['response']
if http_code == 500:
json_response = requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
requests.post(CLOUDPROXY_ENDPOINT, headers=headers, data=dumps({
'cmd': 'sessions.destroy',
'session': cloudproxy_session,
}))
cloudproxy_session = None
if debug and http_code != 200:
getLogger().exception('%s\n\n%s' % (str(e), json_response.text))
except (RequestException, CaptchaException) as e:
except RequestException as e:
if debug:
getLogger().exception(e)
@ -164,6 +160,11 @@ class ConnectorCore(ABC):
def is_light(self):
pass
@property
@abstractmethod
def is_behind_cloudflare(self):
pass
def __init__(self, query, page=1, return_type=ConnectorReturn.SEARCH):
self.query = query
self.data = []
@ -206,6 +207,7 @@ class Other(ConnectorCore):
favicon = 'blank.png'
base_url = ''
is_light = True
is_behind_cloudflare = False
def get_full_search_url(self):
pass

View File

@ -10,6 +10,7 @@ class Nyaa(ConnectorCore):
favicon = 'nyaa.png'
base_url = 'https://nyaa.si'
is_light = False
is_behind_cloudflare = False
def get_full_search_url(self):
sort_type = 'size'

View File

@ -10,6 +10,7 @@ class Pantsu(ConnectorCore):
favicon = 'pantsu.png'
base_url = 'https://nyaa.net'
is_light = False
is_behind_cloudflare = False
def get_full_search_url(self):
sort_type = 4

View File

@ -15,6 +15,7 @@ class YggTorrent(ConnectorCore):
base_url = 'https://www2.yggtorrent.si'
is_light = False
category = 2179
is_behind_cloudflare = True
def get_full_search_url(self):
sort_type = 'size'

View File

@ -3,11 +3,8 @@ Flask-SQLAlchemy==2.4.4
Flask-HTTPAuth==4.2.0
Flask-WTF==0.14.3
WTForms==2.3.3
PyMySQL==0.10.1
PyMySQL==1.0.0
requests==2.25.1
beautifulsoup4==4.9.3
python-dotenv==0.15.0
cloudscraper==1.2.50
Js2Py==0.70
polling2==0.4.6
dateparser==1.0.0