diff --git a/README.md b/README.md index b99dfca..21f720e 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,15 @@ All is managed by environment variables. Please look into the `.env.dist` file to list all possible environment variables. You have to have a running database server to be able to access the admin panel. +### Bypassing CloudFlare for YggTorrent + +YggTorrent uses CloudFlare to protect itself against DDoS attacks. +This app makes abusive requests to their servers, and CloudFlare will try to detect whether PyNyaaTa is a real human or not. *I think you have the answer to the question ...* +Over time, CloudFlare will systematically ask you to prove that you are human. +To be able to see YggTorrent results, you need to have a FlareSolverr instance running. +Please refer to their [documentation](https://github.com/FlareSolverr/FlareSolverr#installation). +After that, set the `CLOUDPROXY_ENDPOINT` environment variable to point to your FlareSolverr instance. + ## Links - Project homepage: https://nyaa.crystalyx.net/ diff --git a/get404.py b/get404.py index 7eda786..f183b9d 100644 --- a/get404.py +++ b/get404.py @@ -4,7 +4,7 @@ from pynyaata.models import AnimeLink links = AnimeLink.query.all() for link in links: - html = curl_content(link.link, debug=False) + html = curl_content(link.link, debug=False, cloudflare=True) if html['http_code'] != 200 and html['http_code'] != 500: print('(%d) %s %s : %s' % ( diff --git a/pynyaata/config.py b/pynyaata/config.py index f1363be..6d87431 100644 --- a/pynyaata/config.py +++ b/pynyaata/config.py @@ -17,7 +17,7 @@ APP_PORT = int(environ.get('FLASK_PORT', 5000)) CACHE_TIMEOUT = int(environ.get('CACHE_TIMEOUT', 60 * 60)) REQUESTS_TIMEOUT = int(environ.get('REQUESTS_TIMEOUT', 5)) BLACKLIST_WORDS = environ.get('BLACKLIST_WORDS', '').split(',') if environ.get('BLACKLIST_WORDS', '') else [] -YGG_IP = environ.get('YGG_IP') +CLOUDPROXY_ENDPOINT = environ.get('CLOUDPROXY_ENDPOINT') DB_ENABLED = False REDIS_ENABLED = False TRANSMISSION_ENABLED = 
False diff --git a/pynyaata/connectors/core.py b/pynyaata/connectors/core.py index 45d5d63..4bb2625 100644 --- a/pynyaata/connectors/core.py +++ b/pynyaata/connectors/core.py @@ -4,13 +4,16 @@ from functools import wraps from json import dumps, loads from redis.exceptions import RedisError -from requests import RequestException, get, post +from requests import RequestException, Session from ..config import CACHE_TIMEOUT, REDIS_ENABLED, REQUESTS_TIMEOUT, logger +from ..flarerequests import FlareRequests if REDIS_ENABLED: from ..config import cache +cloudproxy_session = None + class ConnectorReturn(Enum): SEARCH = 1 @@ -68,28 +71,26 @@ class Cache: ConnectorCache = Cache() -def curl_content(url, params=None, ajax=False, debug=True, custom_host=None): +def curl_content(url, params=None, ajax=False, debug=True, cloudflare=False): output = '' http_code = 500 method = 'post' if (params is not None) else 'get' + request = FlareRequests() if cloudflare else Session() headers = {} if ajax: headers['X-Requested-With'] = 'XMLHttpRequest' - if custom_host: - headers['Host'] = custom_host - try: if method == 'post': - response = post( + response = request.post( url, params, timeout=REQUESTS_TIMEOUT, headers=headers ) else: - response = get( + response = request.get( url, timeout=REQUESTS_TIMEOUT, headers=headers diff --git a/pynyaata/connectors/yggtorrent.py b/pynyaata/connectors/yggtorrent.py index 0aa3742..0430db5 100644 --- a/pynyaata/connectors/yggtorrent.py +++ b/pynyaata/connectors/yggtorrent.py @@ -5,17 +5,14 @@ from urllib.parse import quote from bs4 import BeautifulSoup from .core import ConnectorCache, ConnectorCore, ConnectorReturn, curl_content -from ..config import YGG_IP from ..utils import check_blacklist_words, check_if_vf, link_exist_in_db, parse_date -YGG_DOMAIN = 'www6.yggtorrent.lol' - class YggTorrent(ConnectorCore): color = 'is-success' title = 'YggTorrent' favicon = 'yggtorrent.png' - base_url = 'https://%s' % YGG_DOMAIN if not YGG_IP else 'http://%s' % 
class FlareRequests(Session):
    """``requests.Session`` that can route requests through a FlareSolverr proxy.

    When ``CLOUDPROXY_ENDPOINT`` is empty the session behaves exactly like a
    plain ``Session``. Otherwise each request is sent as a JSON command to
    that endpoint and the proxy's answer is repackaged as a ``Response``.
    """

    @staticmethod
    def _declared_charset(headers):
        """Return the charset named in a ``Content-Type`` header, or ``None``."""
        for name, value in headers.items():
            # The proxy hands back a plain dict, so match the key ourselves
            # instead of relying on a particular letter case.
            if str(name).lower() != "content-type":
                continue
            for parameter in str(value).split(";")[1:]:
                key, _, charset = parameter.partition("=")
                charset = charset.strip().strip("\"'")
                if key.strip().lower() == "charset" and charset:
                    return charset
        return None

    def request(self, method, url, params=None, data=None, **kwargs):
        """Perform a request, through the proxy when one is configured.

        Args:
            method: HTTP verb; it is lower-cased into the proxy command
                ``request.<method>``.
            url: Target URL.
            params: Optional query parameters, url-encoded and appended to
                ``url``.
            data: Optional request body. Mappings and pair sequences are
                url-encoded; ``str``/``bytes`` are forwarded unchanged.
            **kwargs: Passed to ``Session.request`` only when no proxy is
                configured. In proxy mode they (``timeout``, ``headers``, ...)
                are not forwarded.

        Returns:
            A ``requests.Response``. In proxy mode it is rebuilt from the
            proxy's ``solution`` object.

        Raises:
            RequestException: If the proxy answer is not valid JSON or does
                not contain a usable ``solution``.
        """
        if not CLOUDPROXY_ENDPOINT:
            return super().request(method, url, params, data, **kwargs)

        # Imported here because this module's top-level import does not
        # bring RequestException into scope.
        from requests import RequestException

        if params:
            separator = "&" if "?" in url else "?"
            url = f"{url}{separator}{parse.urlencode(params)}"

        post_data = {
            "cmd": f"request.{method.lower()}",
            "url": url,
        }

        if data:
            # urlencode() rejects plain strings, so pass pre-encoded bodies
            # through untouched.
            if isinstance(data, bytes):
                data = data.decode()
            post_data["postData"] = (
                data if isinstance(data, str) else parse.urlencode(data)
            )

        # NOTE(review): no timeout is applied to the proxy call itself, so a
        # stuck proxy blocks the caller. Confirm the desired limit before
        # adding one, since challenge solving may be slow.
        response = post(
            CLOUDPROXY_ENDPOINT,
            json=post_data,
        )

        try:
            answer = response.json()
        except ValueError as err:
            raise RequestException(
                f"Invalid JSON received from the proxy for {url}"
            ) from err

        solution = answer.get("solution") if isinstance(answer, dict) else None
        if not isinstance(solution, dict) or "status" not in solution:
            # Previously this path fell through and returned None, which made
            # callers fail later with an unrelated AttributeError.
            detail = answer.get("message") if isinstance(answer, dict) else None
            raise RequestException(
                f"The proxy returned no solution for {url}: {detail}"
            )

        headers = solution.get("headers") or {}
        body = solution.get("response") or ""

        # The proxy delivers the body as already-decoded text. The bytes put
        # into ``raw`` and ``Response.encoding`` must use the same codec,
        # otherwise ``.text`` would not give the original string back.
        encoding = self._declared_charset(headers) or "utf-8"
        try:
            content = body.encode(encoding)
        except (LookupError, UnicodeEncodeError):
            encoding = "utf-8"
            content = body.encode(encoding)

        resolved = Response()

        resolved.status_code = solution["status"]
        # update() keeps Response's own case-insensitive header mapping.
        resolved.headers.update(headers)
        resolved.raw = BytesIO(content)
        resolved.url = url
        resolved.encoding = encoding
        resolved.reason = answer.get("status")
        if "cookies" in solution:
            # Kept as the JSON value returned by the proxy (not a CookieJar),
            # as before.
            resolved.cookies = solution["cookies"]

        return resolved