diff --git a/Dockerfile b/Dockerfile
index ef53f53..3e03297 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,10 +3,12 @@ FROM debian
 ENV DEBIAN_FRONTEND noninteractive
 ENV LANG C.UTF-8
 
-RUN apt-get update && apt-get -y upgrade && \
-    apt-get -y install python3 python3-pip locales \
+RUN apt-get update && apt-get -y upgrade && \
+    apt-get -y install curl python3 python3-pip locales \
     python3-flask python3-flask-sqlalchemy python3-flask-httpauth python3-flaskext.wtf \
     python3-pymysql python3-requests python3-bs4 python3-dotenv && \
-    apt-get -y --no-install-recommends install phantomjs && \
+    curl -sSL https://deb.nodesource.com/setup_12.x | bash - && \
+    apt-get -y install nodejs && \
+    pip3 install cfscrape && \
     printf "en_US.UTF-8 UTF-8\nfr_FR.UTF-8 UTF-8\n" > /etc/locale.gen && \
     locale-gen && rm -rf /var/lib/apt/lists/*
diff --git a/connectors.py b/connectors.py
index 4152de1..6c8b73b 100644
--- a/connectors.py
+++ b/connectors.py
@@ -5,12 +5,10 @@ from datetime import datetime, timedelta
 from enum import Enum
 from functools import wraps
 from logging import getLogger
-from subprocess import run
-from sys import platform
 from urllib.parse import quote
 
-import requests
 from bs4 import BeautifulSoup
+from cfscrape import create_scraper
 
 from config import IS_DEBUG, CACHE_TIMEOUT, BLACKLIST_WORDS
 from models import AnimeLink
@@ -138,37 +136,26 @@ class Connector(ABC):
         return self
 
     def curl_content(self, url, params=None, ajax=False):
-        if self.is_behind_cloudflare:
-            try:
-                qt_env = {'QT_QPA_PLATFORM': 'offscreen'} if platform == 'linux' else {}
-                qt_output = run('phantomjs --cookies-file=/tmp/cookies.json delay.js "%s" 5000' % url, env=qt_env,
-                                shell=True, check=True, capture_output=True, timeout=7000)
-                output = qt_output.stdout
-                http_code = 200
-            except Exception as e:
-                output = ''
-                http_code = 500
-                if IS_DEBUG:
-                    getLogger().exception(e)
+        scraper = create_scraper()
+
+        if ajax:
+            headers = {'X-Requested-With': 'XMLHttpRequest'}
         else:
-            if ajax:
-                headers = {'X-Requested-With': 'XMLHttpRequest'}
+            headers = {}
+
+        try:
+            if params is not None:
+                response = scraper.post(url, params, timeout=5, headers=headers)
             else:
-                headers = {}
+                response = scraper.get(url, timeout=5, headers=headers)
-
-            try:
-                if params is not None:
-                    response = requests.post(url, params, timeout=5, headers=headers)
-                else:
-                    response = requests.get(url, timeout=5, headers=headers)
-
-                output = response.text
-                http_code = response.status_code
-            except requests.Timeout as e:
-                output = ''
-                http_code = 500
-                if IS_DEBUG:
-                    getLogger().exception(e)
+
+            output = response.text
+            http_code = response.status_code
+        except Exception as e:
+            output = ''
+            http_code = 500
+            if IS_DEBUG:
+                getLogger().exception(e)
 
         return {'http_code': http_code, 'output': output}
 
diff --git a/delay.js b/delay.js
deleted file mode 100644
index 261466d..0000000
--- a/delay.js
+++ /dev/null
@@ -1,53 +0,0 @@
-// https://stackoverflow.com/a/41017165
-"use strict";
-var page = require('webpage').create(),
-    system = require('system'),
-    mustQuit = false,
-    canShow = false,
-    underAttack = false,
-    address, delay;
-
-if (system.args.length < 3 || system.args.length > 5) {
-    console.log('Usage: delay.js URL delay');
-    phantom.exit(1);
-} else {
-    address = system.args[1];
-    delay = system.args[2];
-
-    page.open(address, function (status) {
-        if (status !== 'success') {
-            phantom.exit(1);
-        } else {
-            window.setTimeout(function () {
-                if (underAttack && canShow) {
-                    console.log(page.content);
-                    phantom.exit();
-                } else {
-                    phantom.exit(503);
-                }
-            }, delay);
-            window.setTimeout(function () {
-                if (mustQuit) {
-                    phantom.exit(429);
-                } else if (!underAttack && canShow) {
-                    console.log(page.content);
-                    phantom.exit();
-                }
-            }, 1);
-        }
-    });
-
-    page.onResourceReceived = function (response) {
-        switch (response.status) {
-            case 200:
-                canShow = true;
-                break;
-            case 429:
-                mustQuit = true;
-                break;
-            case 503:
-                underAttack = true;
-                break;
-        }
-    };
-}
diff --git a/requirements.txt b/requirements.txt
index aee6488..658ec20 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ requests==2.21.0
 beautifulsoup4==4.7.1
 python-dotenv==0.9.1
 Werkzeug==0.14.1
+cfscrape