Test replacing PhantomJS with something lighter

This commit is contained in:
Michel Roux 2020-04-06 15:09:27 +02:00
parent 409381b5db
commit c9536f0216
4 changed files with 23 additions and 87 deletions

View File

@ -3,10 +3,11 @@ FROM debian
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
ENV LANG C.UTF-8 ENV LANG C.UTF-8
RUN apt-get update && apt-get -y upgrade && \ RUN curl -sSL https://deb.nodesource.com/setup_12.x | bash - && \
apt-get -y install python3 python3-pip locales \ apt-get -y upgrade && \
apt-get -y install python3 python3-pip locales nodejs \
python3-flask python3-flask-sqlalchemy python3-flask-httpauth python3-flaskext.wtf \ python3-flask python3-flask-sqlalchemy python3-flask-httpauth python3-flaskext.wtf \
python3-pymysql python3-requests python3-bs4 python3-dotenv && \ python3-pymysql python3-requests python3-bs4 python3-dotenv && \
apt-get -y --no-install-recommends install phantomjs && \ pip3 install cfscrape && \
printf "en_US.UTF-8 UTF-8\nfr_FR.UTF-8 UTF-8\n" > /etc/locale.gen && \ printf "en_US.UTF-8 UTF-8\nfr_FR.UTF-8 UTF-8\n" > /etc/locale.gen && \
locale-gen && rm -rf /var/lib/apt/lists/* locale-gen && rm -rf /var/lib/apt/lists/*

View File

@ -5,12 +5,10 @@ from datetime import datetime, timedelta
from enum import Enum from enum import Enum
from functools import wraps from functools import wraps
from logging import getLogger from logging import getLogger
from subprocess import run
from sys import platform
from urllib.parse import quote from urllib.parse import quote
import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from cfscrape import create_scraper
from config import IS_DEBUG, CACHE_TIMEOUT, BLACKLIST_WORDS from config import IS_DEBUG, CACHE_TIMEOUT, BLACKLIST_WORDS
from models import AnimeLink from models import AnimeLink
@ -138,37 +136,26 @@ class Connector(ABC):
return self return self
def curl_content(self, url, params=None, ajax=False): def curl_content(self, url, params=None, ajax=False):
if self.is_behind_cloudflare: scraper = create_scraper()
try:
qt_env = {'QT_QPA_PLATFORM': 'offscreen'} if platform == 'linux' else {} if ajax:
qt_output = run('phantomjs --cookies-file=/tmp/cookies.json delay.js "%s" 5000' % url, env=qt_env, headers = {'X-Requested-With': 'XMLHttpRequest'}
shell=True, check=True, capture_output=True, timeout=7000)
output = qt_output.stdout
http_code = 200
except Exception as e:
output = ''
http_code = 500
if IS_DEBUG:
getLogger().exception(e)
else: else:
if ajax: headers = {}
headers = {'X-Requested-With': 'XMLHttpRequest'}
try:
if params is not None:
response = scraper.post(url, params, timeout=5, headers=headers)
else: else:
headers = {} response = scraper.get(url, timeout=5, headers=headers)
try: output = response.text
if params is not None: http_code = response.status_code
response = requests.post(url, params, timeout=5, headers=headers) except Exception as e:
else: output = ''
response = requests.get(url, timeout=5, headers=headers) http_code = 500
if IS_DEBUG:
output = response.text getLogger().exception(e)
http_code = response.status_code
except requests.Timeout as e:
output = ''
http_code = 500
if IS_DEBUG:
getLogger().exception(e)
return {'http_code': http_code, 'output': output} return {'http_code': http_code, 'output': output}

View File

@ -1,53 +0,0 @@
// https://stackoverflow.com/a/41017165
"use strict";

/*
 * PhantomJS script: load a URL and print the page content to stdout.
 *
 * Usage: delay.js URL delay
 *   URL   - address handed to page.open
 *   delay - value handed to setTimeout for the late check
 *           (presumably milliseconds, passed through as the raw argument)
 *
 * While the page loads, every received resource updates three flags based on
 * its HTTP status (200, 429, 503). Once the page has opened:
 *   - an early check (1 ms timer) exits with 429 if a 429 was seen, or prints
 *     the page right away if a 200 was seen and no 503 was;
 *   - a late check (after `delay`) prints the page if both a 503 and a 200
 *     were seen, and otherwise exits with 503.
 * NOTE(review): the 503-then-200 sequence looks like an anti-bot interstitial
 * followed by the real page - confirm against the caller's expectations.
 * NOTE(review): exit codes above 255 are likely truncated by the OS - confirm.
 *
 * Kept in ES5 syntax on purpose: the script targets the PhantomJS runtime.
 */
var page = require('webpage').create();
var system = require('system');

// Flags flipped by trackStatus() as responses arrive.
var sawOk = false;
var sawTooManyRequests = false;
var sawUnavailable = false;

var argCount = system.args.length;
var waitMs;

// Write the current page content to stdout and end the PhantomJS process.
function printPageAndExit() {
    console.log(page.content);
    phantom.exit();
}

// Resource callback: remember which of the interesting statuses were observed.
function trackStatus(response) {
    var code = response.status;
    if (code === 200) {
        sawOk = true;
    } else if (code === 429) {
        sawTooManyRequests = true;
    } else if (code === 503) {
        sawUnavailable = true;
    }
}

// Runs 1 ms after the page opened: bail out on 429, or print a plain 200 page.
function earlyCheck() {
    if (sawTooManyRequests) {
        phantom.exit(429);
        return;
    }
    if (sawOk && !sawUnavailable) {
        printPageAndExit();
    }
}

// Runs after the requested delay: print only if a 503 was followed by a 200.
function lateCheck() {
    if (sawUnavailable && sawOk) {
        printPageAndExit();
    } else {
        phantom.exit(503);
    }
}

// page.open callback: give up on load failure, otherwise arm both timers.
function onPageOpened(status) {
    if (status !== 'success') {
        phantom.exit(1);
        return;
    }
    window.setTimeout(lateCheck, waitMs);
    window.setTimeout(earlyCheck, 1);
}

if (argCount < 3 || argCount > 5) {
    console.log('Usage: delay.js URL delay');
    phantom.exit(1);
} else {
    waitMs = system.args[2];
    page.open(system.args[1], onPageOpened);
    // Assigned right after open(), as in the upstream snippet; open() is
    // asynchronous, so the handler is in place before any response arrives.
    page.onResourceReceived = trackStatus;
}

View File

@ -8,3 +8,4 @@ requests==2.21.0
beautifulsoup4==4.7.1 beautifulsoup4==4.7.1
python-dotenv==0.9.1 python-dotenv==0.9.1
Werkzeug==0.14.1 Werkzeug==0.14.1
cfscrape