Test replacing PhantomJS with something lighter
This commit is contained in:
parent
409381b5db
commit
c9536f0216
@ -3,10 +3,11 @@ FROM debian
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
RUN apt-get update && apt-get -y upgrade && \
|
||||
apt-get -y install python3 python3-pip locales \
|
||||
RUN curl -sSL https://deb.nodesource.com/setup_12.x | bash - && \
|
||||
apt-get -y upgrade && \
|
||||
apt-get -y install python3 python3-pip locales nodejs \
|
||||
python3-flask python3-flask-sqlalchemy python3-flask-httpauth python3-flaskext.wtf \
|
||||
python3-pymysql python3-requests python3-bs4 python3-dotenv && \
|
||||
apt-get -y --no-install-recommends install phantomjs && \
|
||||
pip3 install cfscrape && \
|
||||
printf "en_US.UTF-8 UTF-8\nfr_FR.UTF-8 UTF-8\n" > /etc/locale.gen && \
|
||||
locale-gen && rm -rf /var/lib/apt/lists/*
|
||||
|
@ -5,12 +5,10 @@ from datetime import datetime, timedelta
|
||||
from enum import Enum
|
||||
from functools import wraps
|
||||
from logging import getLogger
|
||||
from subprocess import run
|
||||
from sys import platform
|
||||
from urllib.parse import quote
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from cfscrape import create_scraper
|
||||
|
||||
from config import IS_DEBUG, CACHE_TIMEOUT, BLACKLIST_WORDS
|
||||
from models import AnimeLink
|
||||
@ -138,37 +136,26 @@ class Connector(ABC):
|
||||
return self
|
||||
|
||||
def curl_content(self, url, params=None, ajax=False):
    """
    Request ``url`` through a cfscrape scraper and report the status and body.

    :param url: address to request
    :param params: when not ``None`` a POST is issued and this value is handed to
        ``scraper.post`` as its second positional argument (presumably the form
        data of a requests-style session -- confirm); otherwise a GET is issued
    :param ajax: when true, the ``X-Requested-With: XMLHttpRequest`` header is sent
    :return: dict with ``http_code`` (the response status code) and ``output``
        (the response text); any exception yields ``http_code`` 500 and an
        empty ``output``
    """
    scraper = create_scraper()

    if ajax:
        headers = {'X-Requested-With': 'XMLHttpRequest'}
    else:
        headers = {}

    try:
        if params is not None:
            response = scraper.post(url, params, timeout=5, headers=headers)
        else:
            response = scraper.get(url, timeout=5, headers=headers)

        output = response.text
        http_code = response.status_code
    except Exception as e:
        # Best effort: every failure is reported to the caller as a 500 with an
        # empty body instead of being raised; the traceback is only logged when
        # debugging is enabled.
        output = ''
        http_code = 500
        if IS_DEBUG:
            getLogger().exception(e)

    return {'http_code': http_code, 'output': output}
|
||||
|
||||
|
53
delay.js
53
delay.js
@ -1,53 +0,0 @@
|
||||
// https://stackoverflow.com/a/41017165
//
// PhantomJS script: opens a URL and prints the resulting page HTML on stdout.
//
// Usage: phantomjs delay.js URL delay
//   URL   - address to open
//   delay - number of milliseconds to wait before the second (final) check
//
// Exit status passed to phantom.exit():
//   0   - page content was printed
//   1   - bad arguments, or the page could not be opened
//   429 - a response with status 429 was seen
//   503 - no printable content was available once `delay` had elapsed
// NOTE(review): exit statuses above 255 are normally truncated to their low
// 8 bits by the OS, so a caller presumably sees 173 / 247 instead of
// 429 / 503 -- still non-zero, but confirm before matching on exact values.
"use strict";
var page = require('webpage').create(),
    system = require('system'),
    mustQuit = false,     // set once a 429 response has been received
    canShow = false,      // set once a 200 response has been received
    underAttack = false,  // set once a 503 response has been received
    address, delay;

// Print the usage line and stop with exit status 1.
function usage() {
    console.log('Usage: delay.js URL delay');
    phantom.exit(1);
}

// Print the current page HTML and stop with exit status 0.
function showContent() {
    console.log(page.content);
    phantom.exit();
}

if (system.args.length < 3 || system.args.length > 5) {
    usage();
} else {
    address = system.args[1];
    // Command-line arguments are strings; convert explicitly so a non-numeric
    // delay is reported instead of being silently coerced by setTimeout.
    delay = Number(system.args[2]);

    // Global isNaN is used on purpose: `delay` is already a number here and
    // the PhantomJS runtime is not guaranteed to provide Number.isNaN.
    if (isNaN(delay)) {
        usage();
    } else {
        // Register the listener before opening the page so that no response
        // can be received while the handler is still unset.
        page.onResourceReceived = function (response) {
            switch (response.status) {
                case 200:
                    canShow = true;
                    break;
                case 429:
                    mustQuit = true;
                    break;
                case 503:
                    underAttack = true;
                    break;
            }
        };

        page.open(address, function (status) {
            if (status !== 'success') {
                phantom.exit(1);
            } else {
                // Final check after `delay` ms: only a page that first
                // answered 503 and later produced a 200 is printed here;
                // anything else still pending is reported as 503.
                window.setTimeout(function () {
                    if (underAttack && canShow) {
                        showContent();
                    } else {
                        phantom.exit(503);
                    }
                }, delay);
                // Immediate check: give up on a 429, or print right away when
                // a 200 was received and no 503 was seen.
                window.setTimeout(function () {
                    if (mustQuit) {
                        phantom.exit(429);
                    } else if (!underAttack && canShow) {
                        showContent();
                    }
                }, 1);
            }
        });
    }
}
|
@ -8,3 +8,4 @@ requests==2.21.0
|
||||
beautifulsoup4==4.7.1
|
||||
python-dotenv==0.9.1
|
||||
Werkzeug==0.14.1
|
||||
cfscrape
|
||||
|
Reference in New Issue
Block a user