Test to replace phantomjs with something lighter
This commit is contained in:
parent
409381b5db
commit
c9536f0216
@ -3,10 +3,11 @@ FROM debian
|
|||||||
ENV DEBIAN_FRONTEND noninteractive
|
ENV DEBIAN_FRONTEND noninteractive
|
||||||
ENV LANG C.UTF-8
|
ENV LANG C.UTF-8
|
||||||
|
|
||||||
RUN apt-get update && apt-get -y upgrade && \
|
RUN curl -sSL https://deb.nodesource.com/setup_12.x | bash - && \
|
||||||
apt-get -y install python3 python3-pip locales \
|
apt-get -y upgrade && \
|
||||||
|
apt-get -y install python3 python3-pip locales nodejs \
|
||||||
python3-flask python3-flask-sqlalchemy python3-flask-httpauth python3-flaskext.wtf \
|
python3-flask python3-flask-sqlalchemy python3-flask-httpauth python3-flaskext.wtf \
|
||||||
python3-pymysql python3-requests python3-bs4 python3-dotenv && \
|
python3-pymysql python3-requests python3-bs4 python3-dotenv && \
|
||||||
apt-get -y --no-install-recommends install phantomjs && \
|
pip3 install cfscrape && \
|
||||||
printf "en_US.UTF-8 UTF-8\nfr_FR.UTF-8 UTF-8\n" > /etc/locale.gen && \
|
printf "en_US.UTF-8 UTF-8\nfr_FR.UTF-8 UTF-8\n" > /etc/locale.gen && \
|
||||||
locale-gen && rm -rf /var/lib/apt/lists/*
|
locale-gen && rm -rf /var/lib/apt/lists/*
|
||||||
|
@ -5,12 +5,10 @@ from datetime import datetime, timedelta
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from logging import getLogger
|
from logging import getLogger
|
||||||
from subprocess import run
|
|
||||||
from sys import platform
|
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from cfscrape import create_scraper
|
||||||
|
|
||||||
from config import IS_DEBUG, CACHE_TIMEOUT, BLACKLIST_WORDS
|
from config import IS_DEBUG, CACHE_TIMEOUT, BLACKLIST_WORDS
|
||||||
from models import AnimeLink
|
from models import AnimeLink
|
||||||
@ -138,19 +136,8 @@ class Connector(ABC):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def curl_content(self, url, params=None, ajax=False):
|
def curl_content(self, url, params=None, ajax=False):
|
||||||
if self.is_behind_cloudflare:
|
scraper = create_scraper()
|
||||||
try:
|
|
||||||
qt_env = {'QT_QPA_PLATFORM': 'offscreen'} if platform == 'linux' else {}
|
|
||||||
qt_output = run('phantomjs --cookies-file=/tmp/cookies.json delay.js "%s" 5000' % url, env=qt_env,
|
|
||||||
shell=True, check=True, capture_output=True, timeout=7000)
|
|
||||||
output = qt_output.stdout
|
|
||||||
http_code = 200
|
|
||||||
except Exception as e:
|
|
||||||
output = ''
|
|
||||||
http_code = 500
|
|
||||||
if IS_DEBUG:
|
|
||||||
getLogger().exception(e)
|
|
||||||
else:
|
|
||||||
if ajax:
|
if ajax:
|
||||||
headers = {'X-Requested-With': 'XMLHttpRequest'}
|
headers = {'X-Requested-With': 'XMLHttpRequest'}
|
||||||
else:
|
else:
|
||||||
@ -158,13 +145,13 @@ class Connector(ABC):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if params is not None:
|
if params is not None:
|
||||||
response = requests.post(url, params, timeout=5, headers=headers)
|
response = scraper.post(url, params, timeout=5, headers=headers)
|
||||||
else:
|
else:
|
||||||
response = requests.get(url, timeout=5, headers=headers)
|
response = scraper.get(url, timeout=5, headers=headers)
|
||||||
|
|
||||||
output = response.text
|
output = response.text
|
||||||
http_code = response.status_code
|
http_code = response.status_code
|
||||||
except requests.Timeout as e:
|
except Exception as e:
|
||||||
output = ''
|
output = ''
|
||||||
http_code = 500
|
http_code = 500
|
||||||
if IS_DEBUG:
|
if IS_DEBUG:
|
||||||
|
53
delay.js
53
delay.js
@ -1,53 +0,0 @@
|
|||||||
// https://stackoverflow.com/a/41017165
|
|
||||||
"use strict";
|
|
||||||
var page = require('webpage').create(),
|
|
||||||
system = require('system'),
|
|
||||||
mustQuit = false,
|
|
||||||
canShow = false,
|
|
||||||
underAttack = false,
|
|
||||||
address, delay;
|
|
||||||
|
|
||||||
if (system.args.length < 3 || system.args.length > 5) {
|
|
||||||
console.log('Usage: delay.js URL delay');
|
|
||||||
phantom.exit(1);
|
|
||||||
} else {
|
|
||||||
address = system.args[1];
|
|
||||||
delay = system.args[2];
|
|
||||||
|
|
||||||
page.open(address, function (status) {
|
|
||||||
if (status !== 'success') {
|
|
||||||
phantom.exit(1);
|
|
||||||
} else {
|
|
||||||
window.setTimeout(function () {
|
|
||||||
if (underAttack && canShow) {
|
|
||||||
console.log(page.content);
|
|
||||||
phantom.exit();
|
|
||||||
} else {
|
|
||||||
phantom.exit(503);
|
|
||||||
}
|
|
||||||
}, delay);
|
|
||||||
window.setTimeout(function () {
|
|
||||||
if (mustQuit) {
|
|
||||||
phantom.exit(429);
|
|
||||||
} else if (!underAttack && canShow) {
|
|
||||||
console.log(page.content);
|
|
||||||
phantom.exit();
|
|
||||||
}
|
|
||||||
}, 1);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
page.onResourceReceived = function (response) {
|
|
||||||
switch (response.status) {
|
|
||||||
case 200:
|
|
||||||
canShow = true;
|
|
||||||
break;
|
|
||||||
case 429:
|
|
||||||
mustQuit = true;
|
|
||||||
break;
|
|
||||||
case 503:
|
|
||||||
underAttack = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
@ -8,3 +8,4 @@ requests==2.21.0
|
|||||||
beautifulsoup4==4.7.1
|
beautifulsoup4==4.7.1
|
||||||
python-dotenv==0.9.1
|
python-dotenv==0.9.1
|
||||||
Werkzeug==0.14.1
|
Werkzeug==0.14.1
|
||||||
|
cfscrape
|
||||||
|
Reference in New Issue
Block a user