Use the Googlebot User-Agent to crawl YGG
continuous-integration/drone/push Build is failing Details

This commit is contained in:
Michel Roux 2021-12-16 23:32:51 +01:00
parent 2a39323ce9
commit 80eadb35fa
3 changed files with 10 additions and 7 deletions

View File

@ -4,7 +4,7 @@ from pynyaata.models import AnimeLink
links = AnimeLink.query.all()
for link in links:
html = curl_content(link.link, debug=False)
html = curl_content(link.link, debug=False, cloudflare=True)
if html['http_code'] != 200 and html['http_code'] != 500:
print('(%d) %s %s : %s' % (

View File

@ -71,16 +71,17 @@ class Cache:
ConnectorCache = Cache()
def curl_content(url, params=None, ajax=False, debug=True):
def curl_content(url, params=None, ajax=False, debug=True, cloudflare=False):
output = ''
http_code = 500
method = 'post' if (params is not None) else 'get'
headers = {}
if ajax:
headers = {'User-Agent': 'YggRobot',
'X-Requested-With': 'XMLHttpRequest'}
else:
headers = {'User-Agent': 'YggRobot'}
headers['X-Requested-With'] = 'XMLHttpRequest'
if cloudflare:
headers['User-Agent'] = 'Googlebot/2.1 (+http://www.google.com/bot.html)'
try:
if method == 'post':

View File

@ -34,7 +34,9 @@ class YggTorrent(ConnectorCore):
@ConnectorCache.cache_data
def search(self):
if self.category:
response = curl_content(self.get_full_search_url())
response = curl_content(
self.get_full_search_url(), cloudflare=True
)
if response['http_code'] == 200:
html = BeautifulSoup(response['output'], 'html.parser')