Use the Googlebot User-Agent to crawl YGG
CI status (continuous-integration/drone/push): build is failing — see Details.

This commit is contained in:
Michel Roux 2021-12-16 23:32:51 +01:00
parent 2a39323ce9
commit 80eadb35fa
3 changed files with 10 additions and 7 deletions

View File

@ -4,7 +4,7 @@ from pynyaata.models import AnimeLink
links = AnimeLink.query.all() links = AnimeLink.query.all()
for link in links: for link in links:
html = curl_content(link.link, debug=False) html = curl_content(link.link, debug=False, cloudflare=True)
if html['http_code'] != 200 and html['http_code'] != 500: if html['http_code'] != 200 and html['http_code'] != 500:
print('(%d) %s %s : %s' % ( print('(%d) %s %s : %s' % (

View File

@ -71,16 +71,17 @@ class Cache:
ConnectorCache = Cache() ConnectorCache = Cache()
def curl_content(url, params=None, ajax=False, debug=True): def curl_content(url, params=None, ajax=False, debug=True, cloudflare=False):
output = '' output = ''
http_code = 500 http_code = 500
method = 'post' if (params is not None) else 'get' method = 'post' if (params is not None) else 'get'
headers = {}
if ajax: if ajax:
headers = {'User-Agent': 'YggRobot', headers['X-Requested-With'] = 'XMLHttpRequest'
'X-Requested-With': 'XMLHttpRequest'}
else: if cloudflare:
headers = {'User-Agent': 'YggRobot'} headers['User-Agent'] = 'Googlebot/2.1 (+http://www.google.com/bot.html)'
try: try:
if method == 'post': if method == 'post':

View File

@ -34,7 +34,9 @@ class YggTorrent(ConnectorCore):
@ConnectorCache.cache_data @ConnectorCache.cache_data
def search(self): def search(self):
if self.category: if self.category:
response = curl_content(self.get_full_search_url()) response = curl_content(
self.get_full_search_url(), cloudflare=True
)
if response['http_code'] == 200: if response['http_code'] == 200:
html = BeautifulSoup(response['output'], 'html.parser') html = BeautifulSoup(response['output'], 'html.parser')