From ea5f514adb0a0ae4a7e4be71324bd44459be6a59 Mon Sep 17 00:00:00 2001
From: Michel Roux
Date: Mon, 19 Dec 2022 19:02:37 +0000
Subject: [PATCH] Reuse cloudscraper

---
 pynyaata/connectors/core.py | 11 ++++-------
 requirements.txt            |  1 +
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/pynyaata/connectors/core.py b/pynyaata/connectors/core.py
index 4645e31..f81b3ab 100644
--- a/pynyaata/connectors/core.py
+++ b/pynyaata/connectors/core.py
@@ -3,7 +3,7 @@ from enum import Enum
 from functools import wraps
 from json import dumps, loads
 
-import requests
+from cloudscraper import create_scraper
 from requests import RequestException
 from redis.exceptions import RedisError
 
@@ -12,7 +12,7 @@ from ..config import CACHE_TIMEOUT, REQUESTS_TIMEOUT, logger, REDIS_ENABLED
 if REDIS_ENABLED:
     from ..config import cache
 
-cloudproxy_session = None
+scraper = create_scraper()
 
 
 class ConnectorReturn(Enum):
@@ -80,19 +80,16 @@ def curl_content(url, params=None, ajax=False, debug=True, cloudflare=False):
     if ajax:
         headers['X-Requested-With'] = 'XMLHttpRequest'
 
-    if cloudflare:
-        headers['User-Agent'] = 'Googlebot/2.1 (+http://www.google.com/bot.html)'
-
     try:
         if method == 'post':
-            response = requests.post(
+            response = scraper.post(
                 url,
                 params,
                 timeout=REQUESTS_TIMEOUT,
                 headers=headers
             )
         else:
-            response = requests.get(
+            response = scraper.get(
                 url,
                 timeout=REQUESTS_TIMEOUT,
                 headers=headers
diff --git a/requirements.txt b/requirements.txt
index 068a6af..6d04bff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@ python-dotenv==0.21.0
 dateparser==1.1.4
 redis==4.4.0
 transmission-rpc==3.4.0
+cloudscraper==1.2.66