PyNyaaTa/pynyaata/connectors/nyaa.py

from bs4 import BeautifulSoup

from .core import ConnectorCore, ConnectorReturn, ConnectorCache, curl_content
from ..utils import parse_date, link_exist_in_db, check_blacklist_words


class Nyaa(ConnectorCore):
    color = 'is-link'
    title = 'Nyaa'
    favicon = 'nyaa.png'
    base_url = 'https://nyaa.si'
    is_light = False

    def get_full_search_url(self):
        sort_type = 'size'
        if self.return_type is ConnectorReturn.HISTORY:
            sort_type = 'id'

        to_query = '(%s vf)|(%s vostfr)|(%s multi)|(%s french)' % (self.query, self.query, self.query, self.query)
        return '%s/?f=0&c=1_3&s=%s&o=desc&q=%s&p=%s' % (self.base_url, sort_type, to_query, self.page)

    def get_history(self):
        self.search()

    @ConnectorCache.cache_data
    def search(self):
        response = curl_content(self.get_full_search_url())

        if response['http_code'] == 200:
            html = BeautifulSoup(response['output'], 'html.parser')
            trs = html.select('table.torrent-list tr')
            valid_trs = 0

            for i, tr in enumerate(trs):
                if not i:
                    continue

                tds = tr.findAll('td')
                check_downloads = int(tds[7].string)
                check_seeds = int(tds[5].string)

                if check_downloads or check_seeds:
                    urls = tds[1].findAll('a')

                    if len(urls) > 1:
                        url = urls[1]
                        has_comment = True
                    else:
                        url = urls[0]
                        has_comment = False

                    url_safe = url.get_text()

                    if check_blacklist_words(url_safe):
                        continue

                    valid_trs = valid_trs + 1
                    href = self.base_url + url['href']

                    self.data.append({
                        'lang': self.get_lang(url_safe),
                        'href': href,
                        'name': url_safe,
                        'comment': str(urls[0]).replace('/view/', self.base_url + '/view/') if has_comment else '',
                        'link': tds[2].decode_contents().replace('/download/', self.base_url + '/download/'),
                        'size': tds[3].string,
                        'date': parse_date(tds[4].string, '%Y-%m-%d %H:%M'),
                        'seeds': check_seeds,
                        'leechs': tds[6].string,
                        'downloads': check_downloads,
                        'class': self.color if link_exist_in_db(href) else 'is-%s' % tr['class'][0]
                    })

            self.on_error = False
            self.is_more = valid_trs and valid_trs is not len(trs) - 1
Refactor the app 2020-04-24 19:01:44 +00:00			`from bs4 import BeautifulSoup`

			`from .core import ConnectorCore, ConnectorReturn, ConnectorCache, curl_content`
Fix no result when no blacklist_words 2020-04-27 18:47:54 +00:00			`from ..utils import parse_date, link_exist_in_db, check_blacklist_words`
Refactor the app 2020-04-24 19:01:44 +00:00

			`class Nyaa(ConnectorCore):`
			`color = 'is-link'`
			`title = 'Nyaa'`
			`favicon = 'nyaa.png'`
			`base_url = 'https://nyaa.si'`
			`is_light = False`

			`def get_full_search_url(self):`
			`sort_type = 'size'`
			`if self.return_type is ConnectorReturn.HISTORY:`
			`sort_type = 'id'`

			`to_query = '(%s vf)\|(%s vostfr)\|(%s multi)\|(%s french)' % (self.query, self.query, self.query, self.query)`
			`return '%s/?f=0&c=1_3&s=%s&o=desc&q=%s&p=%s' % (self.base_url, sort_type, to_query, self.page)`

			`def get_history(self):`
			`self.search()`

			`@ConnectorCache.cache_data`
			`def search(self):`
			`response = curl_content(self.get_full_search_url())`

			`if response['http_code'] == 200:`
			`html = BeautifulSoup(response['output'], 'html.parser')`
			`trs = html.select('table.torrent-list tr')`
			`valid_trs = 0`

			`for i, tr in enumerate(trs):`
			`if not i:`
			`continue`

			`tds = tr.findAll('td')`
			`check_downloads = int(tds[7].string)`
			`check_seeds = int(tds[5].string)`

			`if check_downloads or check_seeds:`
			`urls = tds[1].findAll('a')`

			`if len(urls) > 1:`
			`url = urls[1]`
			`has_comment = True`
			`else:`
			`url = urls[0]`
			`has_comment = False`

			`url_safe = url.get_text()`

Fix no result when no blacklist_words 2020-04-27 18:47:54 +00:00			`if check_blacklist_words(url_safe):`
Refactor the app 2020-04-24 19:01:44 +00:00			`continue`

			`valid_trs = valid_trs + 1`
Fix paging + concatenate 2020-04-24 19:49:30 +00:00			`href = self.base_url + url['href']`
Refactor the app 2020-04-24 19:01:44 +00:00
			`self.data.append({`
			`'lang': self.get_lang(url_safe),`
			`'href': href,`
			`'name': url_safe,`
Fix paging + concatenate 2020-04-24 19:49:30 +00:00			`'comment': str(urls[0]).replace('/view/', self.base_url + '/view/') if has_comment else '',`
			`'link': tds[2].decode_contents().replace('/download/', self.base_url + '/download/'),`
Refactor the app 2020-04-24 19:01:44 +00:00			`'size': tds[3].string,`
			`'date': parse_date(tds[4].string, '%Y-%m-%d %H:%M'),`
			`'seeds': check_seeds,`
			`'leechs': tds[6].string,`
			`'downloads': check_downloads,`
			`'class': self.color if link_exist_in_db(href) else 'is-%s' % tr['class'][0]`
			`})`

			`self.on_error = False`
			`self.is_more = valid_trs and valid_trs is not len(trs) - 1`