RP_Search/test/test_results.py

from bs4 import BeautifulSoup
from app.filter import Filter
from app.models.config import Config
from app.models.endpoint import Endpoint
from app.utils import results
from app.utils.session import generate_key
from datetime import datetime
from dateutil.parser import ParserError, parse
from urllib.parse import urlparse

from test.conftest import demo_config


def get_search_results(data):
    secret_key = generate_key()
    soup = Filter(user_key=secret_key, config=Config(**demo_config)).clean(
        BeautifulSoup(data, 'html.parser'))

    main_divs = soup.find('div', {'id': 'main'})
    assert len(main_divs) > 1

    result_divs = []
    for div in main_divs:
        # Result divs should only have 1 inner div
        if (len(list(div.children)) != 1
                or not div.findChild()
                or 'div' not in div.findChild().name):
            continue

        result_divs.append(div)

    return result_divs


def test_get_results(client):
    rv = client.get(f'/{Endpoint.search}?q=test')
    assert rv._status_code == 200

    # Depending on the search, there can be more
    # than 10 result divs
    results = get_search_results(rv.data)
    assert len(results) >= 10
    assert len(results) <= 15


def test_post_results(client):
    rv = client.post(f'/{Endpoint.search}', data=dict(q='test'))
    assert rv._status_code == 302


def test_translate_search(client):
    rv = client.get(f'/{Endpoint.search}?q=translate hola')
    assert rv._status_code == 200

    # Pretty weak test, but better than nothing
    str_data = str(rv.data)
    assert 'iframe' in str_data
    assert '/auto/en/ hola' in str_data


def test_block_results(client):
    rv = client.get(f'/{Endpoint.search}?q=pinterest')
    assert rv._status_code == 200

    has_pinterest = False
    for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True):
        if 'pinterest.com' in urlparse(link['href']).netloc:
            has_pinterest = True
            break

    assert has_pinterest

    demo_config['block'] = 'pinterest.com'
    rv = client.post(f'/{Endpoint.config}', data=demo_config)
    assert rv._status_code == 302

    rv = client.get(f'/{Endpoint.search}?q=pinterest')
    assert rv._status_code == 200

    for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True):
        result_site = urlparse(link['href']).netloc
        if not result_site:
            continue
        assert result_site not in 'pinterest.com'


def test_view_my_ip(client):
    rv = client.get(f'/{Endpoint.search}?q=my ip address')
    assert rv._status_code == 200

    # Pretty weak test, but better than nothing
    str_data = str(rv.data)
    assert 'Your public IP address' in str_data
    assert '127.0.0.1' in str_data


def test_recent_results(client):
    times = {
        'tbs=qdr:y': 365,
        'tbs=qdr:m': 31,
        'tbs=qdr:w': 7
    }

    for time, num_days in times.items():
        rv = client.get(f'/{Endpoint.search}?q=test&' + time)
        result_divs = get_search_results(rv.data)

        current_date = datetime.now()
        for div in [_ for _ in result_divs if _.find('span')]:
            date_span = div.find('span').decode_contents()
            if not date_span or len(date_span) > 15 or len(date_span) < 7:
                continue

            try:
                date = parse(date_span)
                # Date can have a little bit of wiggle room
                assert (current_date - date).days <= (num_days + 5)
            except ParserError:
                pass


def test_leading_slash_search(client):
    # Ensure searches with a leading slash are interpreted
    # correctly as queries and not endpoints
    q = '/test'
    rv = client.get(f'/{Endpoint.search}?q={q}')
    assert rv._status_code == 200

    soup = Filter(
        user_key=generate_key(),
        config=Config(**demo_config),
        query=q
    ).clean(BeautifulSoup(rv.data, 'html.parser'))

    for link in soup.find_all('a', href=True):
        if 'start=' not in link['href']:
            continue

        assert link['href'].startswith(f'{Endpoint.search}')


def test_site_alt_prefix_skip():
    # Ensure prefixes are skipped correctly for site alts

    # default silte_alts (farside.link)
    assert results.get_site_alt(link = 'https://www.reddit.com') == 'https://farside.link/libreddit'
    assert results.get_site_alt(link = 'https://www.twitter.com') == 'https://farside.link/nitter'
    assert results.get_site_alt(link = 'https://www.youtube.com') == 'https://farside.link/invidious'

    test_site_alts = {
    'reddit.com': 'reddit.endswithmobile.domain',
    'twitter.com': 'https://twitter.endswithm.domain',
    'youtube.com': 'http://yt.endswithwww.domain',
    }
    # Domains with part of SKIP_PREFIX in them
    assert results.get_site_alt(link = 'https://www.reddit.com', site_alts = test_site_alts) == 'https://reddit.endswithmobile.domain'
    assert results.get_site_alt(link = 'https://www.twitter.com', site_alts = test_site_alts) == 'https://twitter.endswithm.domain'
    assert results.get_site_alt(link = 'https://www.youtube.com', site_alts = test_site_alts) == 'http://yt.endswithwww.domain'
upload 2024-11-06 10:01:28 +00:00			`from bs4 import BeautifulSoup`
			`from app.filter import Filter`
			`from app.models.config import Config`
			`from app.models.endpoint import Endpoint`
			`from app.utils import results`
			`from app.utils.session import generate_key`
			`from datetime import datetime`
			`from dateutil.parser import ParserError, parse`
			`from urllib.parse import urlparse`

			`from test.conftest import demo_config`


			`def get_search_results(data):`
			`secret_key = generate_key()`
			`soup = Filter(user_key=secret_key, config=Config(**demo_config)).clean(`
			`BeautifulSoup(data, 'html.parser'))`

			`main_divs = soup.find('div', {'id': 'main'})`
			`assert len(main_divs) > 1`

			`result_divs = []`
			`for div in main_divs:`
			`# Result divs should only have 1 inner div`
			`if (len(list(div.children)) != 1`
			`or not div.findChild()`
			`or 'div' not in div.findChild().name):`
			`continue`

			`result_divs.append(div)`

			`return result_divs`


			`def test_get_results(client):`
			`rv = client.get(f'/{Endpoint.search}?q=test')`
			`assert rv._status_code == 200`

			`# Depending on the search, there can be more`
			`# than 10 result divs`
			`results = get_search_results(rv.data)`
			`assert len(results) >= 10`
			`assert len(results) <= 15`


			`def test_post_results(client):`
			`rv = client.post(f'/{Endpoint.search}', data=dict(q='test'))`
			`assert rv._status_code == 302`


			`def test_translate_search(client):`
			`rv = client.get(f'/{Endpoint.search}?q=translate hola')`
			`assert rv._status_code == 200`

			`# Pretty weak test, but better than nothing`
			`str_data = str(rv.data)`
			`assert 'iframe' in str_data`
			`assert '/auto/en/ hola' in str_data`


			`def test_block_results(client):`
			`rv = client.get(f'/{Endpoint.search}?q=pinterest')`
			`assert rv._status_code == 200`

			`has_pinterest = False`
			`for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True):`
			`if 'pinterest.com' in urlparse(link['href']).netloc:`
			`has_pinterest = True`
			`break`

			`assert has_pinterest`

			`demo_config['block'] = 'pinterest.com'`
			`rv = client.post(f'/{Endpoint.config}', data=demo_config)`
			`assert rv._status_code == 302`

			`rv = client.get(f'/{Endpoint.search}?q=pinterest')`
			`assert rv._status_code == 200`

			`for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True):`
			`result_site = urlparse(link['href']).netloc`
			`if not result_site:`
			`continue`
			`assert result_site not in 'pinterest.com'`


			`def test_view_my_ip(client):`
			`rv = client.get(f'/{Endpoint.search}?q=my ip address')`
			`assert rv._status_code == 200`

			`# Pretty weak test, but better than nothing`
			`str_data = str(rv.data)`
			`assert 'Your public IP address' in str_data`
			`assert '127.0.0.1' in str_data`


			`def test_recent_results(client):`
			`times = {`
			`'tbs=qdr:y': 365,`
			`'tbs=qdr:m': 31,`
			`'tbs=qdr:w': 7`
			`}`

			`for time, num_days in times.items():`
			`rv = client.get(f'/{Endpoint.search}?q=test&' + time)`
			`result_divs = get_search_results(rv.data)`

			`current_date = datetime.now()`
			`for div in [_ for _ in result_divs if _.find('span')]:`
			`date_span = div.find('span').decode_contents()`
			`if not date_span or len(date_span) > 15 or len(date_span) < 7:`
			`continue`

			`try:`
			`date = parse(date_span)`
			`# Date can have a little bit of wiggle room`
			`assert (current_date - date).days <= (num_days + 5)`
			`except ParserError:`
			`pass`


			`def test_leading_slash_search(client):`
			`# Ensure searches with a leading slash are interpreted`
			`# correctly as queries and not endpoints`
			`q = '/test'`
			`rv = client.get(f'/{Endpoint.search}?q={q}')`
			`assert rv._status_code == 200`

			`soup = Filter(`
			`user_key=generate_key(),`
			`config=Config(**demo_config),`
			`query=q`
			`).clean(BeautifulSoup(rv.data, 'html.parser'))`

			`for link in soup.find_all('a', href=True):`
			`if 'start=' not in link['href']:`
			`continue`

			`assert link['href'].startswith(f'{Endpoint.search}')`


			`def test_site_alt_prefix_skip():`
			`# Ensure prefixes are skipped correctly for site alts`

			`# default silte_alts (farside.link)`
			`assert results.get_site_alt(link = 'https://www.reddit.com') == 'https://farside.link/libreddit'`
			`assert results.get_site_alt(link = 'https://www.twitter.com') == 'https://farside.link/nitter'`
			`assert results.get_site_alt(link = 'https://www.youtube.com') == 'https://farside.link/invidious'`

			`test_site_alts = {`
			`'reddit.com': 'reddit.endswithmobile.domain',`
			`'twitter.com': 'https://twitter.endswithm.domain',`
			`'youtube.com': 'http://yt.endswithwww.domain',`
			`}`
			`# Domains with part of SKIP_PREFIX in them`
			`assert results.get_site_alt(link = 'https://www.reddit.com', site_alts = test_site_alts) == 'https://reddit.endswithmobile.domain'`
			`assert results.get_site_alt(link = 'https://www.twitter.com', site_alts = test_site_alts) == 'https://twitter.endswithm.domain'`
			`assert results.get_site_alt(link = 'https://www.youtube.com', site_alts = test_site_alts) == 'http://yt.endswithwww.domain'`