commit 0ea1783e8629f5307bd25856e0d17dd82c1bb27e Author: gabeszm Date: Fri Dec 6 18:21:39 2024 +0100 beta2 diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..b4dbe62 Binary files /dev/null and b/.DS_Store differ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..dcaf27c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,103 @@ +FROM python:3.12.6-alpine3.20 AS builder + +RUN apk --update add \ + build-base \ + libxml2-dev \ + libxslt-dev \ + openssl-dev \ + libffi-dev + +COPY requirements.txt . + +RUN pip install --upgrade pip +RUN pip install --prefix /install --no-warn-script-location --no-cache-dir -r requirements.txt + +FROM python:3.12.6-alpine3.20 + +RUN apk add --update --no-cache tor curl openrc libstdc++ +# git go //for obfs4proxy +# libcurl4-openssl-dev + +RUN apk -U upgrade + +# uncomment to build obfs4proxy +# RUN git clone https://gitlab.com/yawning/obfs4.git +# WORKDIR /obfs4 +# RUN go build -o obfs4proxy/obfs4proxy ./obfs4proxy +# RUN cp ./obfs4proxy/obfs4proxy /usr/bin/obfs4proxy + +ARG DOCKER_USER=whoogle +ARG DOCKER_USERID=927 +ARG config_dir=/config +RUN mkdir -p $config_dir +RUN chmod a+w $config_dir +VOLUME $config_dir + +ARG url_prefix='' +ARG username='' +ARG password='' +ARG proxyuser='' +ARG proxypass='' +ARG proxytype='' +ARG proxyloc='' +ARG whoogle_dotenv='' +ARG use_https='' +ARG whoogle_port=5000 +ARG twitter_alt='farside.link/nitter' +ARG youtube_alt='farside.link/invidious' +ARG reddit_alt='farside.link/libreddit' +ARG medium_alt='farside.link/scribe' +ARG translate_alt='farside.link/lingva' +ARG imgur_alt='farside.link/rimgo' +ARG wikipedia_alt='farside.link/wikiless' +ARG imdb_alt='farside.link/libremdb' +ARG quora_alt='farside.link/quetre' +ARG so_alt='farside.link/anonymousoverflow' + +ENV CONFIG_VOLUME=$config_dir \ + WHOOGLE_URL_PREFIX=$url_prefix \ + WHOOGLE_USER=$username \ + WHOOGLE_PASS=$password \ + WHOOGLE_PROXY_USER=$proxyuser \ + WHOOGLE_PROXY_PASS=$proxypass \ + 
WHOOGLE_PROXY_TYPE=$proxytype \ + WHOOGLE_PROXY_LOC=$proxyloc \ + WHOOGLE_DOTENV=$whoogle_dotenv \ + HTTPS_ONLY=$use_https \ + EXPOSE_PORT=$whoogle_port \ + WHOOGLE_ALT_TW=$twitter_alt \ + WHOOGLE_ALT_YT=$youtube_alt \ + WHOOGLE_ALT_RD=$reddit_alt \ + WHOOGLE_ALT_MD=$medium_alt \ + WHOOGLE_ALT_TL=$translate_alt \ + WHOOGLE_ALT_IMG=$imgur_alt \ + WHOOGLE_ALT_WIKI=$wikipedia_alt \ + WHOOGLE_ALT_IMDB=$imdb_alt \ + WHOOGLE_ALT_QUORA=$quora_alt \ + WHOOGLE_ALT_SO=$so_alt + +WORKDIR /whoogle + +COPY --from=builder /install /usr/local +COPY misc/tor/torrc /etc/tor/torrc +COPY misc/tor/start-tor.sh misc/tor/start-tor.sh +COPY app/ app/ +COPY run whoogle.env* ./ + +# Create user/group to run as +RUN adduser -D -g $DOCKER_USERID -u $DOCKER_USERID $DOCKER_USER + +# Fix ownership / permissions +RUN chown -R ${DOCKER_USER}:${DOCKER_USER} /whoogle /var/lib/tor + +# Allow writing symlinks to build dir +RUN chown $DOCKER_USERID:$DOCKER_USERID app/static/build + +USER $DOCKER_USER:$DOCKER_USER + +EXPOSE $EXPOSE_PORT + +HEALTHCHECK --interval=30s --timeout=5s \ + CMD curl -f http://localhost:${EXPOSE_PORT}/healthz || exit 1 + +CMD misc/tor/start-tor.sh & ./run diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c8b71df --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Ben Busby + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..c853358 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,6 @@ +graft app/static +graft app/templates +graft app/misc +include requirements.txt +recursive-include test +global-exclude *.pyc diff --git a/README.md b/README.md new file mode 100644 index 0000000..d25e4a2 --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +Hi ![](https://user-images.githubusercontent.com/18350557/176309783-0785949b-9127-417c-8b55-ab5a4333674e.gif)My name is GabeszM +=============================================================================================================================== + +Whoogle verzió: [![Latest Release](https://img.shields.io/github/v/release/benbusby/whoogle-search)](https://github.com/benbusby/whoogle-search/releases) + +Licensz: [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + + + + ### Changelog +=============================================================================================================================== + +2024.11.06 + - Frissítve a legújabb 0.9.1-es verzióra + + + ### Skills ami nincs + +

+Git, PHP, JavaScript, VS Code, Sublime Text, HTML5, CSS3, Figma, Docker, MacOS

diff --git a/app 2.zip b/app 2.zip new file mode 100644 index 0000000..ef6e345 Binary files /dev/null and b/app 2.zip differ diff --git a/app.json b/app.json new file mode 100644 index 0000000..6a9b7e8 --- /dev/null +++ b/app.json @@ -0,0 +1,194 @@ +{ + "name": "Whoogle Search", + "description": "A lightweight, privacy-oriented, containerized Google search proxy for desktop/mobile that removes Javascript, AMP links, tracking, and ads/sponsored content", + "repository": "https://github.com/benbusby/whoogle-search", + "logo": "https://raw.githubusercontent.com/benbusby/whoogle-search/master/app/static/img/favicon/ms-icon-150x150.png", + "keywords": [ + "search", + "metasearch", + "flask", + "docker", + "heroku", + "adblock", + "degoogle", + "privacy" + ], + "stack": "container", + "env": { + "WHOOGLE_URL_PREFIX": { + "description": "The URL prefix to use for the whoogle instance (i.e. \"/whoogle\")", + "value": "", + "required": false + }, + "WHOOGLE_USER": { + "description": "The username for basic auth. WHOOGLE_PASS must also be set if used. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PASS": { + "description": "The password for basic auth. WHOOGLE_USER must also be set if used. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_USER": { + "description": "The username of the proxy server. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_PASS": { + "description": "The password of the proxy server. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_TYPE": { + "description": "The type of the proxy server. For example \"socks5\". Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_LOC": { + "description": "The location of the proxy server (host or ip). 
Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_ALT_TW": { + "description": "The site to use as a replacement for twitter.com when site alternatives are enabled in the config.", + "value": "farside.link/nitter", + "required": false + }, + "WHOOGLE_ALT_YT": { + "description": "The site to use as a replacement for youtube.com when site alternatives are enabled in the config.", + "value": "farside.link/invidious", + "required": false + }, + "WHOOGLE_ALT_RD": { + "description": "The site to use as a replacement for reddit.com when site alternatives are enabled in the config.", + "value": "farside.link/libreddit", + "required": false + }, + "WHOOGLE_ALT_MD": { + "description": "The site to use as a replacement for medium.com when site alternatives are enabled in the config.", + "value": "farside.link/scribe", + "required": false + }, + "WHOOGLE_ALT_TL": { + "description": "The Google Translate alternative to use for all searches following the 'translate ___' structure.", + "value": "farside.link/lingva", + "required": false + }, + "WHOOGLE_ALT_IMG": { + "description": "The site to use as a replacement for imgur.com when site alternatives are enabled in the config.", + "value": "farside.link/rimgo", + "required": false + }, + "WHOOGLE_ALT_WIKI": { + "description": "The site to use as a replacement for wikipedia.com when site alternatives are enabled in the config.", + "value": "farside.link/wikiless", + "required": false + }, + "WHOOGLE_ALT_IMDB": { + "description": "The site to use as a replacement for imdb.com when site alternatives are enabled in the config.", + "value": "farside.link/libremdb", + "required": false + }, + "WHOOGLE_ALT_QUORA": { + "description": "The site to use as a replacement for quora.com when site alternatives are enabled in the config.", + "value": "farside.link/quetre", + "required": false + }, + "WHOOGLE_ALT_SO": { + "description": "The site to use as a replacement for stackoverflow.com when site alternatives are 
enabled in the config.", + "value": "farside.link/anonymousoverflow", + "required": false + }, + "WHOOGLE_MINIMAL": { + "description": "Remove everything except basic result cards from all search queries (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_COUNTRY": { + "description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_TIME_PERIOD" : { + "description": "[CONFIG] The time period to use for restricting search results", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_LANGUAGE": { + "description": "[CONFIG] The language to use for the interface (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/languages.json)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_SEARCH_LANGUAGE": { + "description": "[CONFIG] The language to use for search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/languages.json)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_DISABLE": { + "description": "[CONFIG] Disable ability for client to change config (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_BLOCK": { + "description": "[CONFIG] Block websites from search results (comma-separated list)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_THEME": { + "description": "[CONFIG] Set theme to 'dark', 'light', or 'system'", + "value": "system", + "required": false + }, + "WHOOGLE_CONFIG_SAFE": { + "description": "[CONFIG] Use safe mode for searches (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_ALTS": { + "description": "[CONFIG] Use social media alternatives (set to 1 or leave blank)", + "value": "", + "required": false + }, + 
"WHOOGLE_CONFIG_NEAR": { + "description": "[CONFIG] Restrict results to only those near a particular city", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_TOR": { + "description": "[CONFIG] Use Tor, if available (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_NEW_TAB": { + "description": "[CONFIG] Always open results in new tab (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_VIEW_IMAGE": { + "description": "[CONFIG] Enable View Image option (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_GET_ONLY": { + "description": "[CONFIG] Search using GET requests only (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_STYLE": { + "description": "[CONFIG] Custom CSS styling (paste in CSS or leave blank)", + "value": ":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }", + "required": false + }, + "WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED": { + "description": "[CONFIG] Encrypt preferences token, requires WHOOGLE_CONFIG_PREFERENCES_KEY to be set", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_PREFERENCES_KEY": { + "description": "[CONFIG] Key to encrypt preferences", + "value": "NEEDS_TO_BE_MODIFIED", + "required": false + } + } +} diff --git a/app.zip b/app.zip new file mode 100644 index 0000000..d964422 Binary files /dev/null 
and b/app.zip differ diff --git a/app/.DS_Store b/app/.DS_Store new file mode 100644 index 0000000..d6828ae Binary files /dev/null and b/app/.DS_Store differ diff --git a/app/__init__.py b/app/__init__.py new file mode 100755 index 0000000..5a10faf --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,199 @@ +from app.filter import clean_query +from app.request import send_tor_signal +from app.utils.session import generate_key +from app.utils.bangs import gen_bangs_json, load_all_bangs +from app.utils.misc import gen_file_hash, read_config_bool +from base64 import b64encode +from bs4 import MarkupResemblesLocatorWarning +from datetime import datetime, timedelta +from dotenv import load_dotenv +from flask import Flask +import json +import logging.config +import os +from stem import Signal +import threading +import warnings + +from werkzeug.middleware.proxy_fix import ProxyFix + +from app.utils.misc import read_config_bool +from app.version import __version__ + +app = Flask(__name__, static_folder=os.path.dirname( + os.path.abspath(__file__)) + '/static') + +app.wsgi_app = ProxyFix(app.wsgi_app) + +# look for WHOOGLE_DOTENV_PATH, else fall back to whoogle.env in the parent directory +dot_env_path = os.getenv( + "WHOOGLE_DOTENV_PATH", + os.path.join(os.path.dirname(os.path.abspath(__file__)), "../whoogle.env")) + +# Load .env file if it exists +if os.path.exists(dot_env_path): + load_dotenv(dot_env_path) + +app.enc_key = generate_key() + +if read_config_bool('HTTPS_ONLY'): + app.config['SESSION_COOKIE_NAME'] = '__Secure-session' + app.config['SESSION_COOKIE_SECURE'] = True + +app.config['VERSION_NUMBER'] = __version__ +app.config['APP_ROOT'] = os.getenv( + 'APP_ROOT', + os.path.dirname(os.path.abspath(__file__))) +app.config['STATIC_FOLDER'] = os.getenv( + 'STATIC_FOLDER', + os.path.join(app.config['APP_ROOT'], 'static')) +app.config['BUILD_FOLDER'] = os.path.join( + app.config['STATIC_FOLDER'], 'build') +app.config['CACHE_BUSTING_MAP'] = {} +app.config['LANGUAGES'] = 
json.load(open( + 
os.path.join(app.config['STATIC_FOLDER'], 'settings/languages.json'), + encoding='utf-8')) +app.config['COUNTRIES'] = json.load(open( + os.path.join(app.config['STATIC_FOLDER'], 'settings/countries.json'), + encoding='utf-8')) +app.config['TIME_PERIODS'] = json.load(open( + os.path.join(app.config['STATIC_FOLDER'], 'settings/time_periods.json'), + encoding='utf-8')) +app.config['TRANSLATIONS'] = json.load(open( + os.path.join(app.config['STATIC_FOLDER'], 'settings/translations.json'), + encoding='utf-8')) +app.config['THEMES'] = json.load(open( + os.path.join(app.config['STATIC_FOLDER'], 'settings/themes.json'), + encoding='utf-8')) +app.config['HEADER_TABS'] = json.load(open( + os.path.join(app.config['STATIC_FOLDER'], 'settings/header_tabs.json'), + encoding='utf-8')) +app.config['CONFIG_PATH'] = os.getenv( + 'CONFIG_VOLUME', + os.path.join(app.config['STATIC_FOLDER'], 'config')) +app.config['DEFAULT_CONFIG'] = os.path.join( + app.config['CONFIG_PATH'], + 'config.json') +app.config['CONFIG_DISABLE'] = read_config_bool('WHOOGLE_CONFIG_DISABLE') +app.config['SESSION_FILE_DIR'] = os.path.join( + app.config['CONFIG_PATH'], + 'session') +app.config['MAX_SESSION_SIZE'] = 4000 # Sessions won't exceed 4KB +app.config['BANG_PATH'] = os.getenv( + 'CONFIG_VOLUME', + os.path.join(app.config['STATIC_FOLDER'], 'bangs')) +app.config['BANG_FILE'] = os.path.join( + app.config['BANG_PATH'], + 'bangs.json') + +# Ensure all necessary directories exist +if not os.path.exists(app.config['CONFIG_PATH']): + os.makedirs(app.config['CONFIG_PATH']) + +if not os.path.exists(app.config['SESSION_FILE_DIR']): + os.makedirs(app.config['SESSION_FILE_DIR']) + +if not os.path.exists(app.config['BANG_PATH']): + os.makedirs(app.config['BANG_PATH']) + +if not os.path.exists(app.config['BUILD_FOLDER']): + os.makedirs(app.config['BUILD_FOLDER']) + +# Session values +app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key') +if os.path.exists(app_key_path): + try: + app.config['SECRET_KEY'] 
= open(app_key_path, 'r').read() + except PermissionError: + app.config['SECRET_KEY'] = str(b64encode(os.urandom(32))) +else: + app.config['SECRET_KEY'] = str(b64encode(os.urandom(32))) + with open(app_key_path, 'w') as key_file: + key_file.write(app.config['SECRET_KEY']) + key_file.close() +app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=365) + +# NOTE: SESSION_COOKIE_SAMESITE must be set to 'lax' to allow the user's +# previous session to persist when accessing the instance from an external +# link. Setting this value to 'strict' causes Whoogle to revalidate a new +# session, and fail, resulting in cookies being disabled. +app.config['SESSION_COOKIE_SAMESITE'] = 'Lax' + +# Config fields that are used to check for updates +app.config['RELEASES_URL'] = 'https://github.com/' \ + 'benbusby/whoogle-search/releases' +app.config['LAST_UPDATE_CHECK'] = datetime.now() - timedelta(hours=24) +app.config['HAS_UPDATE'] = '' + +# The alternative to Google Translate is treated a bit differently than other +# social media site alternatives, in that it is used for any translation +# related searches. 
+translate_url = os.getenv('WHOOGLE_ALT_TL', 'https://farside.link/lingva') +if not translate_url.startswith('http'): + translate_url = 'https://' + translate_url +app.config['TRANSLATE_URL'] = translate_url + +app.config['CSP'] = 'default-src \'none\';' \ + 'frame-src ' + translate_url + ';' \ + 'manifest-src \'self\';' \ + 'img-src \'self\' data:;' \ + 'style-src \'self\' \'unsafe-inline\';' \ + 'script-src \'self\';' \ + 'media-src \'self\';' \ + 'connect-src \'self\';' + +# Generate DDG bang filter +generating_bangs = False +if not os.path.exists(app.config['BANG_FILE']): + generating_bangs = True + json.dump({}, open(app.config['BANG_FILE'], 'w')) + bangs_thread = threading.Thread( + target=gen_bangs_json, + args=(app.config['BANG_FILE'],)) + bangs_thread.start() + +# Build new mapping of static files for cache busting +cache_busting_dirs = ['css', 'js'] +for cb_dir in cache_busting_dirs: + full_cb_dir = os.path.join(app.config['STATIC_FOLDER'], cb_dir) + for cb_file in os.listdir(full_cb_dir): + # Create hash from current file state + full_cb_path = os.path.join(full_cb_dir, cb_file) + cb_file_link = gen_file_hash(full_cb_dir, cb_file) + build_path = os.path.join(app.config['BUILD_FOLDER'], cb_file_link) + + try: + os.symlink(full_cb_path, build_path) + except FileExistsError: + # Symlink hasn't changed, ignore + pass + + # Create mapping for relative path urls + map_path = build_path.replace(app.config['APP_ROOT'], '') + if map_path.startswith('/'): + map_path = map_path[1:] + app.config['CACHE_BUSTING_MAP'][cb_file] = map_path + +# Templating functions +app.jinja_env.globals.update(clean_query=clean_query) +app.jinja_env.globals.update( + cb_url=lambda f: app.config['CACHE_BUSTING_MAP'][f.lower()]) + +# Attempt to acquire tor identity, to determine if Tor config is available +send_tor_signal(Signal.HEARTBEAT) + +# Suppress spurious warnings from BeautifulSoup +warnings.simplefilter('ignore', MarkupResemblesLocatorWarning) + +from app import routes # noqa + 
+# The gen_bangs_json function takes care of loading bangs, so skip it here if +# it's already being loaded +if not generating_bangs: + load_all_bangs(app.config['BANG_FILE']) + +# Disable logging from imported modules +logging.config.dictConfig({ + 'version': 1, + 'disable_existing_loggers': True, +}) diff --git a/app/__main__.py b/app/__main__.py new file mode 100755 index 0000000..03a424c --- /dev/null +++ b/app/__main__.py @@ -0,0 +1,3 @@ +from .routes import run_app + +run_app() diff --git a/app/__pycache__/.DS_Store b/app/__pycache__/.DS_Store new file mode 100755 index 0000000..5008ddf Binary files /dev/null and b/app/__pycache__/.DS_Store differ diff --git a/app/__pycache__/__init__.cpython-311.pyc b/app/__pycache__/__init__.cpython-311.pyc new file mode 100755 index 0000000..572e4b9 Binary files /dev/null and b/app/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/__pycache__/__init__.cpython-312.pyc b/app/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..5817012 Binary files /dev/null and b/app/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/__pycache__/__main__.cpython-311.pyc b/app/__pycache__/__main__.cpython-311.pyc new file mode 100755 index 0000000..a715cd3 Binary files /dev/null and b/app/__pycache__/__main__.cpython-311.pyc differ diff --git a/app/__pycache__/__main__.cpython-312.pyc b/app/__pycache__/__main__.cpython-312.pyc new file mode 100644 index 0000000..797a0ee Binary files /dev/null and b/app/__pycache__/__main__.cpython-312.pyc differ diff --git a/app/__pycache__/filter.cpython-311.pyc b/app/__pycache__/filter.cpython-311.pyc new file mode 100755 index 0000000..7977c7c Binary files /dev/null and b/app/__pycache__/filter.cpython-311.pyc differ diff --git a/app/__pycache__/filter.cpython-312.pyc b/app/__pycache__/filter.cpython-312.pyc new file mode 100644 index 0000000..830da41 Binary files /dev/null and b/app/__pycache__/filter.cpython-312.pyc differ diff --git 
a/app/__pycache__/request.cpython-311.pyc b/app/__pycache__/request.cpython-311.pyc new file mode 100755 index 0000000..a1cceab Binary files /dev/null and b/app/__pycache__/request.cpython-311.pyc differ diff --git a/app/__pycache__/request.cpython-312.pyc b/app/__pycache__/request.cpython-312.pyc new file mode 100644 index 0000000..aeed5c8 Binary files /dev/null and b/app/__pycache__/request.cpython-312.pyc differ diff --git a/app/__pycache__/routes.cpython-311.pyc b/app/__pycache__/routes.cpython-311.pyc new file mode 100755 index 0000000..d0c022c Binary files /dev/null and b/app/__pycache__/routes.cpython-311.pyc differ diff --git a/app/__pycache__/routes.cpython-312.pyc b/app/__pycache__/routes.cpython-312.pyc new file mode 100644 index 0000000..85dd60e Binary files /dev/null and b/app/__pycache__/routes.cpython-312.pyc differ diff --git a/app/__pycache__/version.cpython-311.pyc b/app/__pycache__/version.cpython-311.pyc new file mode 100755 index 0000000..caca993 Binary files /dev/null and b/app/__pycache__/version.cpython-311.pyc differ diff --git a/app/__pycache__/version.cpython-312.pyc b/app/__pycache__/version.cpython-312.pyc new file mode 100644 index 0000000..ca6b37c Binary files /dev/null and b/app/__pycache__/version.cpython-312.pyc differ diff --git a/app/filter.py b/app/filter.py new file mode 100755 index 0000000..6ba112f --- /dev/null +++ b/app/filter.py @@ -0,0 +1,790 @@ +import cssutils +from bs4 import BeautifulSoup +from bs4.element import ResultSet, Tag +from cryptography.fernet import Fernet +from flask import render_template +import html +import urllib.parse as urlparse +from urllib.parse import parse_qs +import re + +from app.models.g_classes import GClasses +from app.request import VALID_PARAMS, MAPS_URL +from app.utils.misc import get_abs_url, read_config_bool +from app.utils.results import ( + BLANK_B64, GOOG_IMG, GOOG_STATIC, G_M_LOGO_URL, LOGO_URL, SITE_ALTS, + has_ad_content, filter_link_args, append_anon_view, get_site_alt, +) +from 
app.models.endpoint import Endpoint +from app.models.config import Config + + +MAPS_ARGS = ['q', 'daddr'] + +minimal_mode_sections = ['Top stories', 'Images', 'People also ask'] +unsupported_g_pages = [ + 'google.com/aclk', + '*.googleapis.com', + '*.gstatic.com', + '*.google-analytics.com', + 'adservice.google.com', + 'support.google.com', + 'accounts.google.com', + 'policies.google.com', + 'google.com/preferences', + 'google.com/intl', + 'advanced_search', + 'tbm=shop', + 'ageverification.google.co.kr' +] + +unsupported_g_divs = [ + 'google.com/preferences?hl=', + 'ageverification.google.co.kr', + 'google.com/aclk?sa=' +] + + +def extract_q(q_str: str, href: str) -> str: + """Extracts the 'q' element from a result link. This is typically + either the link to a result's website, or a string. + + Args: + q_str: The result link to parse + href: The full url to check for standalone 'q' elements first, + rather than parsing the whole query string and then checking. + + Returns: + str: The 'q' element of the link, or an empty string + """ + return parse_qs(q_str, keep_blank_values=True).get('q', [''])[0] if ('&q=' in href or '?q=' in href) else '' + + +def build_map_url(href: str) -> str: + """Tries to extract known args that explain the location in the url. If a + location is found, returns the default url with it. Otherwise, returns the + url unchanged. + + Args: + href: The full url to check. + + Returns: + str: The parsed url, or the url unchanged. + """ + # parse the url + parsed_url = parse_qs(href) + # iterate through the known parameters and try build the url + for param in MAPS_ARGS: + if param in parsed_url: + return MAPS_URL + "?q=" + parsed_url[param][0] + + # query could not be extracted returning unchanged url + return href + + +def clean_query(query: str) -> str: + """Strips the blocked site list from the query, if one is being + used. + + Args: + query: The query string + + Returns: + str: The query string without any "-site:..." 
filters + """ + return query[:query.find('-site:')] if '-site:' in query else query + + +def clean_css(css: str, page_url: str) -> str: + """Removes all remote URLs from a CSS string. + + Args: + css: The CSS string + + Returns: + str: The filtered CSS, with URLs proxied through Whoogle + """ + sheet = cssutils.parseString(css) + urls = cssutils.getUrls(sheet) + + for url in urls: + abs_url = get_abs_url(url, page_url) + if abs_url.startswith('data:'): + continue + css = css.replace( + url, + f'{Endpoint.element}?type=image/png&url={abs_url}' + ) + + return css + + +class Filter: + # Limit used for determining if a result is a "regular" result or a list + # type result (such as "people also asked", "related searches", etc) + RESULT_CHILD_LIMIT = 7 + + def __init__( + self, + user_key: str, + config: Config, + root_url='', + page_url='', + query='', + mobile=False) -> None: + self.soup = None + self.config = config + self.mobile = mobile + self.user_key = user_key + self.page_url = page_url + self.query = query + self.main_divs = ResultSet('') + self._elements = 0 + self._av = set() + + self.root_url = root_url[:-1] if root_url.endswith('/') else root_url + + def __getitem__(self, name): + return getattr(self, name) + + @property + def elements(self): + return self._elements + + def encrypt_path(self, path, is_element=False) -> str: + # Encrypts path to avoid plaintext results in logs + if is_element: + # Element paths are encrypted separately from text, to allow key + # regeneration once all items have been served to the user + enc_path = Fernet(self.user_key).encrypt(path.encode()).decode() + self._elements += 1 + return enc_path + + return Fernet(self.user_key).encrypt(path.encode()).decode() + + def clean(self, soup) -> BeautifulSoup: + self.soup = soup + self.main_divs = self.soup.find('div', {'id': 'main'}) + self.remove_ads() + self.remove_block_titles() + self.remove_block_url() + self.collapse_sections() + self.update_css() + self.update_styling() + 
self.remove_block_tabs() + + # self.main_divs is only populated for the main page of search results + # (i.e. not images/news/etc). + if self.main_divs: + for div in self.main_divs: + self.sanitize_div(div) + + for img in [_ for _ in self.soup.find_all('img') if 'src' in _.attrs]: + self.update_element_src(img, 'image/png') + + for audio in [_ for _ in self.soup.find_all('audio') if 'src' in _.attrs]: + self.update_element_src(audio, 'audio/mpeg') + audio['controls'] = '' + + for link in self.soup.find_all('a', href=True): + self.update_link(link) + self.add_favicon(link) + + if self.config.alts: + self.site_alt_swap() + + input_form = self.soup.find('form') + if input_form is not None: + input_form['method'] = 'GET' if self.config.get_only else 'POST' + # Use a relative URI for submissions + input_form['action'] = 'search' + + # Ensure no extra scripts passed through + for script in self.soup('script'): + script.decompose() + + # Update default footer and header + footer = self.soup.find('footer') + if footer: + # Remove divs that have multiple links beyond just page navigation + [_.decompose() for _ in footer.find_all('div', recursive=False) + if len(_.find_all('a', href=True)) > 3] + for link in footer.find_all('a', href=True): + link['href'] = f'{link["href"]}&preferences={self.config.preferences}' + + header = self.soup.find('header') + if header: + header.decompose() + self.remove_site_blocks(self.soup) + return self.soup + + def sanitize_div(self, div) -> None: + """Removes escaped script and iframe tags from results + + Returns: + None (The soup object is modified directly) + """ + if not div: + return + + for d in div.find_all('div', recursive=True): + d_text = d.find(text=True, recursive=False) + + # Ensure we're working with tags that contain text content + if not d_text or not d.string: + continue + + d.string = html.unescape(d_text) + div_soup = BeautifulSoup(d.string, 'html.parser') + + # Remove all valid script or iframe tags in the div + for script 
in div_soup.find_all('script'): + script.decompose() + + for iframe in div_soup.find_all('iframe'): + iframe.decompose() + + d.string = str(div_soup) + + def add_favicon(self, link) -> None: + """Adds icons for each returned result, using the result site's favicon + + Returns: + None (The soup object is modified directly) + """ + # Skip empty, parentless, or internal links + show_favicons = read_config_bool('WHOOGLE_SHOW_FAVICONS', True) + is_valid_link = link and link.parent and link['href'].startswith('http') + if not show_favicons or not is_valid_link: + return + + parent = link.parent + is_result_div = False + + # Check each parent to make sure that the div doesn't already have a + # favicon attached, and that the div is a result div + while parent: + p_cls = parent.attrs.get('class') or [] + if 'has-favicon' in p_cls or GClasses.scroller_class in p_cls: + return + elif GClasses.result_class_a not in p_cls: + parent = parent.parent + else: + is_result_div = True + break + + if not is_result_div: + return + + # Construct the html for inserting the icon into the parent div + parsed = urlparse.urlparse(link['href']) + favicon = self.encrypt_path( + f'{parsed.scheme}://{parsed.netloc}/favicon.ico', + is_element=True) + src = f'{self.root_url}/{Endpoint.element}?url={favicon}' + \ + '&type=image/x-icon' + html = f'' + + favicon = BeautifulSoup(html, 'html.parser') + link.parent.insert(0, favicon) + + # Update all parents to indicate that a favicon has been attached + parent = link.parent + while parent: + p_cls = parent.get('class') or [] + p_cls.append('has-favicon') + parent['class'] = p_cls + parent = parent.parent + + if GClasses.result_class_a in p_cls: + break + + def remove_site_blocks(self, soup) -> None: + if not self.config.block or not soup.body: + return + search_string = ' '.join(['-site:' + + _ for _ in self.config.block.split(',')]) + selected = soup.body.findAll(text=re.compile(search_string)) + + for result in selected: + 
result.string.replace_with(result.string.replace( + search_string, '')) + + def remove_ads(self) -> None: + """Removes ads found in the list of search result divs + + Returns: + None (The soup object is modified directly) + """ + if not self.main_divs: + return + + for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]: + div_ads = [_ for _ in div.find_all('span', recursive=True) + if has_ad_content(_.text)] + _ = div.decompose() if len(div_ads) else None + + def remove_block_titles(self) -> None: + if not self.main_divs or not self.config.block_title: + return + block_title = re.compile(self.config.block_title) + for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]: + block_divs = [_ for _ in div.find_all('h3', recursive=True) + if block_title.search(_.text) is not None] + _ = div.decompose() if len(block_divs) else None + + def remove_block_url(self) -> None: + if not self.main_divs or not self.config.block_url: + return + block_url = re.compile(self.config.block_url) + for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]: + block_divs = [_ for _ in div.find_all('a', recursive=True) + if block_url.search(_.attrs['href']) is not None] + _ = div.decompose() if len(block_divs) else None + + def remove_block_tabs(self) -> None: + if self.main_divs: + for div in self.main_divs.find_all( + 'div', + attrs={'class': f'{GClasses.main_tbm_tab}'} + ): + _ = div.decompose() + else: + # when in images tab + for div in self.soup.find_all( + 'div', + attrs={'class': f'{GClasses.images_tbm_tab}'} + ): + _ = div.decompose() + + def collapse_sections(self) -> None: + """Collapses long result sections ("people also asked", "related + searches", etc) into "details" elements + + These sections are typically the only sections in the results page that + have more than ~5 child divs within a primary result div. 
+ + Returns: + None (The soup object is modified directly) + """ + minimal_mode = read_config_bool('WHOOGLE_MINIMAL') + + def pull_child_divs(result_div: BeautifulSoup): + try: + return result_div.findChildren( + 'div', recursive=False + )[0].findChildren( + 'div', recursive=False) + except IndexError: + return [] + + if not self.main_divs: + return + #töörölni kell People also ask, + search_terms = ["People also search for", "Related searches", "Kapcsolódó keresések", "Mások ezeket keresték még"] + details_list = [] + + # Loop through results and check for the number of child divs in each + for result in self.main_divs.find_all(): + result_children = pull_child_divs(result) + if minimal_mode: + if any(f">{x} 1: + subtitle = ' (' + \ + ''.join(content[1:]) + ')' + elem.decompose() + break + + # Determine the class based on the label content + if any(term in label for term in search_terms): + details_class = 'search-recommendations' + details_attrs = {'class': details_class, 'open': 'true'} + else: + details_class = 'other-results' + details_attrs = {'class': details_class} + + + # Create the new details element to wrap around the result's + # first parent + parent = None + idx = 0 + while not parent and idx < len(result_children): + parent = result_children[idx].parent + idx += 1 + + details = BeautifulSoup(features='html.parser').new_tag('details', attrs=details_attrs) + summary = BeautifulSoup(features='html.parser').new_tag('summary', attrs={'class': "summary_div"}) + summary.string = label + + if subtitle: + soup = BeautifulSoup(subtitle, 'html.parser') + summary.append(soup) + + details.append(summary) + + if parent and not minimal_mode: + parent.wrap(details) + elif parent and minimal_mode: + # Remove parent element from document if "minimal mode" is + # enabled + parent.decompose() + + for details in details_list: + self.main_divs.append(details) + + def update_element_src(self, element: Tag, mime: str, attr='src') -> None: + """Encrypts the original src of 
an element and rewrites the element src + to use the "/element?src=" pass-through. + + Returns: + None (The soup element is modified directly) + + """ + src = element[attr].split(' ')[0] + + if src.startswith('//'): + src = 'https:' + src + elif src.startswith('data:'): + return + + if src.startswith(LOGO_URL): + # Re-brand with Whoogle logo + element.replace_with(BeautifulSoup( + render_template('logo.html'), + features='html.parser')) + return + elif src.startswith(G_M_LOGO_URL): + # Re-brand with single-letter Whoogle logo + element['src'] = 'static/img/favicon/apple-icon.png' + element.parent['href'] = 'home' + return + elif src.startswith(GOOG_IMG) or GOOG_STATIC in src: + element['src'] = BLANK_B64 + return + + element[attr] = f'{self.root_url}/{Endpoint.element}?url=' + ( + self.encrypt_path( + src, + is_element=True + ) + '&type=' + urlparse.quote(mime) + ) + + def update_css(self) -> None: + """Updates URLs used in inline styles to be proxied by Whoogle + using the /element endpoint. + + Returns: + None (The soup element is modified directly) + + """ + # Filter all +

+
+

0

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ diff --git a/app/templates/display.html b/app/templates/display.html new file mode 100755 index 0000000..a09a58e --- /dev/null +++ b/app/templates/display.html @@ -0,0 +1,40 @@ + + + + + {% if not search_type %} + + {% else %} + + {% endif %} + + + + + + + + + + {{ clean_query(query) }} - RaveSearch + + +{{ search_header|safe }} +{% if is_translation %} + +{% endif %} +{{ response|safe }} + +{% include 'footer.html' %} +{% if autocomplete_enabled == '1' %} + +{% endif %} + + + + diff --git a/app/templates/error.html b/app/templates/error.html new file mode 100755 index 0000000..dcaf3cf --- /dev/null +++ b/app/templates/error.html @@ -0,0 +1,106 @@ +{% if config.theme %} + {% if config.theme == 'system' %} + + {% else %} + + {% endif %} +{% else %} + +{% endif %} + + + +
+

Error

+

+ {{ error_message }} +

+
+ {% if query and translation %} +

+

{{ translation['continue-search'] }}

+ +
+

Other options:

+ +
+

+ {% endif %} + Return Home +
diff --git a/app/templates/footer.html b/app/templates/footer.html new file mode 100755 index 0000000..821a67e --- /dev/null +++ b/app/templates/footer.html @@ -0,0 +1,18 @@ + + + diff --git a/app/templates/header.html b/app/templates/header.html new file mode 100755 index 0000000..6d558ae --- /dev/null +++ b/app/templates/header.html @@ -0,0 +1,90 @@ + +
+ + + + +
+
+ {% for tab_id, tab_content in tabs.items() %} + {% if tab_content['selected'] %} + + {% else %} + + {% endif %} + {% endfor %} + + +
+
+
+
+ +
+
+ + +

+ + +
+
+ + + diff --git a/app/templates/header2.html b/app/templates/header2.html new file mode 100755 index 0000000..959adc0 --- /dev/null +++ b/app/templates/header2.html @@ -0,0 +1,210 @@ +{% if mobile %} +
+
+
+ +
+
+ {% if config.preferences %} + + {% endif %} + + + + + +
+
+
+
+
+
+
+
+
+
+ {% for tab_id, tab_content in tabs.items() %} + {% if tab_content['selected'] %} + {{ tab_content['name'] }} + {% else %} + {{ tab_content['name'] }} + {% endif %} + {% endfor %} + + +
+
+
+
+
+
+
+
+{% else %} +
+ +
+
+
+
+ {% if config.preferences %} + + {% endif %} + + + + + +
+
+
+
+
+
+
+
+
+
+
+ {% for tab_id, tab_content in tabs.items() %} + {% if tab_content['selected'] %} + {{ tab_content['name'] }} + {% else %} + {{ tab_content['name'] }} + {% endif %} + {% endfor %} + + +
+
+
+
+
+
+{% endif %} +
+
+ + +
+ + +
+
+ + + + + + + + + +
+ +
+ {% set display_tooltip = true %} + {% include 'simple/categories.html' %} +
+ +
+ {% include 'simple/filters/languages.html' %} + {% include 'simple/filters/time_range.html' %} + {% include 'simple/filters/safesearch.html' %} +
+ + {% if timeout_limit %}{% endif %} + + + + +
{{- '' -}} +
+ {%- if not search_on_category_select or not display_tooltip -%} + {%- for category in categories_as_tabs -%} +
{{- '' -}} + + +
+ {%- endfor -%} + {%- if display_tooltip %}
{{ _('Click on the magnifier to perform search') }}
{% endif -%} + {%- else -%} + {%- for category in categories_as_tabs -%}{{- '\n' -}} + {{- '' -}} + {%- endfor -%} + {{- '\n' -}} + {%- endif -%} +
{{- '' -}} +
diff --git a/app/templates/imageresults.html b/app/templates/imageresults.html new file mode 100755 index 0000000..767564e --- /dev/null +++ b/app/templates/imageresults.html @@ -0,0 +1,38 @@ +
+
+ {% for result in results %} + + {% endfor %} +
+ +
+ +
+
+ + +
+
+ +
+
+ +
\ No newline at end of file diff --git a/app/templates/index.html b/app/templates/index.html new file mode 100755 index 0000000..31d0ccc --- /dev/null +++ b/app/templates/index.html @@ -0,0 +1,303 @@ + + + + + + + + + + + + + + {% if autocomplete_enabled == '1' %} + + {% endif %} + + + + + + + + + + RaveSearch + + +
+
+ {{ logo|safe }} +
+
+
+
+ {% if config.preferences %} + + {% endif %} + +
+ +
+ +
+ + {% if not config_disabled %} + +
+
+
+
+ + +
+ + +
+
+ + +
+ + + + + +
+ + +
+
+ + +
+ + + + + +
+ + +
+
+ + +
— {{ translation['config-alts-help'] }}
+
+
+ + +
+ +
+ + +
+
+ + +
+ +
+ + +
+
+ + {{ translation['config-css'] }}: + + +
+
+ + +
— {{ translation['config-pref-help'] }}
+ + +
+
+
+   +   + +
+
+
+
+ {% endif %} + + + +
+{% include 'footer.html' %} + + + diff --git a/app/templates/logo.html b/app/templates/logo.html new file mode 100755 index 0000000..af64c22 --- /dev/null +++ b/app/templates/logo.html @@ -0,0 +1,8 @@ + diff --git a/app/templates/opensearch.xml b/app/templates/opensearch.xml new file mode 100755 index 0000000..856a6db --- /dev/null +++ b/app/templates/opensearch.xml @@ -0,0 +1,25 @@ + + + {% if not search_type %} + RaveSearch + {% else %} + RaveSearch {{ search_name }} + {% endif %} + RaveSearch: A self-hosted, ad-free, privacy-respecting metasearch engine + UTF-8 + +  + + + + {% if search_type %} + + {% endif %} + + + + + {{ main_url }}/search + + diff --git a/app/templates/search.html b/app/templates/search.html new file mode 100755 index 0000000..634b707 --- /dev/null +++ b/app/templates/search.html @@ -0,0 +1,15 @@ +
+ + +
diff --git a/app/utils/.DS_Store b/app/utils/.DS_Store new file mode 100755 index 0000000..8942dfb Binary files /dev/null and b/app/utils/.DS_Store differ diff --git a/app/utils/__init__.py b/app/utils/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/app/utils/__pycache__/.DS_Store b/app/utils/__pycache__/.DS_Store new file mode 100755 index 0000000..5008ddf Binary files /dev/null and b/app/utils/__pycache__/.DS_Store differ diff --git a/app/utils/__pycache__/__init__.cpython-311.pyc b/app/utils/__pycache__/__init__.cpython-311.pyc new file mode 100755 index 0000000..28222dc Binary files /dev/null and b/app/utils/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/utils/__pycache__/__init__.cpython-312.pyc b/app/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..d301691 Binary files /dev/null and b/app/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/utils/__pycache__/bangs.cpython-311.pyc b/app/utils/__pycache__/bangs.cpython-311.pyc new file mode 100755 index 0000000..0122e51 Binary files /dev/null and b/app/utils/__pycache__/bangs.cpython-311.pyc differ diff --git a/app/utils/__pycache__/bangs.cpython-312.pyc b/app/utils/__pycache__/bangs.cpython-312.pyc new file mode 100644 index 0000000..7c79b1f Binary files /dev/null and b/app/utils/__pycache__/bangs.cpython-312.pyc differ diff --git a/app/utils/__pycache__/misc.cpython-311.pyc b/app/utils/__pycache__/misc.cpython-311.pyc new file mode 100755 index 0000000..193124f Binary files /dev/null and b/app/utils/__pycache__/misc.cpython-311.pyc differ diff --git a/app/utils/__pycache__/misc.cpython-312.pyc b/app/utils/__pycache__/misc.cpython-312.pyc new file mode 100644 index 0000000..a9b1f93 Binary files /dev/null and b/app/utils/__pycache__/misc.cpython-312.pyc differ diff --git a/app/utils/__pycache__/results.cpython-311.pyc b/app/utils/__pycache__/results.cpython-311.pyc new file mode 100755 index 0000000..7d3cde4 Binary files /dev/null 
and b/app/utils/__pycache__/results.cpython-311.pyc differ diff --git a/app/utils/__pycache__/results.cpython-312.pyc b/app/utils/__pycache__/results.cpython-312.pyc new file mode 100644 index 0000000..58cf264 Binary files /dev/null and b/app/utils/__pycache__/results.cpython-312.pyc differ diff --git a/app/utils/__pycache__/search.cpython-311.pyc b/app/utils/__pycache__/search.cpython-311.pyc new file mode 100755 index 0000000..93e7ee5 Binary files /dev/null and b/app/utils/__pycache__/search.cpython-311.pyc differ diff --git a/app/utils/__pycache__/search.cpython-312.pyc b/app/utils/__pycache__/search.cpython-312.pyc new file mode 100644 index 0000000..104e178 Binary files /dev/null and b/app/utils/__pycache__/search.cpython-312.pyc differ diff --git a/app/utils/__pycache__/session.cpython-311.pyc b/app/utils/__pycache__/session.cpython-311.pyc new file mode 100755 index 0000000..17d8f32 Binary files /dev/null and b/app/utils/__pycache__/session.cpython-311.pyc differ diff --git a/app/utils/__pycache__/session.cpython-312.pyc b/app/utils/__pycache__/session.cpython-312.pyc new file mode 100644 index 0000000..276bb14 Binary files /dev/null and b/app/utils/__pycache__/session.cpython-312.pyc differ diff --git a/app/utils/__pycache__/widgets.cpython-311.pyc b/app/utils/__pycache__/widgets.cpython-311.pyc new file mode 100755 index 0000000..6c20c84 Binary files /dev/null and b/app/utils/__pycache__/widgets.cpython-311.pyc differ diff --git a/app/utils/__pycache__/widgets.cpython-312.pyc b/app/utils/__pycache__/widgets.cpython-312.pyc new file mode 100644 index 0000000..fd813e9 Binary files /dev/null and b/app/utils/__pycache__/widgets.cpython-312.pyc differ diff --git a/app/utils/bangs.py b/app/utils/bangs.py new file mode 100755 index 0000000..4e7a82f --- /dev/null +++ b/app/utils/bangs.py @@ -0,0 +1,146 @@ +import json +import requests +import urllib.parse as urlparse +import os +import glob + +bangs_dict = {} +DDG_BANGS = 'https://duckduckgo.com/bang.js' + + +def 
load_all_bangs(ddg_bangs_file: str, ddg_bangs: dict = {}): + """Loads all the bang files in alphabetical order + Args: + ddg_bangs_file: The str path to the new DDG bangs json file + ddg_bangs: The dict of ddg bangs. If this is empty, it will load the + bangs from the file + Returns: + None + """ + global bangs_dict + ddg_bangs_file = os.path.normpath(ddg_bangs_file) + + if (bangs_dict and not ddg_bangs) or os.path.getsize(ddg_bangs_file) <= 4: + return + + bangs = {} + bangs_dir = os.path.dirname(ddg_bangs_file) + bang_files = glob.glob(os.path.join(bangs_dir, '*.json')) + + # Normalize the paths + bang_files = [os.path.normpath(f) for f in bang_files] + + # Move the ddg bangs file to the beginning + bang_files = sorted([f for f in bang_files if f != ddg_bangs_file]) + + if ddg_bangs: + bangs |= ddg_bangs + else: + bang_files.insert(0, ddg_bangs_file) + + for i, bang_file in enumerate(bang_files): + try: + bangs |= json.load(open(bang_file)) + except json.decoder.JSONDecodeError: + # Ignore decoding error only for the ddg bangs file, since this can + # occur if file is still being written + if i != 0: + raise + + bangs_dict = dict(sorted(bangs.items())) + + +def gen_bangs_json(bangs_file: str) -> None: + """Generates a json file from the DDG bangs list + + Args: + bangs_file: The str path to the new DDG bangs json file + + Returns: + None + + """ + try: + # Request full list from DDG + r = requests.get(DDG_BANGS) + r.raise_for_status() + except requests.exceptions.HTTPError as err: + raise SystemExit(err) + + # Convert to json + data = json.loads(r.text) + + # Set up a json object (with better formatting) for all available bangs + bangs_data = {} + + for row in data: + bang_command = '!' 
+ row['t'] + bangs_data[bang_command] = { + 'url': row['u'].replace('{{{s}}}', '{}'), + 'suggestion': bang_command + ' (' + row['s'] + ')' + } + + json.dump(bangs_data, open(bangs_file, 'w')) + print('* Finished creating ddg bangs json') + load_all_bangs(bangs_file, bangs_data) + + +def suggest_bang(query: str) -> list[str]: + """Suggests bangs for a user's query + Args: + query: The search query + Returns: + list[str]: A list of bang suggestions + """ + global bangs_dict + return [bangs_dict[_]['suggestion'] for _ in bangs_dict if _.startswith(query)] + + +def resolve_bang(query: str) -> str: + """Transform's a user's query to a bang search, if an operator is found + + Args: + query: The search query + + Returns: + str: A formatted redirect for a bang search, or an empty str if there + wasn't a match or didn't contain a bang operator + + """ + + global bangs_dict + + #if ! not in query simply return (speed up processing) + if '!' not in query: + return '' + + split_query = query.strip().split(' ') + + # look for operator in query if one is found, list operator should be of + # length 1, operator should not be case-sensitive here to remove it later + operator = [ + word + for word in split_query + if word.lower() in bangs_dict + ] + if len(operator) == 1: + # get operator + operator = operator[0] + + # removes operator from query + split_query.remove(operator) + + # rebuild the query string + bang_query = ' '.join(split_query).strip() + + # Check if operator is a key in bangs and get bang if exists + bang = bangs_dict.get(operator.lower(), None) + if bang: + bang_url = bang['url'] + + if bang_query: + return bang_url.replace('{}', bang_query, 1) + else: + parsed_url = urlparse.urlparse(bang_url) + return f'{parsed_url.scheme}://{parsed_url.netloc}' + return '' diff --git a/app/utils/misc.py b/app/utils/misc.py new file mode 100755 index 0000000..ee6d62e --- /dev/null +++ b/app/utils/misc.py @@ -0,0 +1,137 @@ +import base64 +import hashlib +import contextlib +import 
io +import os +import re + +from requests import exceptions, get +from urllib.parse import urlparse +from bs4 import BeautifulSoup as bsoup +from cryptography.fernet import Fernet +from flask import Request + +ddg_favicon_site = 'http://icons.duckduckgo.com/ip2' + +empty_gif = base64.b64decode( + 'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==') + +placeholder_img = base64.b64decode( + 'iVBORw0KGgoAAAANSUhEUgAAABkAAAAZCAYAAADE6YVjAAABF0lEQVRIS8XWPw9EMBQA8Eok' \ + 'JBKrMFqMBt//GzAYLTZ/VomExPDu6uLiaPteqVynBn0/75W2Vp7nEIYhe6p1XcespmmAd7Is' \ + 'M+4URcGiKPogvMMvmIS2eN9MOMKbKWgf54SYgI4vKkTuQKJKSJErkKzUSkQHUs0lilAg7GMh' \ + 'ISoIA/hYMiKCKIA2soeowCWEMkfHtUmrXLcyGYYBfN9HF8djiaglWzNZlgVs21YisoAUaEXG' \ + 'cQTP86QIFgi7vyLzPIPjOEIEC7ANQv/4aZrAdd0TUtc1i+MYnSsMWjPp+x6CIPgJVlUVS5KE' \ + 'DKig/+wnVzM4pnzaGeHd+ENlWbI0TbVLJBtw2uMfP63wc9d2kDCWxi5Q27bsBerSJ9afJbeL' \ + 'AAAAAElFTkSuQmCC' +) + + +def fetch_favicon(url: str) -> bytes: + """Fetches a favicon using DuckDuckGo's favicon retriever + + Args: + url: The url to fetch the favicon from + Returns: + bytes - the favicon bytes, or a placeholder image if one + was not returned + """ + response = get(f'{ddg_favicon_site}/{urlparse(url).netloc}.ico') + + if response.status_code == 200 and len(response.content) > 0: + tmp_mem = io.BytesIO() + tmp_mem.write(response.content) + tmp_mem.seek(0) + + return tmp_mem.read() + return placeholder_img + + +def gen_file_hash(path: str, static_file: str) -> str: + file_contents = open(os.path.join(path, static_file), 'rb').read() + file_hash = hashlib.md5(file_contents).hexdigest()[:8] + filename_split = os.path.splitext(static_file) + + return f'{filename_split[0]}.{file_hash}{filename_split[-1]}' + + +def read_config_bool(var: str, default: bool=False) -> bool: + val = os.getenv(var, '1' if default else '0') + # user can specify one of the following values as 'true' inputs (all + # variants with upper case letters will also work): + # ('true', 't', '1', 'yes', 'y') + return 
val.lower() in ('true', 't', '1', 'yes', 'y') + + +def get_client_ip(r: Request) -> str: + if r.environ.get('HTTP_X_FORWARDED_FOR') is None: + return r.environ['REMOTE_ADDR'] + return r.environ['HTTP_X_FORWARDED_FOR'] + + +def get_request_url(url: str) -> str: + if os.getenv('HTTPS_ONLY', False): + return url.replace('http://', 'https://', 1) + + return url + + +def get_proxy_host_url(r: Request, default: str, root=False) -> str: + scheme = r.headers.get('X-Forwarded-Proto', 'https') + http_host = r.headers.get('X-Forwarded-Host') + + full_path = r.full_path if not root else '' + if full_path.startswith('/'): + full_path = f'/{full_path}' + + if http_host: + prefix = os.environ.get('WHOOGLE_URL_PREFIX', '') + if prefix: + prefix = f'/{re.sub("[^0-9a-zA-Z]+", "", prefix)}' + return f'{scheme}://{http_host}{prefix}{full_path}' + + return default + + +def check_for_update(version_url: str, current: str) -> int: + # Check for the latest version of Whoogle + has_update = '' + with contextlib.suppress(exceptions.ConnectionError, AttributeError): + update = bsoup(get(version_url).text, 'html.parser') + latest = update.select_one('[class="Link--primary"]').string[1:] + current = int(''.join(filter(str.isdigit, current))) + latest = int(''.join(filter(str.isdigit, latest))) + has_update = '' if current >= latest else latest + + return has_update + + +def get_abs_url(url, page_url): + # Creates a valid absolute URL using a partial or relative URL + urls = { + "//": f"https:{url}", + "/": f"{urlparse(page_url).netloc}{url}", + "./": f"{page_url}{url[2:]}" + } + for start in urls: + if url.startswith(start): + return urls[start] + + return url + + +def list_to_dict(lst: list) -> dict: + if len(lst) < 2: + return {} + return {lst[i].replace(' ', ''): lst[i+1].replace(' ', '') + for i in range(0, len(lst), 2)} + + +def encrypt_string(key: bytes, string: str) -> str: + cipher_suite = Fernet(key) + return cipher_suite.encrypt(string.encode()).decode() + + +def decrypt_string(key: 
bytes, string: str) -> str: + cipher_suite = Fernet(g.session_key) + return cipher_suite.decrypt(string.encode()).decode() diff --git a/app/utils/results.py b/app/utils/results.py new file mode 100755 index 0000000..c87defd --- /dev/null +++ b/app/utils/results.py @@ -0,0 +1,466 @@ +from app.models.config import Config +from app.models.endpoint import Endpoint +from app.utils.misc import list_to_dict +from bs4 import BeautifulSoup, NavigableString +import copy +from flask import current_app +import html +import os +import urllib.parse as urlparse +from urllib.parse import parse_qs +import re +import warnings + +SKIP_ARGS = ['ref_src', 'utm'] +SKIP_PREFIX = ['//www.', '//mobile.', '//m.'] +GOOG_STATIC = 'www.gstatic.com' +G_M_LOGO_URL = 'https://www.gstatic.com/m/images/icons/googleg.gif' +GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' +LOGO_URL = GOOG_IMG + '_desk' +BLANK_B64 = ('data:image/png;base64,' + 'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw' + 'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC') + +# Ad keywords +BLACKLIST = [ + 'ad', 'ads', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', + 'Reklama', 'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan', + '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', + 'Reklam', 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Szponzorált', + 'Anúncio', 'Quảng cáo','โฆษณา', 'sponsored', 'patrocinado', 'gesponsert', 'Sponzorováno', '스폰서', 'Gesponsord' +] + +SITE_ALTS = { + 'twitter.com': os.getenv('WHOOGLE_ALT_TW', 'farside.link/nitter'), + 'youtube.com': os.getenv('WHOOGLE_ALT_YT', 'farside.link/invidious'), + 'reddit.com': os.getenv('WHOOGLE_ALT_RD', 'farside.link/libreddit'), + **dict.fromkeys([ + 'medium.com', + 'levelup.gitconnected.com' + ], os.getenv('WHOOGLE_ALT_MD', 'farside.link/scribe')), + 'imgur.com': os.getenv('WHOOGLE_ALT_IMG', 'farside.link/rimgo'), + 'wikipedia.org': os.getenv('WHOOGLE_ALT_WIKI', 'farside.link/wikiless'), + 'imdb.com': 
os.getenv('WHOOGLE_ALT_IMDB', 'farside.link/libremdb'), + 'quora.com': os.getenv('WHOOGLE_ALT_QUORA', 'farside.link/quetre'), + 'stackoverflow.com': os.getenv('WHOOGLE_ALT_SO', 'farside.link/anonymousoverflow') +} + +# Include custom site redirects from WHOOGLE_REDIRECTS +SITE_ALTS.update(list_to_dict(re.split(',|:', os.getenv('WHOOGLE_REDIRECTS', '')))) + + +def contains_cjko(s: str) -> bool: + """This function check whether or not a string contains Chinese, Japanese, + or Korean characters. It employs regex and uses the u escape sequence to + match any character in a set of Unicode ranges. + + Args: + s (str): string to be checked + + Returns: + bool: True if the input s contains the characters and False otherwise + """ + unicode_ranges = ('\u4e00-\u9fff' # Chinese characters + '\u3040-\u309f' # Japanese hiragana + '\u30a0-\u30ff' # Japanese katakana + '\u4e00-\u9faf' # Japanese kanji + '\uac00-\ud7af' # Korean hangul syllables + '\u1100-\u11ff' # Korean hangul jamo + ) + return bool(re.search(fr'[{unicode_ranges}]', s)) + + +def bold_search_terms(response: str, query: str) -> BeautifulSoup: + """Wraps all search terms in bold tags (). If any terms are wrapped + in quotes, only that exact phrase will be made bold. 
+ + Args: + response: The initial response body for the query + query: The original search query + + Returns: + BeautifulSoup: modified soup object with bold items + """ + response = BeautifulSoup(response, 'html.parser') + + def replace_any_case(element: NavigableString, target_word: str) -> None: + # Replace all instances of the word, but maintaining the same case in + # the replacement + if len(element) == len(target_word): + return + + # Ensure target word is escaped for regex + target_word = re.escape(target_word) + + # Check if the word contains Chinese, Japanese, or Korean characters + if contains_cjko(target_word): + reg_pattern = fr'((?![{{}}<>-]){target_word}(?![{{}}<>-]))' + else: + reg_pattern = fr'\b((?![{{}}<>-]){target_word}(?![{{}}<>-]))\b' + + if re.match(r'.*[@_!#$%^&*()<>?/\|}{~:].*', target_word) or ( + element.parent and element.parent.name == 'style'): + return + + element.replace_with(BeautifulSoup( + re.sub(reg_pattern, + r'\1', + element, + flags=re.I), 'html.parser') + ) + + # Split all words out of query, grouping the ones wrapped in quotes + for word in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query): + word = re.sub(r'[@_!#$%^&*()<>?/\|}{~:]+', '', word) + target = response.find_all( + text=re.compile(r'' + re.escape(word), re.I)) + for nav_str in target: + replace_any_case(nav_str, word) + + return response + + +def has_ad_content(element: str) -> bool: + """Inspects an HTML element for ad related content + + Args: + element: The HTML element to inspect + + Returns: + bool: True/False for the element containing an ad + + """ + element_str = ''.join(filter(str.isalpha, element)) + return (element_str.upper() in (value.upper() for value in BLACKLIST) + or 'ⓘ' in element) + + +def get_first_link(soup: BeautifulSoup) -> str: + """Retrieves the first result link from the query response + + Args: + soup: The BeautifulSoup response body + + Returns: + str: A str link to the first result + + """ + first_link = '' + orig_details = [] + + # 
Temporarily remove details so we don't grab those links + for details in soup.find_all('details'): + temp_details = soup.new_tag('removed_details') + orig_details.append(details.replace_with(temp_details)) + + # Replace hrefs with only the intended destination (no "utm" type tags) + for a in soup.find_all('a', href=True): + # Return the first search result URL + if a['href'].startswith('http://') or a['href'].startswith('https://'): + first_link = a['href'] + break + + # Add the details back + for orig_detail, details in zip(orig_details, soup.find_all('removed_details')): + details.replace_with(orig_detail) + + return first_link + + +def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str: + """Returns an alternative to a particular site, if one is configured + + Args: + link: A string result URL to check against the site_alts map + site_alts: A map of site alternatives to replace with. defaults to SITE_ALTS + + Returns: + str: An updated (or ignored) result link + + """ + # Need to replace full hostname with alternative to encapsulate + # subdomains as well + parsed_link = urlparse.urlparse(link) + + # Extract subdomain separately from the domain+tld. The subdomain + # is used for wikiless translations. + split_host = parsed_link.netloc.split('.') + subdomain = split_host[0] if len(split_host) > 2 else '' + hostname = '.'.join(split_host[-2:]) + + # The full scheme + hostname is used when comparing against the list of + # available alternative services, due to how Medium links are constructed. + # (i.e. for medium.com: "https://something.medium.com" should match, + # "https://medium.com/..." 
should match, but "philomedium.com" should not) + hostcomp = f'{parsed_link.scheme}://{hostname}' + + for site_key in site_alts.keys(): + site_alt = f'{parsed_link.scheme}://{site_key}' + if not hostname or site_alt not in hostcomp or not site_alts[site_key]: + continue + + # Wikipedia -> Wikiless replacements require the subdomain (if it's + # a 2-char language code) to be passed as a URL param to Wikiless + # in order to preserve the language setting. + params = '' + if 'wikipedia' in hostname and len(subdomain) == 2: + hostname = f'{subdomain}.{hostname}' + params = f'?lang={subdomain}' + elif 'medium' in hostname and len(subdomain) > 0: + hostname = f'{subdomain}.{hostname}' + + parsed_alt = urlparse.urlparse(site_alts[site_key]) + link = link.replace(hostname, site_alts[site_key]) + params + # If a scheme is specified in the alternative, this results in a + # replaced link that looks like "https://http://altservice.tld". + # In this case, we can remove the original scheme from the result + # and use the one specified for the alt. 
+ if parsed_alt.scheme: + link = '//'.join(link.split('//')[1:]) + + for prefix in SKIP_PREFIX: + if parsed_alt.scheme: + # If a scheme is specified, remove everything before the + # first occurence of it + link = f'{parsed_alt.scheme}{link.split(parsed_alt.scheme, 1)[-1]}' + else: + # Otherwise, replace the first occurrence of the prefix + link = link.replace(prefix, '//', 1) + break + + return link + + +def filter_link_args(link: str) -> str: + """Filters out unnecessary URL args from a result link + + Args: + link: The string result link to check for extraneous URL params + + Returns: + str: An updated (or ignored) result link + + """ + parsed_link = urlparse.urlparse(link) + link_args = parse_qs(parsed_link.query) + safe_args = {} + + if len(link_args) == 0 and len(parsed_link) > 0: + return link + + for arg in link_args.keys(): + if arg in SKIP_ARGS: + continue + + safe_args[arg] = link_args[arg] + + # Remove original link query and replace with filtered args + link = link.replace(parsed_link.query, '') + if len(safe_args) > 0: + link = link + urlparse.urlencode(safe_args, doseq=True) + else: + link = link.replace('?', '') + + return link + + +def append_nojs(result: BeautifulSoup) -> None: + """Appends a no-Javascript alternative for a search result + + Args: + result: The search result to append a no-JS link to + + Returns: + None + + """ + nojs_link = BeautifulSoup(features='html.parser').new_tag('a') + nojs_link['href'] = f'{Endpoint.window}?nojs=1&location=' + result['href'] + nojs_link.string = ' NoJS Link' + result.append(nojs_link) + + +def append_anon_view(result: BeautifulSoup, config: Config) -> None: + """Appends an 'anonymous view' for a search result, where all site + contents are viewed through Whoogle as a proxy. 
+ + Args: + result: The search result to append an anon view link to + nojs: Remove Javascript from Anonymous View + + Returns: + None + + """ + av_link = BeautifulSoup(features='html.parser').new_tag('a') + nojs = 'nojs=1' if config.nojs else 'nojs=0' + location = f'location={result["href"]}' + av_link['href'] = f'{Endpoint.window}?{nojs}&{location}' + translation = current_app.config['TRANSLATIONS'][ + config.get_localization_lang() + ] + av_link.string = f'{translation["anon-view"]}' + av_link['class'] = 'anon-view' + result.append(av_link) + +def check_currency(response: str) -> dict: + """Check whether the results have currency conversion + + Args: + response: Search query Result + + Returns: + dict: Consists of currency names and values + + """ + soup = BeautifulSoup(response, 'html.parser') + currency_link = soup.find('a', {'href': 'https://g.co/gfd'}) + if currency_link: + while 'class' not in currency_link.attrs or \ + 'ZINbbc' not in currency_link.attrs['class']: + if currency_link.parent: + currency_link = currency_link.parent + else: + return {} + currency_link = currency_link.find_all(class_='BNeawe') + currency1 = currency_link[0].text + currency2 = currency_link[1].text + currency1 = currency1.rstrip('=').split(' ', 1) + currency2 = currency2.split(' ', 1) + + # Handle differences in currency formatting + # i.e. 
"5.000" vs "5,000" + if currency2[0][-3] == ',': + currency1[0] = currency1[0].replace('.', '') + currency1[0] = currency1[0].replace(',', '.') + currency2[0] = currency2[0].replace('.', '') + currency2[0] = currency2[0].replace(',', '.') + else: + currency1[0] = currency1[0].replace(',', '') + currency2[0] = currency2[0].replace(',', '') + + currency1_value = float(re.sub(r'[^\d\.]', '', currency1[0])) + currency1_label = currency1[1] + + currency2_value = float(re.sub(r'[^\d\.]', '', currency2[0])) + currency2_label = currency2[1] + + return {'currencyValue1': currency1_value, + 'currencyLabel1': currency1_label, + 'currencyValue2': currency2_value, + 'currencyLabel2': currency2_label + } + return {} + + +def add_currency_card(soup: BeautifulSoup, + conversion_details: dict) -> BeautifulSoup: + """Adds the currency conversion boxes + to response of the search query + + Args: + soup: Parsed search result + conversion_details: Dictionary of currency + related information + + Returns: + BeautifulSoup + """ + # Element before which the code will be changed + # (This is the 'disclaimer' link) + element1 = soup.find('a', {'href': 'https://g.co/gfd'}) + + while 'class' not in element1.attrs or \ + 'nXE3Ob' not in element1.attrs['class']: + element1 = element1.parent + + # Creating the conversion factor + conversion_factor = (conversion_details['currencyValue1'] / + conversion_details['currencyValue2']) + + # Creating a new div for the input boxes + conversion_box = soup.new_tag('div') + conversion_box['class'] = 'conversion_box' + + # Currency to be converted from + input_box1 = soup.new_tag('input') + input_box1['id'] = 'cb1' + input_box1['type'] = 'number' + input_box1['class'] = 'cb' + input_box1['value'] = conversion_details['currencyValue1'] + input_box1['oninput'] = f'convert(1, 2, {1 / conversion_factor})' + + label_box1 = soup.new_tag('label') + label_box1['for'] = 'cb1' + label_box1['class'] = 'cb_label' + label_box1.append(conversion_details['currencyLabel1']) 
+ + br = soup.new_tag('br') + + # Currency to be converted to + input_box2 = soup.new_tag('input') + input_box2['id'] = 'cb2' + input_box2['type'] = 'number' + input_box2['class'] = 'cb' + input_box2['value'] = conversion_details['currencyValue2'] + input_box2['oninput'] = f'convert(2, 1, {conversion_factor})' + + label_box2 = soup.new_tag('label') + label_box2['for'] = 'cb2' + label_box2['class'] = 'cb_label' + label_box2.append(conversion_details['currencyLabel2']) + + conversion_box.append(input_box1) + conversion_box.append(label_box1) + conversion_box.append(br) + conversion_box.append(input_box2) + conversion_box.append(label_box2) + + element1.insert_before(conversion_box) + return soup + + +def get_tabs_content(tabs: dict, + full_query: str, + search_type: str, + preferences: str, + translation: dict) -> dict: + """Takes the default tabs content and updates it according to the query. + + Args: + tabs: The default content for the tabs + full_query: The original search query + search_type: The current search_type + translation: The translation to get the names of the tabs + + Returns: + dict: contains the name, the href and if the tab is selected or not + """ + map_query = full_query + if '-site:' in full_query: + block_idx = full_query.index('-site:') + map_query = map_query[:block_idx] + tabs = copy.deepcopy(tabs) + for tab_id, tab_content in tabs.items(): + # update name to desired language + if tab_id in translation: + tab_content['name'] = translation[tab_id] + + # update href with query + query = full_query.replace(f'&tbm={search_type}', '') + + if tab_content['tbm'] is not None: + query = f"{query}&tbm={tab_content['tbm']}" + + if preferences: + query = f"{query}&preferences={preferences}" + + tab_content['href'] = tab_content['href'].format( + query=query, + map_query=map_query) + + # update if selected tab (default all tab is selected) + if tab_content['tbm'] == search_type: + tabs['all']['selected'] = False + tab_content['selected'] = True + return 
tabs diff --git a/app/utils/search.py b/app/utils/search.py new file mode 100755 index 0000000..e76eee8 --- /dev/null +++ b/app/utils/search.py @@ -0,0 +1,194 @@ +import os +import re +from typing import Any +from app.filter import Filter +from app.request import gen_query +from app.utils.misc import get_proxy_host_url +from app.utils.results import get_first_link +from bs4 import BeautifulSoup as bsoup +from cryptography.fernet import Fernet, InvalidToken +from flask import g + +TOR_BANNER = '

You are using Tor


' +CAPTCHA = 'div class="g-recaptcha"' + + +def needs_https(url: str) -> bool: + """Checks if the current instance needs to be upgraded to HTTPS + + Note that all Heroku instances are available by default over HTTPS, but + do not automatically set up a redirect when visited over HTTP. + + Args: + url: The instance url + + Returns: + bool: True/False representing the need to upgrade + + """ + https_only = bool(os.getenv('HTTPS_ONLY', 0)) + is_heroku = url.endswith('.herokuapp.com') + is_http = url.startswith('http://') + + return (is_heroku and is_http) or (https_only and is_http) + + +def has_captcha(results: str) -> bool: + """Checks to see if the search results are blocked by a captcha + + Args: + results: The search page html as a string + + Returns: + bool: True/False indicating if a captcha element was found + + """ + return CAPTCHA in results + + +class Search: + """Search query preprocessor - used before submitting the query or + redirecting to another site + + Attributes: + request: the incoming flask request + config: the current user config settings + session_key: the flask user fernet key + """ + def __init__(self, request, config, session_key, cookies_disabled=False): + method = request.method + self.request = request + self.request_params = request.args if method == 'GET' else request.form + self.user_agent = request.headers.get('User-Agent') + self.feeling_lucky = False + self.config = config + self.session_key = session_key + self.query = '' + self.widget = '' + self.view_image = True + self.cookies_disabled = cookies_disabled + self.search_type = self.request_params.get( + 'tbm') if 'tbm' in self.request_params else '' + + def __getitem__(self, name) -> Any: + return getattr(self, name) + + def __setitem__(self, name, value) -> None: + return setattr(self, name, value) + + def __delitem__(self, name) -> None: + return delattr(self, name) + + def __contains__(self, name) -> bool: + return hasattr(self, name) + + def new_search_query(self) -> str: + 
"""Parses a plaintext query into a valid string for submission + + Also decrypts the query string, if encrypted (in the case of + paginated results). + + Returns: + str: A valid query string + + """ + q = self.request_params.get('q') + + if q is None or len(q) == 0: + return '' + else: + # Attempt to decrypt if this is an internal link + try: + q = Fernet(self.session_key).decrypt(q.encode()).decode() + except InvalidToken: + pass + + # Strip '!' for "feeling lucky" queries + if match := re.search(r"(^|\s)!($|\s)", q): + self.feeling_lucky = True + start, end = match.span() + self.query = " ".join([seg for seg in [q[:start], q[end:]] if seg]) + else: + self.feeling_lucky = False + self.query = q + + # Check for possible widgets + self.widget = "ip" if re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" + + "($|( *[^a-z0-9] *(((addres|address|adres|" + + "adress)|a)? *$)))", self.query.lower()) else self.widget + self.widget = 'calculator' if re.search( + r"\bcalculator\b|\bcalc\b|\bcalclator\b|\bmath\b", + self.query.lower()) else self.widget + return self.query + + def generate_response(self) -> str: + """Generates a response for the user's query + + Returns: + str: A string response to the search query, in the form of a URL + or string representation of HTML content. 
+ + """ + mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent + # reconstruct url if X-Forwarded-Host header present + root_url = get_proxy_host_url( + self.request, + self.request.url_root, + root=True) + + content_filter = Filter(self.session_key, + root_url=root_url, + mobile=mobile, + config=self.config, + query=self.query) + full_query = gen_query(self.query, + self.request_params, + self.config) + self.full_query = full_query + + # force mobile search when view image is true and + # the request is not already made by a mobile + view_image = ('tbm=isch' in full_query + # and self.config.view_image + #and not g.user_request.mobile + ) + + get_body = g.user_request.send(query=full_query, + force_mobile=view_image, + user_agent=self.user_agent) + + + # Produce cleanable html soup from response + get_body_safed = get_body.text.replace("<","andlt;").replace(">","andgt;") + html_soup = bsoup(get_body_safed, 'html.parser') + + # Replace current soup if view_image is active + if view_image: + html_soup = content_filter.view_image(html_soup) + + # Indicate whether or not a Tor connection is active + if g.user_request.tor_valid: + html_soup.insert(0, bsoup(TOR_BANNER, 'html.parser')) + + formatted_results = content_filter.clean(html_soup) + if self.feeling_lucky: + if lucky_link := get_first_link(formatted_results): + return lucky_link + + # Fall through to regular search if unable to find link + self.feeling_lucky = False + + # Append user config to all search links, if available + param_str = ''.join('&{}={}'.format(k, v) + for k, v in + self.request_params.to_dict(flat=True).items() + if self.config.is_safe_key(k)) + for link in formatted_results.find_all('a', href=True): + link['rel'] = "nofollow noopener noreferrer" + if 'search?' 
not in link['href'] or link['href'].index( + 'search?') > 1: + continue + link['href'] += param_str + + return str(formatted_results) + diff --git a/app/utils/session.py b/app/utils/session.py new file mode 100755 index 0000000..5bac42b --- /dev/null +++ b/app/utils/session.py @@ -0,0 +1,39 @@ +from cryptography.fernet import Fernet +from flask import current_app as app + +REQUIRED_SESSION_VALUES = ['uuid', 'config', 'key', 'auth'] + + +def generate_key() -> bytes: + """Generates a key for encrypting searches and element URLs + + Args: + cookies_disabled: Flag for whether or not cookies are disabled by the + user. If so, the user can only use the default key + generated on app init for queries. + + Returns: + str: A unique Fernet key + + """ + # Generate/regenerate unique key per user + return Fernet.generate_key() + + +def valid_user_session(session: dict) -> bool: + """Validates the current user session + + Args: + session: The current Flask user session + + Returns: + bool: True/False indicating that all required session values are + available + + """ + # Generate secret key for user if unavailable + for value in REQUIRED_SESSION_VALUES: + if value not in session: + return False + + return True diff --git a/app/utils/widgets.py b/app/utils/widgets.py new file mode 100755 index 0000000..156ada9 --- /dev/null +++ b/app/utils/widgets.py @@ -0,0 +1,71 @@ +from pathlib import Path +from bs4 import BeautifulSoup + + +# root +BASE_DIR = Path(__file__).parent.parent.parent + +def add_ip_card(html_soup: BeautifulSoup, ip: str) -> BeautifulSoup: + """Adds the client's IP address to the search results + if query contains keywords + + Args: + html_soup: The parsed search result containing the keywords + ip: ip address of the client + + Returns: + BeautifulSoup + + """ + main_div = html_soup.select_one('#main') + if main_div: + # HTML IP card tag + ip_tag = html_soup.new_tag('div') + ip_tag['class'] = 'ZINbbc xpd O9g5cc uUPGi' + + # For IP Address html tag + ip_address = 
html_soup.new_tag('div') + ip_address['class'] = 'kCrYT ip-address-div' + ip_address.string = ip + + # Text below the IP address + ip_text = html_soup.new_tag('div') + ip_text.string = 'Your public IP address' + ip_text['class'] = 'kCrYT ip-text-div' + + # Adding all the above html tags to the IP card + ip_tag.append(ip_address) + ip_tag.append(ip_text) + + # Insert the element at the top of the result list + main_div.insert_before(ip_tag) + return html_soup + +def add_calculator_card(html_soup: BeautifulSoup) -> BeautifulSoup: + """Adds the a calculator widget to the search results + if query contains keywords + + Args: + html_soup: The parsed search result containing the keywords + + Returns: + BeautifulSoup + """ + main_div = html_soup.select_one('#main') + if main_div: + # absolute path + widget_file = open(BASE_DIR / 'app/static/widgets/calculator.html', encoding="utf8") + widget_tag = html_soup.new_tag('div') + widget_tag['class'] = 'ZINbbc xpd O9g5cc uUPGi' + widget_tag['id'] = 'calculator-wrapper' + calculator_text = html_soup.new_tag('div') + calculator_text['class'] = 'kCrYT ip-address-div' + calculator_text.string = 'Calculator' + calculator_widget = html_soup.new_tag('div') + calculator_widget.append(BeautifulSoup(widget_file, 'html.parser')) + calculator_widget['class'] = 'kCrYT ip-text-div' + widget_tag.append(calculator_text) + widget_tag.append(calculator_widget) + main_div.insert_before(widget_tag) + widget_file.close() + return html_soup diff --git a/app/version.py b/app/version.py new file mode 100755 index 0000000..31eead6 --- /dev/null +++ b/app/version.py @@ -0,0 +1,7 @@ +import os + +optional_dev_tag = '' +if os.getenv('DEV_BUILD'): + optional_dev_tag = '.dev' + os.getenv('DEV_BUILD') + +__version__ = '0.9.1' + optional_dev_tag diff --git a/charts/whoogle/.helmignore b/charts/whoogle/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/charts/whoogle/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building 
packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/whoogle/Chart.yaml b/charts/whoogle/Chart.yaml new file mode 100644 index 0000000..8ce2224 --- /dev/null +++ b/charts/whoogle/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: whoogle +description: A self hosted search engine on Kubernetes +type: application +version: 0.1.0 +appVersion: 0.9.1 + +icon: https://github.com/benbusby/whoogle-search/raw/main/app/static/img/favicon/favicon-96x96.png + +sources: + - https://github.com/benbusby/whoogle-search + - https://gitlab.com/benbusby/whoogle-search + - https://gogs.benbusby.com/benbusby/whoogle-search + +keywords: + - whoogle + - degoogle + - search + - google + - search-engine + - privacy + - tor + - python diff --git a/charts/whoogle/templates/NOTES.txt b/charts/whoogle/templates/NOTES.txt new file mode 100644 index 0000000..bbbf070 --- /dev/null +++ b/charts/whoogle/templates/NOTES.txt @@ -0,0 +1,22 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "whoogle.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. 
+ You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "whoogle.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "whoogle.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "whoogle.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/charts/whoogle/templates/_helpers.tpl b/charts/whoogle/templates/_helpers.tpl new file mode 100644 index 0000000..4b51048 --- /dev/null +++ b/charts/whoogle/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "whoogle.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "whoogle.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "whoogle.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "whoogle.labels" -}} +helm.sh/chart: {{ include "whoogle.chart" . }} +{{ include "whoogle.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "whoogle.selectorLabels" -}} +app.kubernetes.io/name: {{ include "whoogle.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "whoogle.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "whoogle.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/charts/whoogle/templates/deployment.yaml b/charts/whoogle/templates/deployment.yaml new file mode 100644 index 0000000..3da9f1e --- /dev/null +++ b/charts/whoogle/templates/deployment.yaml @@ -0,0 +1,82 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "whoogle.fullname" . }} + labels: + {{- include "whoogle.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "whoogle.selectorLabels" . 
| nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "whoogle.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.image.pullSecrets }} + imagePullSecrets: + {{- range .}} + - name: {{ . }} + {{- end }} + {{- end }} + serviceAccountName: {{ include "whoogle.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: whoogle + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.conf }} + env: + {{- range $k,$v := . }} + {{- if $v }} + - name: {{ $k }} + value: {{ tpl (toString $v) $ | quote }} + {{- end }} + {{- end }} + {{- end }} + ports: + - name: http + containerPort: {{ default 5000 .Values.conf.EXPOSE_PORT }} + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + {{- if and .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS }} + httpHeaders: + - name: Authorization + value: Basic {{ b64enc (printf "%s:%s" .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS) }} + {{- end }} + readinessProbe: + httpGet: + path: / + port: http + {{- if and .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS }} + httpHeaders: + - name: Authorization + value: Basic {{ b64enc (printf "%s:%s" .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS) }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/charts/whoogle/templates/hpa.yaml b/charts/whoogle/templates/hpa.yaml new file mode 100644 index 0000000..74be742 --- /dev/null +++ b/charts/whoogle/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "whoogle.fullname" . }} + labels: + {{- include "whoogle.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "whoogle.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/charts/whoogle/templates/ingress.yaml b/charts/whoogle/templates/ingress.yaml new file mode 100644 index 0000000..7fec7e9 --- /dev/null +++ b/charts/whoogle/templates/ingress.yaml @@ -0,0 +1,61 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "whoogle.fullname" . 
-}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "whoogle.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/whoogle/templates/service.yaml b/charts/whoogle/templates/service.yaml new file mode 100644 index 0000000..96521c4 --- /dev/null +++ b/charts/whoogle/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "whoogle.fullname" . }} + labels: + {{- include "whoogle.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "whoogle.selectorLabels" . | nindent 4 }} diff --git a/charts/whoogle/templates/serviceaccount.yaml b/charts/whoogle/templates/serviceaccount.yaml new file mode 100644 index 0000000..de1398a --- /dev/null +++ b/charts/whoogle/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "whoogle.serviceAccountName" . }} + labels: + {{- include "whoogle.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/whoogle/templates/tests/test-connection.yaml b/charts/whoogle/templates/tests/test-connection.yaml new file mode 100644 index 0000000..bc06188 --- /dev/null +++ b/charts/whoogle/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "whoogle.fullname" . }}-test-connection" + labels: + {{- include "whoogle.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "whoogle.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/charts/whoogle/values.yaml b/charts/whoogle/values.yaml new file mode 100644 index 0000000..54beded --- /dev/null +++ b/charts/whoogle/values.yaml @@ -0,0 +1,115 @@ +# Default values for whoogle. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +nameOverride: "" +fullnameOverride: "" + +replicaCount: 1 +image: + repository: benbusby/whoogle-search + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + pullSecrets: [] + # - my-image-pull-secret + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +conf: {} + # WHOOGLE_URL_PREFIX: "" # The URL prefix to use for the whoogle instance (i.e. "/whoogle") + # WHOOGLE_DOTENV: "" # Load environment variables in whoogle.env + # WHOOGLE_USER: "" # The username for basic auth. WHOOGLE_PASS must also be set if used. + # WHOOGLE_PASS: "" # The password for basic auth. WHOOGLE_USER must also be set if used. + # WHOOGLE_PROXY_USER: "" # The username of the proxy server. + # WHOOGLE_PROXY_PASS: "" # The password of the proxy server. 
+ # WHOOGLE_PROXY_TYPE: "" # The type of the proxy server. Can be "socks5", "socks4", or "http". + # WHOOGLE_PROXY_LOC: "" # The location of the proxy server (host or ip). + # EXPOSE_PORT: "" # The port where Whoogle will be exposed. (default 5000) + # HTTPS_ONLY: "" # Enforce HTTPS. (See https://github.com/benbusby/whoogle-search#https-enforcement) + # WHOOGLE_ALT_TW: "" # The twitter.com alternative to use when site alternatives are enabled in the config. + # WHOOGLE_ALT_YT: "" # The youtube.com alternative to use when site alternatives are enabled in the config. + # WHOOGLE_ALT_RD: "" # The reddit.com alternative to use when site alternatives are enabled in the config. + # WHOOGLE_ALT_TL: "" # The Google Translate alternative to use. This is used for all "translate ____" searches. + # WHOOGLE_ALT_MD: "" # The medium.com alternative to use when site alternatives are enabled in the config. + # WHOOGLE_ALT_IMG: "" # The imgur.com alternative to use when site alternatives are enabled in the config. + # WHOOGLE_ALT_WIKI: "" # The wikipedia.com alternative to use when site alternatives are enabled in the config. + # WHOOGLE_ALT_IMDB: "" # The imdb.com alternative to use. Set to "" to continue using imdb.com when site alternatives are enabled. + # WHOOGLE_ALT_QUORA: "" # The quora.com alternative to use. Set to "" to continue using quora.com when site alternatives are enabled. + # WHOOGLE_ALT_SO: "" # The stackoverflow.com alternative to use. Set to "" to continue using stackoverflow.com when site alternatives are enabled. + # WHOOGLE_AUTOCOMPLETE: "" # Controls visibility of autocomplete/search suggestions. Default on -- use '0' to disable + # WHOOGLE_MINIMAL: "" # Remove everything except basic result cards from all search queries. 
+ + # WHOOGLE_CONFIG_DISABLE: "" # Hide config from UI and disallow changes to config by client + # WHOOGLE_CONFIG_COUNTRY: "" # Filter results by hosting country + # WHOOGLE_CONFIG_LANGUAGE: "" # Set interface language + # WHOOGLE_CONFIG_SEARCH_LANGUAGE: "" # Set search result language + # WHOOGLE_CONFIG_BLOCK: "" # Block websites from search results (use comma-separated list) + # WHOOGLE_CONFIG_THEME: "" # Set theme mode (light, dark, or system) + # WHOOGLE_CONFIG_SAFE: "" # Enable safe searches + # WHOOGLE_CONFIG_ALTS: "" # Use social media site alternatives (nitter, invidious, etc) + # WHOOGLE_CONFIG_NEAR: "" # Restrict results to only those near a particular city + # WHOOGLE_CONFIG_TOR: "" # Use Tor routing (if available) + # WHOOGLE_CONFIG_NEW_TAB: "" # Always open results in new tab + # WHOOGLE_CONFIG_VIEW_IMAGE: "" # Enable View Image option + # WHOOGLE_CONFIG_GET_ONLY: "" # Search using GET requests only + # WHOOGLE_CONFIG_URL: "" # The root url of the instance (https:///) + # WHOOGLE_CONFIG_STYLE: "" # The custom CSS to use for styling (should be single line) + # WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED: "" # Encrypt preferences token, requires key + # WHOOGLE_CONFIG_PREFERENCES_KEY: "" # Key to encrypt preferences in URL (REQUIRED to show url) + +podAnnotations: {} +podSecurityContext: {} + # fsGroup: 2000 +securityContext: + runAsUser: 0 + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + +service: + type: ClusterIP + port: 5000 + +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: whoogle.example.com + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - whoogle.example.com + +resources: {} + # requests: + # cpu: 100m + # memory: 128Mi + # limits: + # cpu: 100m + # memory: 128Mi + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + 
targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +nodeSelector: {} +tolerations: [] +affinity: {} diff --git a/docker-compose-traefik.yaml b/docker-compose-traefik.yaml new file mode 100644 index 0000000..4cfb588 --- /dev/null +++ b/docker-compose-traefik.yaml @@ -0,0 +1,81 @@ +# can't use mem_limit in a 3.x docker-compose file in non swarm mode +# see https://github.com/docker/compose/issues/4513 +version: "2.4" + +services: + traefik: + image: "traefik:v2.7" + container_name: "traefik" + command: + #- "--log.level=DEBUG" + - "--api.insecure=true" + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--entrypoints.websecure.address=:443" + - "--certificatesresolvers.myresolver.acme.tlschallenge=true" + #- "--certificatesresolvers.myresolver.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory" + - "--certificatesresolvers.myresolver.acme.email=change@domain.name" + - "--certificatesresolvers.myresolver.acme.storage=/letsencrypt/acme.json" + ports: + - "443:443" + - "8080:8080" + volumes: + - "./letsencrypt:/letsencrypt" + - "/var/run/docker.sock:/var/run/docker.sock:ro" + + whoogle-search: + labels: + - "traefik.enable=true" + - "traefik.http.routers.whoami.rule=Host(`change.host.name`)" + - "traefik.http.routers.whoami.entrypoints=websecure" + - "traefik.http.routers.whoami.tls.certresolver=myresolver" + - "traefik.http.services.whoogle-search.loadbalancer.server.port=5000" + image: ${WHOOGLE_IMAGE:-benbusby/whoogle-search} + container_name: whoogle-search + restart: unless-stopped + pids_limit: 50 + mem_limit: 256mb + memswap_limit: 256mb + # user debian-tor from tor package + user: whoogle + security_opt: + - no-new-privileges + cap_drop: + - ALL + tmpfs: + - /config/:size=10M,uid=927,gid=927,mode=1700 + - /var/lib/tor/:size=15M,uid=927,gid=927,mode=1700 + - /run/tor/:size=1M,uid=927,gid=927,mode=1700 + environment: # Uncomment to configure environment variables + # Basic auth 
configuration, uncomment to enable + #- WHOOGLE_USER= + #- WHOOGLE_PASS= + # Proxy configuration, uncomment to enable + #- WHOOGLE_PROXY_USER= + #- WHOOGLE_PROXY_PASS= + #- WHOOGLE_PROXY_TYPE= + # Site alternative configurations, uncomment to enable + # Note: If not set, the feature will still be available + # with default values. + #- WHOOGLE_ALT_TW=farside.link/nitter + #- WHOOGLE_ALT_YT=farside.link/invidious + #- WHOOGLE_ALT_IG=farside.link/bibliogram/u + #- WHOOGLE_ALT_RD=farside.link/libreddit + #- WHOOGLE_ALT_MD=farside.link/scribe + #- WHOOGLE_ALT_TL=farside.link/lingva + #- WHOOGLE_ALT_IMG=farside.link/rimgo + #- WHOOGLE_ALT_WIKI=farside.link/wikiless + #- WHOOGLE_ALT_IMDB=farside.link/libremdb + #- WHOOGLE_ALT_QUORA=farside.link/quetre + #- WHOOGLE_ALT_SO=farside.link/anonymousoverflow + # - WHOOGLE_CONFIG_DISABLE=1 + # - WHOOGLE_CONFIG_SEARCH_LANGUAGE=lang_en + # - WHOOGLE_CONFIG_GET_ONLY=1 + # - WHOOGLE_CONFIG_COUNTRY=FR + # - WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED=1 + # - WHOOGLE_CONFIG_PREFERENCES_KEY="NEEDS_TO_BE_MODIFIED" + #env_file: # Alternatively, load variables from whoogle.env + #- whoogle.env + ports: + - 8000:5000 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..0a693e6 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,49 @@ +# can't use mem_limit in a 3.x docker-compose file in non swarm mode +# see https://github.com/docker/compose/issues/4513 +version: "2.4" + +services: + whoogle-search: + image: ${WHOOGLE_IMAGE:-benbusby/whoogle-search} + container_name: whoogle-search + restart: unless-stopped + pids_limit: 50 + mem_limit: 256mb + memswap_limit: 256mb + # user debian-tor from tor package + user: whoogle + security_opt: + - no-new-privileges + cap_drop: + - ALL + tmpfs: + - /config/:size=10M,uid=927,gid=927,mode=1700 + - /var/lib/tor/:size=15M,uid=927,gid=927,mode=1700 + - /run/tor/:size=1M,uid=927,gid=927,mode=1700 + #environment: # Uncomment to configure environment variables + # Basic auth 
configuration, uncomment to enable + #- WHOOGLE_USER= + #- WHOOGLE_PASS= + # Proxy configuration, uncomment to enable + #- WHOOGLE_PROXY_USER= + #- WHOOGLE_PROXY_PASS= + #- WHOOGLE_PROXY_TYPE= + # Site alternative configurations, uncomment to enable + # Note: If not set, the feature will still be available + # with default values. + #- WHOOGLE_ALT_TW=farside.link/nitter + #- WHOOGLE_ALT_YT=farside.link/invidious + #- WHOOGLE_ALT_IG=farside.link/bibliogram/u + #- WHOOGLE_ALT_RD=farside.link/libreddit + #- WHOOGLE_ALT_MD=farside.link/scribe + #- WHOOGLE_ALT_TL=farside.link/lingva + #- WHOOGLE_ALT_IMG=farside.link/rimgo + #- WHOOGLE_ALT_WIKI=farside.link/wikiless + #- WHOOGLE_ALT_IMDB=farside.link/libremdb + #- WHOOGLE_ALT_QUORA=farside.link/quetre + #- WHOOGLE_ALT_SO=farside.link/anonymousoverflow + #env_file: # Alternatively, load variables from whoogle.env + #- whoogle.env + ports: + - 5000:5000 diff --git a/docs/banner.png b/docs/banner.png new file mode 100644 index 0000000..2b895c1 Binary files /dev/null and b/docs/banner.png differ diff --git a/docs/screenshot_desktop.png b/docs/screenshot_desktop.png new file mode 100644 index 0000000..098ec13 Binary files /dev/null and b/docs/screenshot_desktop.png differ diff --git a/docs/screenshot_mobile.png b/docs/screenshot_mobile.png new file mode 100644 index 0000000..5d6e3f0 Binary files /dev/null and b/docs/screenshot_mobile.png differ diff --git a/filter.py b/filter.py new file mode 100755 index 0000000..68be403 --- /dev/null +++ b/filter.py @@ -0,0 +1,785 @@ +import cssutils +from bs4 import BeautifulSoup +from bs4.element import ResultSet, Tag +from cryptography.fernet import Fernet +from flask import render_template +import html +import urllib.parse as urlparse +from urllib.parse import parse_qs +import re + +from app.models.g_classes import GClasses +from app.request import VALID_PARAMS, MAPS_URL +from app.utils.misc import get_abs_url, read_config_bool +from app.utils.results import ( + BLANK_B64, GOOG_IMG, 
# Query-string parameters that may carry the location of a maps link, in the
# order build_map_url() checks them.
MAPS_ARGS = ['q', 'daddr']

# Result section headings associated with "minimal mode".
# NOTE(review): presumably consumed by Filter.collapse_sections -- confirm.
minimal_mode_sections = ['Top stories', 'Images', 'People also ask']

# Fragments identifying Google pages that are not supported.
# NOTE: every entry needs its own comma. Adjacent string literals are
# concatenated by Python, which previously merged the first six entries into
# one useless string (and silently dropped 'support.google.com').
# NOTE(review): the '*.' entries only work if the consumer does glob matching;
# with plain substring checks they never match -- confirm against the caller.
unsupported_g_pages = [
    'google.com/aclk',
    '*.googleapis.com',
    '*.gstatic.com',
    '*.google-analytics.com',
    'adservice.google.com',
    'support.google.com',
    'accounts.google.com',
    'policies.google.com',
    'google.com/preferences',
    'google.com/intl',
    'advanced_search',
    'tbm=shop',
    'ageverification.google.co.kr',
]

# Link fragments identifying result divs that are not supported.
# (Same missing-comma defect fixed here for the last two entries.)
unsupported_g_divs = [
    'google.com/preferences?hl=',
    'ageverification.google.co.kr',
    'google.com/aclk?sa=',
]


def extract_q(q_str: str, href: str) -> str:
    """Extracts the 'q' element from a result link. This is typically
    either the link to a result's website, or a string.

    Args:
        q_str: The result link to parse
        href: The full url to check for standalone 'q' elements first,
              rather than parsing the whole query string and then checking.

    Returns:
        str: The 'q' element of the link, or an empty string
    """
    if '&q=' not in href and '?q=' not in href:
        return ''

    # href can mention "q=" somewhere q_str does not cover (e.g. a fragment),
    # so fall back to '' instead of raising KeyError.
    return parse_qs(q_str, keep_blank_values=True).get('q', [''])[0]


def build_map_url(href: str) -> str:
    """Tries to extract known args that explain the location in the url. If a
    location is found, returns the default url with it. Otherwise, returns the
    url unchanged.

    Args:
        href: The full url to check.

    Returns:
        str: The parsed url, or the url unchanged.
    """
    # Only the query component may be handed to parse_qs: when given the whole
    # url, the first parameter is keyed as "https://host/path?q" and is never
    # found. A href without "?" is parsed as-is, matching the old behaviour.
    _, has_query, query = href.partition('?')
    query = query.partition('#')[0] if has_query else href
    parsed_url = parse_qs(query)

    # iterate through the known parameters and try build the url
    for param in MAPS_ARGS:
        if param in parsed_url:
            return MAPS_URL + "?q=" + parsed_url[param][0]

    # query could not be extracted returning unchanged url
    return href
def clean_css(css: str, page_url: str) -> str:
    """Rewrites the remote URLs in a CSS string so that they are requested
    through Whoogle's element endpoint.

    Args:
        css: The CSS string
        page_url: The url of the page the CSS came from; handed to
                  get_abs_url together with each URL found in the CSS.

    Returns:
        str: The filtered CSS, with URLs proxied through Whoogle
    """
    sheet = cssutils.parseString(css)

    # Map each distinct URL to its proxied form. Empty URLs are skipped:
    # str.replace('', x) would splice x between every character.
    proxied = {}
    for url in cssutils.getUrls(sheet):
        if not url or url in proxied:
            continue
        abs_url = get_abs_url(url, page_url)
        if abs_url.startswith('data:'):
            continue
        proxied[url] = f'{Endpoint.element}?type=image/png&url={abs_url}'

    if not proxied:
        return css

    # Substitute everything in one pass. Replacing URL by URL re-matched text
    # that had already been rewritten whenever a URL occurred twice or was a
    # substring of another one, nesting the proxy prefix. Longest alternatives
    # come first so a URL that prefixes another cannot shadow it.
    pattern = re.compile('|'.join(
        re.escape(url) for url in sorted(proxied, key=len, reverse=True)))
    return pattern.sub(lambda match: proxied[match.group(0)], css)
self.remove_block_url() + self.collapse_sections() + self.update_css() + self.update_styling() + self.remove_block_tabs() + + # self.main_divs is only populated for the main page of search results + # (i.e. not images/news/etc). + if self.main_divs: + for div in self.main_divs: + self.sanitize_div(div) + + for img in [_ for _ in self.soup.find_all('img') if 'src' in _.attrs]: + self.update_element_src(img, 'image/png') + + for audio in [_ for _ in self.soup.find_all('audio') if 'src' in _.attrs]: + self.update_element_src(audio, 'audio/mpeg') + audio['controls'] = '' + + for link in self.soup.find_all('a', href=True): + self.update_link(link) + self.add_favicon(link) + + if self.config.alts: + self.site_alt_swap() + + input_form = self.soup.find('form') + if input_form is not None: + input_form['method'] = 'GET' if self.config.get_only else 'POST' + # Use a relative URI for submissions + input_form['action'] = 'search' + + # Ensure no extra scripts passed through + for script in self.soup('script'): + script.decompose() + + # Update default footer and header + footer = self.soup.find('footer') + if footer: + # Remove divs that have multiple links beyond just page navigation + [_.decompose() for _ in footer.find_all('div', recursive=False) + if len(_.find_all('a', href=True)) > 3] + for link in footer.find_all('a', href=True): + link['href'] = f'{link["href"]}&preferences={self.config.preferences}' + + header = self.soup.find('header') + if header: + header.decompose() + self.remove_site_blocks(self.soup) + return self.soup + + def sanitize_div(self, div) -> None: + """Removes escaped script and iframe tags from results + + Returns: + None (The soup object is modified directly) + """ + if not div: + return + + for d in div.find_all('div', recursive=True): + d_text = d.find(text=True, recursive=False) + + # Ensure we're working with tags that contain text content + if not d_text or not d.string: + continue + + d.string = html.unescape(d_text) + div_soup = 
BeautifulSoup(d.string, 'html.parser') + + # Remove all valid script or iframe tags in the div + for script in div_soup.find_all('script'): + script.decompose() + + for iframe in div_soup.find_all('iframe'): + iframe.decompose() + + d.string = str(div_soup) + + def add_favicon(self, link) -> None: + """Adds icons for each returned result, using the result site's favicon + + Returns: + None (The soup object is modified directly) + """ + # Skip empty, parentless, or internal links + show_favicons = read_config_bool('WHOOGLE_SHOW_FAVICONS', True) + is_valid_link = link and link.parent and link['href'].startswith('http') + if not show_favicons or not is_valid_link: + return + + parent = link.parent + is_result_div = False + + # Check each parent to make sure that the div doesn't already have a + # favicon attached, and that the div is a result div + while parent: + p_cls = parent.attrs.get('class') or [] + if 'has-favicon' in p_cls or GClasses.scroller_class in p_cls: + return + elif GClasses.result_class_a not in p_cls: + parent = parent.parent + else: + is_result_div = True + break + + if not is_result_div: + return + + # Construct the html for inserting the icon into the parent div + parsed = urlparse.urlparse(link['href']) + favicon = self.encrypt_path( + f'{parsed.scheme}://{parsed.netloc}/favicon.ico', + is_element=True) + src = f'{self.root_url}/{Endpoint.element}?url={favicon}' + \ + '&type=image/x-icon' + html = f'' + + favicon = BeautifulSoup(html, 'html.parser') + link.parent.insert(0, favicon) + + # Update all parents to indicate that a favicon has been attached + parent = link.parent + while parent: + p_cls = parent.get('class') or [] + p_cls.append('has-favicon') + parent['class'] = p_cls + parent = parent.parent + + if GClasses.result_class_a in p_cls: + break + + def remove_site_blocks(self, soup) -> None: + if not self.config.block or not soup.body: + return + search_string = ' '.join(['-site:' + + _ for _ in self.config.block.split(',')]) + selected = 
def remove_ads(self) -> None:
    """Removes ads found in the list of search result divs

    Returns:
        None (The soup object is modified directly)
    """
    if not self.main_divs:
        return

    # Iterate over a copy: matching divs are removed inside the loop.
    for div in list(self.main_divs.find_all('div', recursive=True)):
        if any(has_ad_content(span.text)
               for span in div.find_all('span', recursive=True)):
            div.decompose()


def remove_block_titles(self) -> None:
    """Removes every div containing an <h3> whose text matches the
    config.block_title regular expression.

    Returns:
        None (The soup object is modified directly)
    """
    if not self.main_divs or not self.config.block_title:
        return

    block_title = re.compile(self.config.block_title)
    for div in list(self.main_divs.find_all('div', recursive=True)):
        if any(block_title.search(h3.text)
               for h3 in div.find_all('h3', recursive=True)):
            div.decompose()


def remove_block_url(self) -> None:
    """Removes every div containing a link whose href matches the
    config.block_url regular expression.

    Returns:
        None (The soup object is modified directly)
    """
    if not self.main_divs or not self.config.block_url:
        return

    block_url = re.compile(self.config.block_url)
    for div in list(self.main_divs.find_all('div', recursive=True)):
        # href=True limits the search to anchors that actually carry an
        # href; indexing attrs['href'] on a bare <a> raised KeyError.
        if any(block_url.search(a['href'])
               for a in div.find_all('a', href=True, recursive=True)):
            div.decompose()


def remove_block_tabs(self) -> None:
    """Removes the divs carrying the search-tab class from the page.

    Returns:
        None (The soup object is modified directly)
    """
    if self.main_divs:
        scope, tab_class = self.main_divs, GClasses.main_tbm_tab
    else:
        # when in images tab
        scope, tab_class = self.soup, GClasses.images_tbm_tab

    for div in scope.find_all('div', attrs={'class': f'{tab_class}'}):
        div.decompose()
+ + Returns: + None (The soup object is modified directly) + """ + minimal_mode = read_config_bool('WHOOGLE_MINIMAL') + + def pull_child_divs(result_div: BeautifulSoup): + try: + return result_div.findChildren( + 'div', recursive=False + )[0].findChildren( + 'div', recursive=False) + except IndexError: + return [] + + if not self.main_divs: + return + #töörölni kell People also ask, + search_terms = ["People also search for", "Related searches", "Kapcsolódó keresések", "Mások ezeket keresték még"] + details_list = [] + + # Loop through results and check for the number of child divs in each + for result in self.main_divs.find_all(): + result_children = pull_child_divs(result) + if minimal_mode: + if any(f">{x} 1: + subtitle = ' (' + \ + ''.join(content[1:]) + ')' + elem.decompose() + break + + # Determine the class based on the label content + if any(term in label for term in search_terms): + details_class = 'search-recommendations' + details_attrs = {'class': details_class, 'open': 'true'} + else: + details_class = 'other-results' + details_attrs = {'class': details_class} + + + # Create the new details element to wrap around the result's + # first parent + parent = None + idx = 0 + while not parent and idx < len(result_children): + parent = result_children[idx].parent + idx += 1 + + details = BeautifulSoup(features='html.parser').new_tag('details', attrs=details_attrs) + summary = BeautifulSoup(features='html.parser').new_tag('summary', attrs={'class': "summary_div"}) + summary.string = label + + if subtitle: + soup = BeautifulSoup(subtitle, 'html.parser') + summary.append(soup) + + details.append(summary) + + if parent and not minimal_mode: + parent.wrap(details) + elif parent and minimal_mode: + # Remove parent element from document if "minimal mode" is + # enabled + parent.decompose() + + for details in details_list: + self.main_divs.append(details) + + def update_element_src(self, element: Tag, mime: str, attr='src') -> None: + """Encrypts the original src of 
an element and rewrites the element src + to use the "/element?src=" pass-through. + + Returns: + None (The soup element is modified directly) + + """ + src = element[attr].split(' ')[0] + + if src.startswith('//'): + src = 'https:' + src + elif src.startswith('data:'): + return + + if src.startswith(LOGO_URL): + # Re-brand with Whoogle logo + element.replace_with(BeautifulSoup( + render_template('logo.html'), + features='html.parser')) + return + elif src.startswith(G_M_LOGO_URL): + # Re-brand with single-letter Whoogle logo + element['src'] = 'static/img/favicon/apple-icon.png' + element.parent['href'] = 'home' + return + elif src.startswith(GOOG_IMG) or GOOG_STATIC in src: + element['src'] = BLANK_B64 + return + + element[attr] = f'{self.root_url}/{Endpoint.element}?url=' + ( + self.encrypt_path( + src, + is_element=True + ) + '&type=' + urlparse.quote(mime) + ) + + def update_css(self) -> None: + """Updates URLs used in inline styles to be proxied by Whoogle + using the /element endpoint. + + Returns: + None (The soup element is modified directly) + + """ + # Filter all