Merge remote-tracking branch 'origin/main' into feature/public-instance-sessions

This commit is contained in:
Ben Busby 2021-11-15 20:43:52 -07:00
commit b0733fd74a
No known key found for this signature in database
GPG Key ID: 339B7B7EB5333D14
17 changed files with 507 additions and 12 deletions

View File

@ -1,3 +1,3 @@
language = "bash"
run = "pip install -r requirements.txt && ./run"
onBoot = "pip install -r requirements.txt && ./run"
run = "kill $(lsof -t -i:5000) > /dev/null 2>&1; pip install -r requirements.txt && ./run"
onBoot = "kill $(lsof -t -i:5000) > /dev/null 2>&1; pip install -r requirements.txt && ./run"

View File

@ -22,6 +22,7 @@ Contents
6. [Manual](#f-manual)
7. [Docker](#g-manual-docker)
8. [Arch/AUR](#arch-linux--arch-based-distributions)
9. [Helm/Kubernetes](#helm-chart-for-kubernetes)
4. [Environment Variables and Configuration](#environment-variables)
5. [Usage](#usage)
6. [Extra Steps](#extra-steps)
@ -287,6 +288,13 @@ You may also edit environment variables from your apps Settings tab in the He
#### Arch Linux & Arch-based Distributions
There is an [AUR package available](https://aur.archlinux.org/packages/whoogle-git/), as well as a pre-built and daily updated package available at [Chaotic-AUR](https://chaotic.cx).
#### Helm chart for Kubernetes
To use the Kubernetes Helm Chart:
1. Ensure you have [Helm](https://helm.sh/docs/intro/install/) `>=3.0.0` installed
2. Clone this repository
3. Update [charts/whoogle/values.yaml](./charts/whoogle/values.yaml) as desired
4. Run `helm install whoogle ./charts/whoogle`
#### Using your own server, or alternative container deployment
There are other methods for deploying docker containers that are well outlined in [this article](https://rollout.io/blog/the-shortlist-of-docker-hosting/), but there are too many to describe the setup for each one here. Generally it should be about the same amount of effort as the Heroku deployment.
@ -492,6 +500,7 @@ A lot of the app currently piggybacks on Google's existing support for fetching
| Website | Country | Language | Cloudflare |
|-|-|-|-|
| [https://search.albony.xyz](https://search.albony.xyz/) | 🇮🇳 IN | Multi-choice | |
| [https://whoogle.sdf.org](https://whoogle.sdf.org) | 🇺🇸 US | Multi-choice | |
| [https://whoogle.kavin.rocks](https://whoogle.kavin.rocks) | 🇮🇳 IN | Unknown | ✅ |
| [https://search.garudalinux.org](https://search.garudalinux.org) | 🇩🇪 DE | Multi-choice | |
@ -500,6 +509,7 @@ A lot of the app currently piggybacks on Google's existing support for fetching
| [https://search.exonip.de](https://search.exonip.de) | 🇳🇱 NL | Multi-choice | |
| [https://s.alefvanoon.xyz](https://s.alefvanoon.xyz) | 🇺🇸 US | English | ✅ |
| [https://search.flux.industries](https://search.flux.industries) | 🇩🇪 DE | German | ✅ |
| [https://www.whooglesearch.ml](https://www.whooglesearch.ml) | 🇺🇸 US | English | |
| [http://whoogledq5f5wly5p4i2ohnvjwlihnlg4oajjum2oeddfwqdwupbuhqd.onion](http://whoogledq5f5wly5p4i2ohnvjwlihnlg4oajjum2oeddfwqdwupbuhqd.onion) | 🇮🇳 IN | Unknown | |
* A checkmark in the "Cloudflare" category here refers to the use of the reverse proxy, [Cloudflare](https://cloudflare.com). The checkmark is not listed for a site that only uses Cloudflare DNS; it is listed for sites that use Cloudflare's proxying service, which grants Cloudflare the ability to monitor traffic to the website.

View File

@ -381,6 +381,10 @@ class Filter:
for item in results_all:
urls = item.find('a')['href'].split('&imgrefurl=')
# Skip urls that are not two-element lists
if len(urls) != 2:
continue
img_url = urlparse.unquote(urls[0].replace('/imgres?imgurl=', ''))
try:

View File

@ -274,14 +274,19 @@ class Request:
# Make sure that the tor connection is valid, if enabled
if self.tor:
try:
tor_check = requests.get('https://check.torproject.org/',
proxies=self.proxies, headers=headers)
self.tor_valid = 'Congratulations' in tor_check.text
if not self.tor_valid:
raise TorError(
"Tor connection succeeded, but the connection could not "
"be validated by torproject.org",
"Tor connection succeeded, but the connection could "
"not be validated by torproject.org",
disable=True)
except ConnectionError:
raise TorError(
"Error raised during Tor connection validation",
disable=True)
response = requests.get(

View File

@ -516,5 +516,42 @@
"light": "रोशनी",
"dark": "अंधेरा",
"system": "प्रणाली"
},
"lang_ja": {
"search": "検索",
"config": "設定",
"config-country": "検索結果を国でフィルタ",
"config-country-help": "注: 有効にした場合、選択した国で*ホストされている*ウェブサイトのみが検索結果に表示されます。",
"config-lang": "インタフェースの言語",
"config-lang-search": "検索する言語",
"config-near": "場所",
"config-near-help": "街の名前",
"config-block": "ブロック",
"config-block-help": "サイトのリストをコンマ区切りで入力",
"config-block-title": "タイトルでブロック",
"config-block-title-help": "正規表現を使用します",
"config-block-url": "URLでブロック",
"config-block-url-help": "正規表現を使用",
"config-theme": "テーマ",
"config-nojs": "非JSリンクを表示",
"config-dark": "ダークモード",
"config-safe": "セーフサーチ",
"config-alts": "ソーシャルメディアのリンクを置き換え",
"config-alts-help": "Twitter/YouTube/Instagramなどのリンクを、プライバシーを尊重した代替サイトに置き換えます。",
"config-new-tab": "新しいタブでリンクを開く",
"config-images": "フルサイズの画像を検索",
"config-images-help": "(実験的) デスクトップの画像検索に「画像を表示」オプションを追加します。これにより、画像検索結果のサムネイルの解像度が低くなります。",
"config-tor": "Torを使用",
"config-get-only": "GETリクエストのみ",
"config-url": "ルートURL",
"config-css": "カスタムCSS",
"load": "読み込み",
"apply": "反映",
"save-as": "名前を付けて保存",
"github-link": "Githubで確認",
"translate": "翻訳",
"light": "ライト",
"dark": "ダーク",
"system": "自動"
}
}

View File

@ -139,10 +139,8 @@ class Search:
html_soup = content_filter.view_image(html_soup)
# Indicate whether or not a Tor connection is active
tor_banner = bsoup('', 'html.parser')
if g.user_request.tor_valid:
tor_banner = bsoup(TOR_BANNER, 'html.parser')
html_soup.insert(0, tor_banner)
html_soup.insert(0, bsoup(TOR_BANNER, 'html.parser'))
if self.feeling_lucky:
return get_first_link(html_soup)

View File

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

23
charts/whoogle/Chart.yaml Normal file
View File

@ -0,0 +1,23 @@
apiVersion: v2
name: whoogle
description: A self hosted search engine on Kubernetes
type: application
version: 0.1.0
appVersion: 0.6.0
icon: https://github.com/benbusby/whoogle-search/raw/main/app/static/img/favicon/favicon-96x96.png
sources:
- https://github.com/benbusby/whoogle-search
- https://gitlab.com/benbusby/whoogle-search
- https://gogs.benbusby.com/benbusby/whoogle-search
keywords:
- whoogle
- degoogle
- search
- google
- search-engine
- privacy
- tor
- python

View File

@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "whoogle.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "whoogle.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "whoogle.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "whoogle.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

View File

@ -0,0 +1,62 @@
{{/*
Short chart name: .Values.nameOverride when it is set, otherwise .Chart.Name,
cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "whoogle.name" -}}
{{- $base := .Values.nameOverride | default .Chart.Name }}
{{- $base | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Fully qualified app name, limited to 63 characters (some Kubernetes name
fields are capped there by the DNS naming spec) with a trailing "-" removed.
Precedence:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name
     (or .Values.nameOverride);
  3. "<release name>-<chart name or nameOverride>".
*/}}
{{- define "whoogle.fullname" -}}
{{- with .Values.fullnameOverride }}
{{- . | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $chartName := .Values.nameOverride | default .Chart.Name }}
{{- $qualified := printf "%s-%s" .Release.Name $chartName }}
{{- if contains $chartName .Release.Name }}
{{- $qualified = .Release.Name }}
{{- end }}
{{- $qualified | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{/*
Chart identifier "<chart name>-<chart version>" with every "+" replaced by
"_", limited to 63 characters and with a trailing "-" removed.
*/}}
{{- define "whoogle.chart" -}}
{{- $label := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- replace "+" "_" $label | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels for resource metadata.
Emits, one per line: the helm.sh/chart label (from whoogle.chart), the
selector labels (from whoogle.selectorLabels), app.kubernetes.io/version
(only when .Chart.AppVersion is set, quoted) and
app.kubernetes.io/managed-by (.Release.Service).
Lines are emitted without indentation; callers indent the result themselves.
*/}}
{{- define "whoogle.labels" -}}
helm.sh/chart: {{ include "whoogle.chart" . }}
{{ include "whoogle.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels.
The name/instance pair: app.kubernetes.io/name comes from whoogle.name and
app.kubernetes.io/instance from .Release.Name. This set is also embedded in
whoogle.labels.
*/}}
{{- define "whoogle.selectorLabels" -}}
app.kubernetes.io/name: {{ include "whoogle.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Name of the service account to use: .Values.serviceAccount.name when set.
Otherwise it falls back to the fullname when serviceAccount.create is enabled,
or to "default" when it is not.
*/}}
{{- define "whoogle.serviceAccountName" -}}
{{- $fallback := "default" }}
{{- if .Values.serviceAccount.create }}
{{- $fallback = include "whoogle.fullname" . }}
{{- end }}
{{- default $fallback .Values.serviceAccount.name }}
{{- end }}

View File

@ -0,0 +1,72 @@
{{- /*
Deployment for the whoogle container.
Values read here: replicaCount, autoscaling.enabled, podAnnotations,
image.{repository,tag,pullPolicy,pullSecrets}, podSecurityContext,
securityContext, conf, resources, nodeSelector, affinity, tolerations.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "whoogle.fullname" . }}
labels:
{{- include "whoogle.labels" . | nindent 4 }}
spec:
{{- /* replicas is only set when autoscaling is disabled */}}
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "whoogle.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "whoogle.selectorLabels" . | nindent 8 }}
spec:
{{- with .Values.image.pullSecrets }}
imagePullSecrets:
{{- range .}}
- name: {{ . }}
{{- end }}
{{- end }}
serviceAccountName: {{ include "whoogle.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: whoogle
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- /* Each non-empty entry of .Values.conf becomes an env var; its value is stringified, rendered through tpl against the root context, and quoted */}}
{{- with .Values.conf }}
env:
{{- range $k,$v := . }}
{{- if $v }}
- name: {{ $k }}
value: {{ tpl (toString $v) $ | quote }}
{{- end }}
{{- end }}
{{- end }}
{{- /* The container port is named "http" (the probes below refer to it by name) and falls back to 5000 when conf.EXPOSE_PORT is unset */}}
ports:
- name: http
containerPort: {{ default 5000 .Values.conf.EXPOSE_PORT }}
protocol: TCP
livenessProbe:
httpGet:
path: /
port: http
readinessProbe:
httpGet:
path: /
port: http
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}

View File

@ -0,0 +1,28 @@
{{- /*
HorizontalPodAutoscaler for the whoogle Deployment; rendered only when
.Values.autoscaling.enabled is set.

API version: autoscaling/v2 when the cluster advertises it, otherwise
autoscaling/v2beta2. The previously used autoscaling/v2beta1 is deprecated and
no longer served as of Kubernetes 1.25. Both selected versions share the
metric schema used below (resource.target.type / averageUtilization), which
replaces v2beta1's resource.targetAverageUtilization.

Values read: autoscaling.minReplicas, autoscaling.maxReplicas,
autoscaling.targetCPUUtilizationPercentage and
autoscaling.targetMemoryUtilizationPercentage (each metric is emitted only
when its target is set).
*/ -}}
{{- if .Values.autoscaling.enabled }}
{{- if .Capabilities.APIVersions.Has "autoscaling/v2" }}
apiVersion: autoscaling/v2
{{- else }}
apiVersion: autoscaling/v2beta2
{{- end }}
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "whoogle.fullname" . }}
  labels:
    {{- include "whoogle.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "whoogle.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,61 @@
{{- /*
Ingress for the whoogle Service; rendered only when .Values.ingress.enabled
is set. The API version is chosen from the cluster version:
networking.k8s.io/v1 for >=1.19, networking.k8s.io/v1beta1 for >=1.14,
extensions/v1beta1 otherwise.
*/ -}}
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "whoogle.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- /* On clusters older than 1.18, className is passed via the kubernetes.io/ingress.class annotation unless that annotation is already present; on >=1.18 spec.ingressClassName is used instead (see below) */ -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
{{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
{{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}
labels:
{{- include "whoogle.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
ingressClassName: {{ .Values.ingress.className }}
{{- end }}
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
pathType: {{ .pathType }}
{{- end }}
{{- /* Backend shape depends on the cluster version: service.name / service.port.number on >=1.19, serviceName / servicePort otherwise */}}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- else }}
serviceName: {{ $fullName }}
servicePort: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{- /*
Service for whoogle. Type and port come from .Values.service; traffic is
forwarded to the target port named "http" on the pods matched by the
selector labels.
*/ -}}
apiVersion: v1
kind: Service
metadata:
name: {{ include "whoogle.fullname" . }}
labels:
{{- include "whoogle.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "whoogle.selectorLabels" . | nindent 4 }}

View File

@ -0,0 +1,12 @@
{{- /*
ServiceAccount; rendered only when .Values.serviceAccount.create is set.
Its name comes from whoogle.serviceAccountName, and annotations are added
only when .Values.serviceAccount.annotations is non-empty.
*/ -}}
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "whoogle.serviceAccountName" . }}
labels:
{{- include "whoogle.labels" . | nindent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{- /*
Helm test hook (annotation "helm.sh/hook": test): a one-shot busybox pod that
runs wget against "<fullname>:<service.port>". restartPolicy is Never, so the
pod is not restarted after wget exits.
*/ -}}
apiVersion: v1
kind: Pod
metadata:
name: "{{ include "whoogle.fullname" . }}-test-connection"
labels:
{{- include "whoogle.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": test
spec:
containers:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "whoogle.fullname" . }}:{{ .Values.service.port }}']
restartPolicy: Never

108
charts/whoogle/values.yaml Normal file
View File

@ -0,0 +1,108 @@
# Default values for whoogle.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
nameOverride: ""
fullnameOverride: ""
replicaCount: 1
image:
repository: benbusby/whoogle-search
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: ""
pullSecrets: []
# - my-image-pull-secret
serviceAccount:
# Specifies whether a service account should be created
create: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
conf: {}
# WHOOGLE_DOTENV: "" # Load environment variables in whoogle.env
# WHOOGLE_USER: "" # The username for basic auth. WHOOGLE_PASS must also be set if used.
# WHOOGLE_PASS: "" # The password for basic auth. WHOOGLE_USER must also be set if used.
# WHOOGLE_PROXY_USER: "" # The username of the proxy server.
# WHOOGLE_PROXY_PASS: "" # The password of the proxy server.
# WHOOGLE_PROXY_TYPE: "" # The type of the proxy server. Can be "socks5", "socks4", or "http".
# WHOOGLE_PROXY_LOC: "" # The location of the proxy server (host or ip).
# EXPOSE_PORT: "" # The port where Whoogle will be exposed. (default 5000)
# HTTPS_ONLY: "" # Enforce HTTPS. (See https://github.com/benbusby/whoogle-search#https-enforcement)
# WHOOGLE_ALT_TW: "" # The twitter.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_YT: "" # The youtube.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_IG: "" # The instagram.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_RD: "" # The reddit.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_TL: "" # The Google Translate alternative to use. This is used for all "translate ____" searches.
# WHOOGLE_ALT_MD: "" # The medium.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_AUTOCOMPLETE: "" # Controls visibility of autocomplete/search suggestions. Default on -- use '0' to disable
# WHOOGLE_MINIMAL: "" # Remove everything except basic result cards from all search queries.
# WHOOGLE_CONFIG_DISABLE: "" # Hide config from UI and disallow changes to config by client
# WHOOGLE_CONFIG_COUNTRY: "" # Filter results by hosting country
# WHOOGLE_CONFIG_LANGUAGE: "" # Set interface language
# WHOOGLE_CONFIG_SEARCH_LANGUAGE: "" # Set search result language
# WHOOGLE_CONFIG_BLOCK: "" # Block websites from search results (use comma-separated list)
# WHOOGLE_CONFIG_THEME: "" # Set theme mode (light, dark, or system)
# WHOOGLE_CONFIG_SAFE: "" # Enable safe searches
# WHOOGLE_CONFIG_ALTS: "" # Use social media site alternatives (nitter, invidious, etc)
# WHOOGLE_CONFIG_NEAR: "" # Restrict results to only those near a particular city
# WHOOGLE_CONFIG_TOR: "" # Use Tor routing (if available)
# WHOOGLE_CONFIG_NEW_TAB: "" # Always open results in new tab
# WHOOGLE_CONFIG_VIEW_IMAGE: "" # Enable View Image option
# WHOOGLE_CONFIG_GET_ONLY: "" # Search using GET requests only
# WHOOGLE_CONFIG_URL: "" # The root url of the instance (https://<your url>/)
# WHOOGLE_CONFIG_STYLE: "" # The custom CSS to use for styling (should be single line)
podAnnotations: {}
podSecurityContext: {}
# fsGroup: 2000
securityContext:
runAsUser: 0
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
service:
type: ClusterIP
port: 5000
ingress:
enabled: false
className: ""
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: whoogle.example.com
paths:
- path: /
pathType: ImplementationSpecific
tls: []
# - secretName: chart-example-tls
# hosts:
# - whoogle.example.com
resources: {}
# requests:
# cpu: 100m
# memory: 128Mi
# limits:
# cpu: 100m
# memory: 128Mi
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
nodeSelector: {}
tolerations: []
affinity: {}