You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
167 lines
3.9 KiB
Go
167 lines
3.9 KiB
Go
package fuckgoogle
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"regexp"
|
|
"sync"
|
|
|
|
"github.com/caddyserver/caddy/v2"
|
|
"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
|
|
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
|
|
)
|
|
|
|
type GoogleMatcher struct {
|
|
// traffic from google itself
|
|
Bot bool `json:"googlebot,omitempty"`
|
|
IPranges bool `json:"ip_ranges,omitempty"`
|
|
// traffic from google users
|
|
UAChrome bool `json:"useragent_chrome,omitempty"`
|
|
UAChromeish bool `json:"useragent_chromeish,omitempty"`
|
|
Referer bool `json:"referer_google,omitempty"`
|
|
RefererExt bool `json:"referer_other,omitempty"`
|
|
|
|
ipMatcher caddyhttp.MatchRemoteIP
|
|
}
|
|
|
|
// provisionIPRangesOnce guards the FetchGoogleIPRangesFull call made from
// Provision. It is package-level state, so it is shared by every
// GoogleMatcher instance in the process and fires at most once.
var provisionIPRangesOnce sync.Once
|
|
|
|
func init() {
|
|
caddy.RegisterModule(GoogleMatcher{})
|
|
}
|
|
|
|
// CaddyModule returns the Caddy module information.
|
|
func (GoogleMatcher) CaddyModule() caddy.ModuleInfo {
|
|
return caddy.ModuleInfo{
|
|
ID: "http.matchers.google",
|
|
New: func() caddy.Module { return new(GoogleMatcher) },
|
|
}
|
|
}
|
|
|
|
// Provision sets up the module defaults.
|
|
func (m *GoogleMatcher) Provision(ctx caddy.Context) (err error) {
|
|
if m.IPranges {
|
|
provisionIPRangesOnce.Do(func() {
|
|
var ranges []string
|
|
ranges, err = FetchGoogleIPRangesFull(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
ctx.Logger(m).Warn(fmt.Sprintf("loaded google IP ranges: %v", ranges))
|
|
m.ipMatcher = caddyhttp.MatchRemoteIP{Ranges: ranges}
|
|
err = m.ipMatcher.Provision(ctx)
|
|
})
|
|
}
|
|
return err
|
|
}
|
|
|
|
// Match returns true if r matches m.
|
|
func (m GoogleMatcher) Match(r *http.Request) bool {
|
|
ua := r.Header.Get("user-agent")
|
|
if m.UAChromeish && isChromeishUA(ua) {
|
|
return true
|
|
}
|
|
if m.UAChrome && isChromeUA(ua) {
|
|
return true
|
|
}
|
|
ref := r.Header.Get("referer")
|
|
if m.Referer && isGoogleRef(ref) {
|
|
return true
|
|
}
|
|
if m.RefererExt {
|
|
// TODO not implemented
|
|
// return false
|
|
}
|
|
if m.Bot && isGooglebot(ua) {
|
|
return true
|
|
}
|
|
if m.IPranges && m.ipMatcher.Match(r) {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// UnmarshalCaddyfile sets up the handler from Caddyfile tokens.
|
|
//
|
|
// google [googlebot] [ip-ranges] [ua-chrome] [ua-chromeish] [referer-google] [referer-other]
|
|
//
|
|
// google {
|
|
// [googlebot]
|
|
// [ip-ranges]
|
|
// [ua-chrome]
|
|
// [ua-chromeish]
|
|
// [referer-google]
|
|
// [referer-other]
|
|
// }
|
|
func (m *GoogleMatcher) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
|
|
handleArg := func(arg string) bool {
|
|
switch d.Val() {
|
|
case "googlebot":
|
|
m.Bot = true
|
|
case "ip-ranges":
|
|
m.IPranges = true
|
|
case "ua-chrome":
|
|
m.UAChrome = true
|
|
case "ua-chromeish":
|
|
m.UAChromeish = true
|
|
case "referer-google":
|
|
m.Referer = true
|
|
case "referer-other":
|
|
m.RefererExt = true
|
|
default:
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
var setAnything bool
|
|
for d.Next() {
|
|
for d.NextArg() {
|
|
if !handleArg(d.Val()) {
|
|
return d.Errf("unrecognized argument: %s", d.Val())
|
|
}
|
|
setAnything = true
|
|
}
|
|
for d.NextBlock(0) {
|
|
if !handleArg(d.Val()) {
|
|
return d.Errf("unrecognized subdirective: %s", d.Val())
|
|
}
|
|
setAnything = true
|
|
}
|
|
if !setAnything {
|
|
return d.Err("this matcher has no effect without any arguments!")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Interface guards
|
|
var (
|
|
_ caddy.Module = (*GoogleMatcher)(nil)
|
|
_ caddy.Provisioner = (*GoogleMatcher)(nil)
|
|
_ caddyhttp.RequestMatcher = (*GoogleMatcher)(nil)
|
|
_ caddyfile.Unmarshaler = (*GoogleMatcher)(nil)
|
|
)
|
|
|
|
// Patterns used to classify a request's User-Agent and Referer headers.
// They are compiled once at package initialisation.
var (
	// uaChromederived matches the product tokens of browsers built on
	// Chrome/Chromium that are treated separately from Chrome itself.
	uaChromederived = regexp.MustCompile("(Brave|Chromium|Edg|OPR|Yowser|Vivaldi)")

	// uaChrome matches any User-Agent carrying the "Chrome" token; on its
	// own it also matches the derived browsers above.
	uaChrome = regexp.MustCompile("Chrome")

	// uaGooglebot matches Google's crawler. The crawler identifies itself
	// as "Googlebot" (lowercase "b"), which the previous case-sensitive
	// pattern "GoogleBot" never matched; matching is now case-insensitive,
	// which still accepts the old spelling.
	uaGooglebot = regexp.MustCompile("(?i)googlebot")

	// refGoogle matches a Referer pointing at a Google search domain, with
	// or without the scheme and the "www." prefix. Besides plain TLDs
	// (google.com, google.de) it accepts the "co."/"com." second-level
	// country forms (google.co.uk, google.com.au), which the previous
	// pattern rejected.
	refGoogle = regexp.MustCompile(`^(https://)?(www\.)?google\.(com?\.)?[[:alpha:]]{2,3}/`)
)
|
|
|
|
func isChromeishUA(ua string) bool {
|
|
return uaChromederived.MatchString(ua)
|
|
}
|
|
|
|
func isChromeUA(ua string) bool {
|
|
return uaChrome.MatchString(ua) && !uaChromederived.MatchString(ua)
|
|
}
|
|
|
|
func isGooglebot(ua string) bool {
|
|
return uaGooglebot.MatchString(ua)
|
|
}
|
|
|
|
func isGoogleRef(referer string) bool {
|
|
return refGoogle.MatchString(referer)
|
|
}
|