You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

167 lines
3.9 KiB
Go

package fuckgoogle
import (
"fmt"
"net/http"
"regexp"
"sync"
"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
)
// GoogleMatcher is a Caddy HTTP request matcher that matches requests which
// appear to originate from Google itself or from users of Google products.
// Each boolean field enables one independent check; Match returns true as
// soon as any enabled check succeeds.
type GoogleMatcher struct {
// traffic from google itself

// Bot enables matching on the User-Agent header via isGooglebot.
Bot bool `json:"googlebot,omitempty"`
// IPranges enables matching on the client's remote IP against the ranges
// returned by FetchGoogleIPRangesFull (loaded in Provision).
IPranges bool `json:"ip_ranges,omitempty"`

// traffic from google users

// UAChrome enables matching User-Agents that contain "Chrome" but none of
// the Chrome-derived browser tokens (see isChromeUA).
UAChrome bool `json:"useragent_chrome,omitempty"`
// UAChromeish enables matching User-Agents that contain a Chrome-derived
// browser token (see isChromeishUA).
UAChromeish bool `json:"useragent_chromeish,omitempty"`
// Referer enables matching on a Referer header that points at a Google
// domain (see isGoogleRef).
Referer bool `json:"referer_google,omitempty"`
// RefererExt can be set from config, but Match has no logic for it yet
// (marked TODO there), so it currently never causes a match.
RefererExt bool `json:"referer_other,omitempty"`

// ipMatcher performs the remote-IP check for IPranges; it is assigned in
// Provision and is not part of the JSON config.
ipMatcher caddyhttp.MatchRemoteIP
}
// provisionedIPRanges caches the Google IP ranges for the lifetime of the
// process. The list is fetched once (on the first successful attempt) and then
// shared by every matcher instance, including instances created by later
// config reloads.
var provisionedIPRanges struct {
	mu     sync.Mutex
	loaded bool
	ranges []string
}

func init() {
	caddy.RegisterModule(GoogleMatcher{})
}

// CaddyModule returns the Caddy module information.
func (GoogleMatcher) CaddyModule() caddy.ModuleInfo {
	return caddy.ModuleInfo{
		ID:  "http.matchers.google",
		New: func() caddy.Module { return new(GoogleMatcher) },
	}
}

// cachedGoogleIPRanges returns a private copy of the Google IP ranges,
// fetching them with FetchGoogleIPRangesFull on first use. A failed fetch is
// not cached, so the next call tries again.
func cachedGoogleIPRanges(ctx caddy.Context, m *GoogleMatcher) ([]string, error) {
	provisionedIPRanges.mu.Lock()
	defer provisionedIPRanges.mu.Unlock()

	if !provisionedIPRanges.loaded {
		ranges, err := FetchGoogleIPRangesFull(ctx)
		if err != nil {
			return nil, fmt.Errorf("fetching google IP ranges: %w", err)
		}
		ctx.Logger(m).Warn(fmt.Sprintf("loaded google IP ranges: %v", ranges))
		provisionedIPRanges.ranges = ranges
		provisionedIPRanges.loaded = true
	}

	// Hand out a copy so no instance can alias the shared cache.
	return append([]string(nil), provisionedIPRanges.ranges...), nil
}

// Provision sets up the module defaults.
//
// When IPranges is enabled it obtains the Google IP ranges and provisions this
// instance's own remote-IP matcher. Every instance gets a working matcher, not
// only the first one provisioned in the process; previously a package-level
// sync.Once meant that later instances (for example after a config reload)
// were left with an unprovisioned matcher and Provision still reported success.
//
// It returns an error if the ranges cannot be fetched or if the remote-IP
// matcher rejects them.
func (m *GoogleMatcher) Provision(ctx caddy.Context) error {
	if !m.IPranges {
		return nil
	}

	ranges, err := cachedGoogleIPRanges(ctx, m)
	if err != nil {
		return err
	}

	m.ipMatcher = caddyhttp.MatchRemoteIP{Ranges: ranges}
	return m.ipMatcher.Provision(ctx)
}
// Match reports whether the request r satisfies at least one of the checks
// enabled on m.
//
// Checks are tried in a fixed order and the first hit wins: Chrome-derived
// User-Agent, Chrome User-Agent, Google referer, Googlebot User-Agent and
// finally the remote IP ranges. A disabled check is never evaluated.
func (m GoogleMatcher) Match(r *http.Request) bool {
	userAgent := r.Header.Get("user-agent")
	referer := r.Header.Get("referer")

	switch {
	case m.UAChromeish && isChromeishUA(userAgent):
		return true
	case m.UAChrome && isChromeUA(userAgent):
		return true
	case m.Referer && isGoogleRef(referer):
		return true
	case m.Bot && isGooglebot(userAgent):
		return true
	case m.IPranges && m.ipMatcher.Match(r):
		return true
	}

	// TODO: RefererExt is not implemented yet; enabling it has no effect on
	// the outcome.
	return false
}
// UnmarshalCaddyfile sets up the handler from Caddyfile tokens.
//
//	google [googlebot] [ip-ranges] [ua-chrome] [ua-chromeish] [referer-google] [referer-other]
//
//	google {
//		[googlebot]
//		[ip-ranges]
//		[ua-chrome]
//		[ua-chromeish]
//		[referer-google]
//		[referer-other]
//	}
//
// Options may be given inline, in a block, or both. An unknown option is
// reported as an error, and so is a matcher that enables no option at all.
func (m *GoogleMatcher) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
	// handleArg enables the check named by arg and reports whether the name
	// was recognized. It acts on its argument only, not on the dispenser's
	// current token, so it stays correct wherever it is called from.
	handleArg := func(arg string) bool {
		switch arg {
		case "googlebot":
			m.Bot = true
		case "ip-ranges":
			m.IPranges = true
		case "ua-chrome":
			m.UAChrome = true
		case "ua-chromeish":
			m.UAChromeish = true
		case "referer-google":
			m.Referer = true
		case "referer-other":
			m.RefererExt = true
		default:
			return false
		}
		return true
	}

	var setAnything bool
	for d.Next() {
		// Options on the same line as the matcher name.
		for d.NextArg() {
			if !handleArg(d.Val()) {
				return d.Errf("unrecognized argument: %s", d.Val())
			}
			setAnything = true
		}
		// Options listed one per line inside a block.
		for d.NextBlock(0) {
			if !handleArg(d.Val()) {
				return d.Errf("unrecognized subdirective: %s", d.Val())
			}
			setAnything = true
		}
		if !setAnything {
			return d.Err("this matcher has no effect without any arguments!")
		}
	}
	return nil
}
// Interface guards: compile-time assertions that *GoogleMatcher implements
// each listed interface. They have no runtime effect; a missing or mistyped
// method makes the build fail here.
var (
_ caddy.Module = (*GoogleMatcher)(nil)
_ caddy.Provisioner = (*GoogleMatcher)(nil)
_ caddyhttp.RequestMatcher = (*GoogleMatcher)(nil)
_ caddyfile.Unmarshaler = (*GoogleMatcher)(nil)
)
// Patterns used by the header checks, compiled once at package load.
var (
	// uaChromederived matches product tokens of Chromium-based browsers other
	// than Chrome itself.
	uaChromederived = regexp.MustCompile("(Brave|Chromium|Edg|OPR|Yowser|Vivaldi)")

	// uaChrome matches the "Chrome" product token.
	uaChrome = regexp.MustCompile("Chrome")

	// uaGooglebot matches the crawler token case-insensitively. The crawler
	// identifies itself as "Googlebot" (lowercase b), which the previous
	// case-sensitive pattern "GoogleBot" could never match.
	uaGooglebot = regexp.MustCompile("(?i)googlebot")

	// refGoogle matches a referer whose host is google.<tld>, optionally
	// prefixed with "https://" and/or "www.". The optional trailing group
	// admits two-level country domains such as google.co.uk or google.com.au.
	refGoogle = regexp.MustCompile(`^(https://)?(www\.)?google\.[[:alpha:]]{2,3}(\.[[:alpha:]]{2})?/`)
)

// isChromeishUA reports whether ua carries the token of a Chrome-derived
// browser (Brave, Chromium, Edge, Opera, Yandex, Vivaldi).
func isChromeishUA(ua string) bool {
	return uaChromederived.MatchString(ua)
}

// isChromeUA reports whether ua contains "Chrome" and none of the
// Chrome-derived browser tokens.
func isChromeUA(ua string) bool {
	return uaChrome.MatchString(ua) && !uaChromederived.MatchString(ua)
}

// isGooglebot reports whether ua contains the Googlebot token, ignoring case.
func isGooglebot(ua string) bool {
	return uaGooglebot.MatchString(ua)
}

// isGoogleRef reports whether referer starts with a Google host, including
// country-specific domains.
func isGoogleRef(referer string) bool {
	return refGoogle.MatchString(referer)
}