mobilizon-event-importer/scrapers/facebook.com/facebook.go

68 lines
1.8 KiB
Go

package facebook
import (
"fmt"
"net/http"
"net/url"
"regexp"
"strings"
"git.nroo.de/norwin/mobilizon-event-importer/mobilizon"
"git.nroo.de/norwin/mobilizon-event-importer/scrapers"
"git.nroo.de/norwin/mobilizon-event-importer/scrapers/jsonld"
)
func init() {
scrapers.Register(NewJsonLdScraper(), "facebook.com", "www.facebook.com", "m.facebook.com")
}
var (
	// cdataFilter matches script content that is wrapped in a commented-out
	// CDATA section, i.e.
	//
	//	//<![CDATA[
	//	...payload...
	//	//]]>
	//
	// and captures the payload as group 1. The (?s) flag lets "." match
	// newlines as well, so a payload spanning several lines is unwrapped too;
	// without it such input would be left untouched, markers included.
	cdataFilter = regexp.MustCompile(`(?s)^//<!\[CDATA\[\n(.*)\n//\]\]>$`)
)
// JsonLdScraper is the scraper type of this package. It carries the HTTP
// client that is used to request pages.
type JsonLdScraper struct {
	client *http.Client
}
// NewJsonLdScraper returns a scraper that reads event data from the ld+json
// script tags of a facebook event page. Requests are made with
// http.DefaultClient.
//
// NOTE: facebook only includes these tags on the mobile pages of *upcoming*
// events. That special casing suggests the feature is being phased out, so
// this scraper is unlikely to keep working in the long term.
func NewJsonLdScraper() *JsonLdScraper {
	scraper := new(JsonLdScraper)
	scraper.client = http.DefaultClient
	return scraper
}
// NormalizeEventID reduces an event reference to the bare event ID.
//
// The input is parsed as a URL. A leading "/events/" is stripped from the
// URL path and everything up to (not including) the next slash is returned,
// so both a full event URL and a plain ID yield the ID. Input that cannot be
// parsed as a URL is returned unchanged.
func NormalizeEventID(input string) string {
	parsed, err := url.Parse(input)
	if err != nil {
		return input
	}

	rest := strings.TrimPrefix(parsed.Path, "/events/")
	if slash := strings.IndexByte(rest, '/'); slash >= 0 {
		// drop trailing path segments such as "/permalink/..."
		return rest[:slash]
	}
	return rest
}
// ScrapeEvent requests the mobile facebook page of a single event and
// converts the ld+json data embedded in it into a mobilizon event.
//
// id may be a bare event ID or the URL of an event page; it is normalized
// with NormalizeEventID before use.
//
// An error is returned if no event ID is left after normalization, if the
// request fails, if the server responds with a non-2xx status, or if the
// page cannot be processed. When the page contains no ld+json data, the
// returned error wraps jsonld.ErrJsonLdNotFound.
func (s JsonLdScraper) ScrapeEvent(id string) (*mobilizon.Event, error) {
	// also accept urls
	id = NormalizeEventID(id)
	if id == "" {
		// Without an ID the request would go to the bare /events/ path.
		return nil, fmt.Errorf("no facebook event id given")
	}

	// The base URL is a constant that always parses, so the error is ignored.
	u, _ := url.Parse("https://m.facebook.com/events/")
	u.Path += id

	res, err := s.client.Get(u.String())
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	// Get does not report non-2xx responses as errors. Check explicitly so
	// that an error page is not mistaken for an event page without ld+json.
	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("fetching %s: unexpected HTTP status %s", u, res.Status)
	}

	event, err := jsonld.FindLdJsonInHtml(res.Body, func(x []byte) []byte {
		// strip the CDATA wrapper around the script content, if present
		return cdataFilter.ReplaceAll(x, []byte("$1"))
	})
	if err == jsonld.ErrJsonLdNotFound {
		// %w keeps the sentinel reachable for errors.Is; the message text
		// is the same as with %v.
		return nil, fmt.Errorf("%w: past events are not supported :(", err)
	}
	if err != nil {
		return nil, err
	}
	return event.ToMobilizonEvent(), nil
}
// ScrapeFeed is not implemented yet. It ignores id and always returns an
// empty, non-nil slice together with a "TODO" error.
func (s JsonLdScraper) ScrapeFeed(id string) ([]*mobilizon.Event, error) {
	none := make([]*mobilizon.Event, 0)
	return none, fmt.Errorf("TODO")
}