68 lines
1.8 KiB
Go
68 lines
1.8 KiB
Go
package facebook
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"git.nroo.de/norwin/mobilizon-event-importer/mobilizon"
|
|
"git.nroo.de/norwin/mobilizon-event-importer/scrapers"
|
|
"git.nroo.de/norwin/mobilizon-event-importer/scrapers/jsonld"
|
|
)
|
|
|
|
func init() {
|
|
scrapers.Register(NewJsonLdScraper(), "facebook.com", "www.facebook.com", "m.facebook.com")
|
|
}
|
|
|
|
var (
	// cdataFilter matches a script body that is wrapped in a commented-out
	// CDATA section, i.e.
	//
	//	//<![CDATA[
	//	<payload>
	//	//]]>
	//
	// and captures the payload as group 1. The (?s) flag lets the payload
	// span several lines; without it `.` stops at the first newline and a
	// multi-line payload would never be unwrapped. Input that is not wrapped
	// this way does not match and is left untouched by ReplaceAll.
	cdataFilter = regexp.MustCompile("(?s)^\\/\\/<!\\[CDATA\\[\n(.*)\n\\/\\/\\]\\]>$")
)
|
|
|
|
// JsonLdScraper scrapes facebook events from the ld+json data embedded in
// facebook's event pages. Use NewJsonLdScraper to obtain a ready-to-use value.
type JsonLdScraper struct {
	// client is the HTTP client used to fetch event pages.
	client *http.Client
}
|
|
|
|
// NewJsonLdScraper creates a new scraper that extracts event data via
|
|
// ld+json script tags from a facebook event page.
|
|
// NOTE that this is only included on the mobile pages for *upcoming* events.
|
|
// This special casing suggests that this is phased out and won't work long term.
|
|
func NewJsonLdScraper() *JsonLdScraper {
|
|
return &JsonLdScraper{client: http.DefaultClient}
|
|
}
|
|
|
|
// NormalizeEventID reduces input to a bare facebook event ID.
//
// input may be a bare ID ("123"), an event URL
// ("https://www.facebook.com/events/123/?ref=x"), or an event URL without a
// scheme ("facebook.com/events/123"). The ID is taken to be the path segment
// that follows the first "/events/" in the URL path; if the path contains no
// "/events/", the first path segment is returned. Query string and fragment
// are ignored.
//
// If input cannot be parsed as a URL it is returned unchanged.
func NormalizeEventID(input string) string {
	u, err := url.Parse(input)
	if err != nil {
		return input
	}

	const marker = "/events/"
	path := u.Path
	// Search for the marker instead of only trimming it as a prefix: a URL
	// pasted without a scheme is parsed as a relative path that starts with
	// the host name, so "/events/" is not at the beginning there.
	if i := strings.Index(path, marker); i >= 0 {
		path = path[i+len(marker):]
	}
	return strings.SplitN(path, "/", 2)[0]
}
|
|
|
|
func (s JsonLdScraper) ScrapeEvent(id string) (*mobilizon.Event, error) {
|
|
// also accept urls
|
|
id = NormalizeEventID(id)
|
|
|
|
u, _ := url.Parse("https://m.facebook.com/events/")
|
|
u.Path += id
|
|
res, err := s.client.Get(u.String())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer res.Body.Close()
|
|
|
|
event, err := jsonld.FindLdJsonInHtml(res.Body, func(x []byte) []byte {
|
|
return cdataFilter.ReplaceAll(x, []byte("$1"))
|
|
})
|
|
if err == jsonld.ErrJsonLdNotFound {
|
|
return nil, fmt.Errorf("%v: past events are not supported :(", err)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return event.ToMobilizonEvent(), nil
|
|
}
|
|
|
|
func (s JsonLdScraper) ScrapeFeed(id string) ([]*mobilizon.Event, error) {
|
|
return []*mobilizon.Event{}, fmt.Errorf("TODO")
|
|
}
|