131 lines
3.8 KiB
Go
131 lines
3.8 KiB
Go
package jsonld
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"regexp"
|
|
"time"
|
|
|
|
"git.nroo.de/norwin/mobilizon-event-importer/mobilizon"
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// ErrJsonLdNotFound is the sentinel error returned by FindLdJsonInHtml when
// the scanned HTML yields no content for a
// <script type="application/ld+json"> element.
var ErrJsonLdNotFound = fmt.Errorf("json+ld event description not found")
|
|
|
|
// EventAttendanceMode holds the value of an event's "eventAttendanceMode"
// property, which is expressed as a schema.org IRI.
type EventAttendanceMode string

// Attendance mode IRIs recognised by this package.
const (
	// OfflineEventAttendanceMode is the schema.org IRI named
	// OfflineEventAttendanceMode.
	OfflineEventAttendanceMode = EventAttendanceMode("https://schema.org/OfflineEventAttendanceMode")
	// OnlineEventAttendanceMode is the schema.org IRI named
	// OnlineEventAttendanceMode.
	OnlineEventAttendanceMode = EventAttendanceMode("https://schema.org/OnlineEventAttendanceMode")
	// MixedEventAttendanceMode is the schema.org IRI named
	// MixedEventAttendanceMode.
	MixedEventAttendanceMode = EventAttendanceMode("https://schema.org/MixedEventAttendanceMode")
)
|
|
|
|
// FIXME: most JSON-LD values can be either a scalar or an array, depending on
// how many values are present. It is still unclear how to map this onto
// static Go types.
|
|
|
|
type Event struct {
|
|
Name string `json:"name"`
|
|
Description string `json:"description"`
|
|
StartDate LdTime `json:"startDate"`
|
|
EndDate LdTime `json:"endDate"`
|
|
EventAttendanceMode EventAttendanceMode `json:"eventAttendanceMode"`
|
|
URL string `json:"url"`
|
|
ImageURL string `json:"image"`
|
|
Location Place `json:"location"`
|
|
// Performers []Perfomer `json:"performers"`
|
|
// Offers []Offer
|
|
}
|
|
|
|
type Place struct {
|
|
Name string `json:"name"`
|
|
Address Address `json:"address"`
|
|
URL string `json:"url"`
|
|
}
|
|
|
|
// Address is the postal address of a Place. The Go field names are shortened;
// the JSON property each one is decoded from is given by its tag.
type Address struct {
	// Street is decoded from the "streetAddress" property.
	Street string `json:"streetAddress"`
	// PostCode is decoded from the "postalCode" property.
	PostCode string `json:"postalCode"`
	// City is decoded from the "addressLocality" property.
	City string `json:"addressLocality"`
	// Country is decoded from the "addressCountry" property.
	Country string `json:"addressCountry"`
}
|
|
|
|
// LdTime wraps time.Time so that timestamps in JSON-LD documents can be
// unmarshalled. It is needed because the time format returned by facebook
// omits the colon in the UTC offset and is therefore not accepted by Go's
// RFC 3339 layout:
//
//	facebook: 2022-04-04T22:00:00+0200
//	RFC 3339: 2022-04-04T22:00:00+02:00
type LdTime struct{ time.Time }

// UnmarshalJSON implements json.Unmarshaler.
//
// b must hold a JSON string containing an RFC 3339 timestamp, optionally with
// the colon of the UTC offset missing. A JSON null is a no-op and leaves t
// unchanged. Any other JSON value, or a string that is not a valid timestamp,
// yields an error and leaves t unchanged.
func (t *LdTime) UnmarshalJSON(b []byte) error {
	// By convention, UnmarshalJSON implementations treat null as a no-op.
	if string(b) == "null" {
		return nil
	}

	// Decode the string through encoding/json rather than slicing off the
	// first and last byte: slicing panics on one-byte values such as `1`,
	// accepts non-string values, and leaves escape sequences unresolved.
	var raw string
	if err := json.Unmarshal(b, &raw); err != nil {
		return err
	}

	// Insert the colon into a trailing "hhmm" offset so RFC 3339 parsing works.
	normalized := timeszoneColonFilter.ReplaceAllString(raw, "$1:$2")
	parsed, err := time.Parse(time.RFC3339, normalized)
	if err != nil {
		return err
	}
	t.Time = parsed
	return nil
}

var (
	// timeszoneColonFilter matches a value that ends in four consecutive
	// digits and captures the final two digits separately, so that a colon
	// can be inserted in front of them ("+0200" becomes "+02:00").
	timeszoneColonFilter = regexp.MustCompile(`(?m)(.+\d\d)(\d\d)$`)
)
|
|
|
|
func (event Event) ToMobilizonEvent() *mobilizon.Event {
|
|
// TODO: swap physical for online address depending on event.EventAttendanceMode
|
|
return &mobilizon.Event{
|
|
Title: mobilizon.String(event.Name),
|
|
Description: mobilizon.String(event.Description),
|
|
BeginsOn: event.StartDate.Time,
|
|
EndsOn: event.EndDate.Time,
|
|
PhysicalAddress: mobilizon.Address{
|
|
Description: mobilizon.String(event.Location.Name),
|
|
Street: mobilizon.String(event.Location.Address.Street),
|
|
PostalCode: mobilizon.String(event.Location.Address.PostCode),
|
|
Locality: mobilizon.String(event.Location.Address.City),
|
|
Country: mobilizon.String(event.Location.Address.Country),
|
|
},
|
|
Picture: mobilizon.Media{
|
|
URL: mobilizon.String(event.ImageURL),
|
|
},
|
|
}
|
|
}
|
|
|
|
// ContentProcessor is a hook that receives the raw bytes of the JSON-LD
// script found by FindLdJsonInHtml and returns the bytes that are decoded in
// their place.
type ContentProcessor func([]byte) []byte
|
|
|
|
func FindLdJsonInHtml(htmlContent io.Reader, contentCallback ContentProcessor) (*Event, error) {
|
|
var jsonld []byte
|
|
tokenizer := html.NewTokenizer(htmlContent)
|
|
tokenizer.AllowCDATA(true)
|
|
TOKENIZED:
|
|
for {
|
|
elem := tokenizer.Next()
|
|
switch elem {
|
|
case html.StartTagToken:
|
|
name, hasAttributes := tokenizer.TagName()
|
|
if string(name) == "script" {
|
|
var k, v []byte
|
|
for hasAttributes {
|
|
k, v, hasAttributes = tokenizer.TagAttr()
|
|
if string(k) == "type" && string(v) == "application/ld+json" {
|
|
tokenizer.Next()
|
|
jsonld = tokenizer.Text()
|
|
break TOKENIZED
|
|
}
|
|
}
|
|
}
|
|
case html.ErrorToken:
|
|
if err := tokenizer.Err(); err != io.EOF {
|
|
return nil, err
|
|
}
|
|
break TOKENIZED
|
|
}
|
|
}
|
|
|
|
if jsonld == nil {
|
|
return nil, ErrJsonLdNotFound
|
|
}
|
|
|
|
if contentCallback != nil {
|
|
jsonld = contentCallback(jsonld)
|
|
}
|
|
|
|
ldEvent := Event{}
|
|
return &ldEvent, json.Unmarshal(jsonld, &ldEvent)
|
|
}
|