diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..55a43c0 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,14 @@ +Package: opensensemap +Type: Package +Title: Work with Sensor Data from opensensemap.org in R +Version: 0.1.0 +Imports: lubridate, httr, sf +Author: Norwin Roosen +Maintainer: Norwin Roosen +Description: This packages ingests data (measurements, sensorstations) from the + API of opensensemap.org and transforms them into easy to use data.tables. + It uses the sf package for spatial handling of datapoints and aims to be + compatible with the tidyverse. +License: GPL-2 +Encoding: UTF-8 +LazyData: true diff --git a/R/api.R b/R/api.R new file mode 100644 index 0000000..7580511 --- /dev/null +++ b/R/api.R @@ -0,0 +1,49 @@ +# does not actually get called by the user. ... contains all the query parameters. +# the proxy is just for parameter autocompletion, filtering out the endpoint +get_boxes_ = function (..., endpoint) { + response = httr::GET(endpoint, path = c('boxes'), query = list(...)) %>% + httr::content() %>% + osem_remote_error() + + if (length(response) == 0) { + warning('no boxes found for this query') + return(response) + } + + # parse each list element as sensebox & combine them to a single data.frame + # + # bind_rows() kills the attributes and classes of sf_sfc column, and warns + # about that. see https://github.com/r-spatial/sf/issues/49 + # we readd the attributes manually afterwards, so we can ignore the warnings. + # rbind() wouldn't have this problem, but would be far slower and con't + # handle missing columns, so this seems like a good tradeoff.. + boxesList = lapply(response, parse_senseboxdata) + suppressWarnings({ data = dplyr::bind_rows(boxesList) }) + data$geometry = sf::st_sfc(data$geometry, crs = 4326) + #sf::st_geometry(data) = sf::st_sfc(data$geometry, crs = 4326) + #sf::st_sf(data) + + data +} + +get_box_ = function (..., endpoint) { + httr::GET(endpoint, path = c('boxes', ...)) %>% + httr::content() %>% + osem_remote_error() %>% + parse_senseboxdata() +} + +get_measurements_ = function (..., endpoint) { + httr::GET(endpoint, path = c('boxes', 'data'), query = list(...)) %>% + httr::content() %>% + osem_remote_error() +} + +get_stats_ = function (endpoint) { + result = httr::GET(endpoint, path = c('stats')) %>% + httr::content() %>% + osem_remote_error() + + names(result) = c('boxes', 'measurements', 'measurements_per_minute') + result +} diff --git a/R/api_utils.R b/R/api_utils.R new file mode 100644 index 0000000..4a8aeda --- /dev/null +++ b/R/api_utils.R @@ -0,0 +1,11 @@ +osem_remote_error = function (response) { + if (!is.null(response$code)) stop(response$message) + + invisible(response) +} + +date_as_isostring = function (date) { + # as_datetime is required so UTC times are always returned + # TODO: check if we can get along without lubridate dependency? + lubridate::as_datetime(date) %>% format.Date(format = '%FT%TZ') +} diff --git a/R/box_api.R b/R/box_api.R new file mode 100644 index 0000000..d428c92 --- /dev/null +++ b/R/box_api.R @@ -0,0 +1,47 @@ +`%>%` = magrittr::`%>%` + +osem_boxes = function (exposure = NA, model = NA, grouptag = NA, + date = NA, from = NA, to = NA, phenomenon = NA, + endpoint = 'https://api.opensensemap.org') { + + # error, if phenomenon, but no time given + if (!is.na(phenomenon) && is.na(date) && is.na(to) && is.na(from)) + stop('Parameter "phenomenon" can only be used together with "date" or "from"/"to"') + + # error, if date and from/to given + if (!is.na(date) && (!is.na(to) || !is.na(from))) + stop('Parameter "date" cannot be used together with "from"/"to"') + + # error, if only one of from/to given + if ( + (!is.na(to) && is.na(from)) || + (is.na(to) && !is.na(from)) + ) { + stop('Parameter "from"/"to" must be used together') + } + + query = list(endpoint = endpoint) + if (!is.na(exposure)) query$exposure = exposure + if (!is.na(model)) query$model = model + if (!is.na(grouptag)) query$grouptag = grouptag + if (!is.na(phenomenon)) query$phenomenon = phenomenon + + if (!is.na(to) && !is.na(from)) { + # error, if from is after to + if (lubridate::as_datetime(from) - lubridate::as_datetime(to) > 0) + stop('Parameter "from" must be earlier than "to"') + # convert dates to commaseparated UTC ISOdates + query$date = c(date_as_isostring(from), date_as_isostring(to)) %>% + paste(collapse = ',') + + print(query$date) + } else if (!is.na(date)) { + query$date = format.Date(lubridate::as_datetime(date), '%FT%TZ') + } + + do.call(get_boxes_, query) +} + +osem_box = function (boxId, endpoint = 'https://api.opensensemap.org') { + get_box_(boxId, endpoint) +} diff --git a/R/box_parse.R b/R/box_parse.R new file mode 100644 index 0000000..a279b98 --- /dev/null +++ b/R/box_parse.R @@ -0,0 +1,44 @@ +parse_senseboxdata = function (boxdata) { + # extract nested lists for later use & clean them from the list + # to allow a simple data.frame structure + sensors = boxdata$sensors + location = boxdata$loc + boxdata[c('loc', 'sensors', 'image', 'boxType')] <- NULL + thebox = as.data.frame(boxdata) + + # parse timestamps (updatedAt might be not defined) + thebox$createdAt = as.POSIXct(strptime(thebox$createdAt, format='%FT%T', tz = 'GMT')) + if (!is.null(thebox$updatedAt)) + thebox$updatedAt = as.POSIXct(strptime(thebox$updatedAt, format='%FT%T', tz = 'GMT')) + + # extract metadata from sensors + thebox$phenomena = list(unlist(lapply(sensors, function(s) { s$title }))) + # FIXME: if one sensor has NA, max() returns bullshit + thebox$lastMeasurement = max(lapply(sensors, function(s) { + if (!is.null(s$lastMeasurement)) + as.POSIXct(strptime(s$lastMeasurement$createdAt, format = '%FT%T', tz = 'GMT')) + else + NA + })[[1]]) + + # extract coordinates & transform to simple feature object + thebox$lng = location[[1]]$geometry$coordinates[[1]] + thebox$lat = location[[1]]$geometry$coordinates[[2]] + if (length(location[[1]]$geometry$coordinates) == 3) + thebox$height = location[[1]]$geometry$coordinates[[3]] + + # sf does not like to combine 2D and 3D coords, so we just use 2D + # IDEA: convert to sf only after rbind? + thebox = sf::st_as_sf(thebox, coords = c('lng', 'lat'), crs = 4326) + + # attach a custom class for methods + class(thebox) = c('sensebox', class(thebox)) + thebox +} + +get_phenomena = function (x, ...) UseMethod('get_phenomena') +get_phenomena.default = function (x, ...) stop('not implemented') +get_phenomena.sensebox = function (x, ...) { + # FIXME: only returns first box for get_boxes! + x$phenomena[[1]] +} diff --git a/R/box_utils.R b/R/box_utils.R new file mode 100644 index 0000000..541b992 --- /dev/null +++ b/R/box_utils.R @@ -0,0 +1,62 @@ +`%>%` = magrittr::`%>%` + +plot.sensebox = function (x) { + # TODO: background map? + geom = sf::st_geometry(x) + plot(geom, graticule = st_crs(geom), axes = TRUE) + invisible(x) +} + +print.sensebox = function(x) { + important_columns = c('name', 'exposure', 'lastMeasurement', 'phenomena') + data = as.data.frame(x) # to get rid of the sf::`<-[` override.. + message(class(data)) + print(data[important_columns]) + invisible(x) +} + +summary.sensebox = function(x) { + df = as.data.frame(x) # the sf methods are messing with us again.. + + cat('boxes total:', nrow(df), fill = T) + cat('\nboxes by exposure:') + table(df$exposure) %>% print() + cat('\nboxes by model:') + table(df$model) %>% print() + cat('\n') + + diffNow = (lubridate::now() - df$lastMeasurement) %>% as.numeric(unit='hours') + neverActive = df[is.na(df$lastMeasurement), ] %>% nrow() + list( + 'last_measurement_within' = c( + '1h' = nrow(df[diffNow <= 1, ]) - neverActive, + '1d' = nrow(df[diffNow <= 24, ]) - neverActive, + '30d' = nrow(df[diffNow <= 720, ]) - neverActive, + '365d' = nrow(df[diffNow <= 8760, ]) - neverActive, + 'never' = neverActive + ) + ) %>% + print() + + oldest = df[df$createdAt == min(df$createdAt), ] + newest = df[df$createdAt == max(df$createdAt), ] + cat('oldest box:', format(oldest$createdAt, '%F %T'), paste0('(', oldest$name, ')'), fill = T) + cat('newest box:', format(newest$createdAt, '%F %T'), paste0('(', newest$name, ')'), fill = T) + + cat('\nsensors per box:', fill = T) + lapply(df$phenomena, length) %>% + as.numeric() %>% + summary() %>% + print() + + invisible(x) +} + +osem_phenomena = function (x) UseMethod('osem_phenomena') +osem_phenomena.default = function (x) stop('not implemented') +osem_phenomena.sensebox = function (x) { + Reduce(`c`, x$phenomena) %>% # get all the row contents in a single vector + table() %>% # get the counts + t() %>% # transform the table to an easier to work with df + as.data.frame.matrix() +} diff --git a/R/counts_api.R b/R/counts_api.R new file mode 100644 index 0000000..fc2e80d --- /dev/null +++ b/R/counts_api.R @@ -0,0 +1,3 @@ +osem_counts = function (endpoint = 'https://api.opensensemap.org') { + get_stats_(endpoint) +} diff --git a/R/measurement_api.R b/R/measurement_api.R new file mode 100644 index 0000000..e9e8be5 --- /dev/null +++ b/R/measurement_api.R @@ -0,0 +1,46 @@ +osem_measurements = function (x, ...) UseMethod('osem_measurements') + +osem_measurements.default = function (phenomenon, ...) { + bbox = structure(c(-180, -90, 180, 90), class = 'bbox') + osem_measurements(bbox, phenomenon, ...) +} + +# /boxes/data?bbox= +osem_measurements.bbox = function (bbox, phenomenon, exposure = NA, + from = NA, to = NA, + columns = NA, + endpoint = 'https://api.opensensemap.org') { + query = parse_get_measurements_params(as.list(environment())) + do.call(get_measurements_, query) +} + +# /boxes/data?boxId=1,2,3,4 +osem_measurements.sensebox = function (boxes, phenomenon, exposure = NA, + from = NA, to = NA, + columns = NA, + endpoint = 'https://api.opensensemap.org') { + query = parse_get_measurements_params(as.list(environment())) + do.call(get_measurements_, query) +} + +parse_get_measurements_params = function (params) { + if (is.null(params$phenomenon) | is.na(params$phenomenon)) + stop('Parameter "phenomenon" is required') + if (!is.na(params$from) && is.na(params$to)) stop('specify "from" only together with "to"') + if ( + (!is.null(params$bbox) && !is.null(params$boxes)) || + (is.null(params$bbox) && is.null(params$boxes)) + ) stop('Specify either "bbox" or "boxes"') + + query = list(endpoint = params$endpoint, phenomenon = params$phenomenon) + + if (!is.null(params$boxes)) query$boxIds = paste(params$boxes$X_id, collapse = ',') + if (!is.null(params$bbox)) query$bbox = paste(params$bbox, collapse = ',') + + if (!is.na(params$from)) query$`from-date` = date_as_isostring(params$from) + if (!is.na(params$to)) query$`to-date` = date_as_isostring(params$to) + if (!is.na(params$exposure)) query$exposure = params$exposure + if (!is.na(params$columns)) query$columns = paste(params$columns, collapse = ',') + + query +}