update inst/doc

master v0.6.0
jan 1 year ago
parent 8d0746b263
commit 8ef52f8f59

@ -9,7 +9,10 @@ library(zoo) # rollmean()
## ----download----------------------------------------------------------------- ## ----download-----------------------------------------------------------------
# if you want to see results for a specific subset of boxes, # if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here # just specify a filter such as grouptag='ifgi' here
boxes = osem_boxes()
# boxes = osem_boxes(cache = '.')
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
## ----exposure_counts, message=FALSE------------------------------------------- ## ----exposure_counts, message=FALSE-------------------------------------------
exposure_counts = boxes %>% exposure_counts = boxes %>%

@ -43,7 +43,10 @@ So the first step is to retrieve *all the boxes*:
```{r download} ```{r download}
# if you want to see results for a specific subset of boxes, # if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here # just specify a filter such as grouptag='ifgi' here
boxes = osem_boxes()
# boxes = osem_boxes(cache = '.')
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
``` ```
# Plot count of boxes by time {.tabset} # Plot count of boxes by time {.tabset}

File diff suppressed because one or more lines are too long

@ -9,18 +9,15 @@ library(zoo) # rollmean()
## ----download, results='hide', message=FALSE, warning=FALSE------------------- ## ----download, results='hide', message=FALSE, warning=FALSE-------------------
# if you want to see results for a specific subset of boxes, # if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here # just specify a filter such as grouptag='ifgi' here
boxes_all = osem_boxes()
boxes = boxes_all # boxes = osem_boxes(cache = '.')
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------
boxes = filter(boxes, locationtimestamp >= "2022-01-01" & locationtimestamp <="2022-12-31") boxes = filter(boxes, locationtimestamp >= "2022-01-01" & locationtimestamp <="2022-12-31")
summary(boxes) -> summary.data.frame summary(boxes) -> summary.data.frame
## ----message=F, warning=F----------------------------------------------------- ## ---- message=FALSE, warning=FALSE--------------------------------------------
if (!require('maps')) install.packages('maps')
if (!require('maptools')) install.packages('maptools')
if (!require('rgeos')) install.packages('rgeos')
plot(boxes) plot(boxes)
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------

@ -45,8 +45,9 @@ So the first step is to retrieve *all the boxes*.
```{r download, results='hide', message=FALSE, warning=FALSE} ```{r download, results='hide', message=FALSE, warning=FALSE}
# if you want to see results for a specific subset of boxes, # if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here # just specify a filter such as grouptag='ifgi' here
boxes_all = osem_boxes()
boxes = boxes_all # boxes = osem_boxes(cache = '.')
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
``` ```
# Introduction # Introduction
In the following we just want to have a look at the boxes created in 2022, so we filter for them. In the following we just want to have a look at the boxes created in 2022, so we filter for them.
@ -65,11 +66,7 @@ summary(boxes) -> summary.data.frame
Another feature of interest is the spatial distribution of the boxes: `plot()` Another feature of interest is the spatial distribution of the boxes: `plot()`
can help us out here. This function requires a bunch of optional dependencies though. can help us out here. This function requires a bunch of optional dependencies though.
```{r message=F, warning=F} ```{r, message=FALSE, warning=FALSE}
if (!require('maps')) install.packages('maps')
if (!require('maptools')) install.packages('maptools')
if (!require('rgeos')) install.packages('rgeos')
plot(boxes) plot(boxes)
``` ```

File diff suppressed because one or more lines are too long

@ -1,20 +1,17 @@
## ----setup, include=FALSE----------------------------------------------------- ## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set(echo = TRUE)
## ----results = F-------------------------------------------------------------- ## ----results = FALSE----------------------------------------------------------
library(magrittr) library(magrittr)
library(opensensmapr) library(opensensmapr)
all_sensors = osem_boxes() # all_sensors = osem_boxes(cache = '.')
all_sensors = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------
summary(all_sensors) summary(all_sensors)
## ----message=F, warning=F----------------------------------------------------- ## ---- message=FALSE, warning=FALSE--------------------------------------------
if (!require('maps')) install.packages('maps')
if (!require('maptools')) install.packages('maptools')
if (!require('rgeos')) install.packages('rgeos')
plot(all_sensors) plot(all_sensors)
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------
@ -24,49 +21,54 @@ str(phenoms)
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------
phenoms[phenoms > 20] phenoms[phenoms > 20]
## ----results = F-------------------------------------------------------------- ## ----results = FALSE, eval=FALSE----------------------------------------------
pm25_sensors = osem_boxes( # pm25_sensors = osem_boxes(
exposure = 'outdoor', # exposure = 'outdoor',
date = Sys.time(), # ±4 hours # date = Sys.time(), # ±4 hours
phenomenon = 'PM2.5' # phenomenon = 'PM2.5'
) # )
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------
pm25_sensors = readRDS('pm25_sensors.rds') # read precomputed file to save resources
summary(pm25_sensors) summary(pm25_sensors)
plot(pm25_sensors) plot(pm25_sensors)
## ----------------------------------------------------------------------------- ## ---- results=FALSE, message=FALSE--------------------------------------------
library(sf) library(sf)
library(units) library(units)
library(lubridate) library(lubridate)
library(dplyr) library(dplyr)
# construct a bounding box: 12 kilometers around Berlin
berlin = st_point(c(13.4034, 52.5120)) %>%
st_sfc(crs = 4326) %>%
st_transform(3857) %>% # allow setting a buffer in meters
st_buffer(set_units(12, km)) %>%
st_transform(4326) %>% # the opensensemap expects WGS 84
st_bbox()
## ----results = F--------------------------------------------------------------
pm25 = osem_measurements(
berlin,
phenomenon = 'PM2.5',
from = now() - days(3), # defaults to 2 days
to = now()
)
plot(pm25) ## ----bbox, results = FALSE, eval=FALSE----------------------------------------
# # construct a bounding box: 12 kilometers around Berlin
# berlin = st_point(c(13.4034, 52.5120)) %>%
# st_sfc(crs = 4326) %>%
# st_transform(3857) %>% # allow setting a buffer in meters
# st_buffer(set_units(12, km)) %>%
# st_transform(4326) %>% # the opensensemap expects WGS 84
# st_bbox()
# pm25 = osem_measurements(
# berlin,
# phenomenon = 'PM2.5',
# from = now() - days(3), # defaults to 2 days
# to = now()
# )
#
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------
pm25 = readRDS('pm25_berlin.rds') # read precomputed file to save resources
plot(pm25)
## ---- warning=FALSE-----------------------------------------------------------
outliers = filter(pm25, value > 100)$sensorId outliers = filter(pm25, value > 100)$sensorId
bad_sensors = outliers[, drop = T] %>% levels() bad_sensors = outliers[, drop = TRUE] %>% levels()
pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors) pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T) st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = TRUE)
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------
pm25 %>% filter(invalid == FALSE) %>% plot() pm25 %>% filter(invalid == FALSE) %>% plot()

@ -28,11 +28,12 @@ Its main goals are to provide means for:
Before we look at actual observations, let's get a grasp of the openSenseMap Before we look at actual observations, let's get a grasp of the openSenseMap
datasets' structure. datasets' structure.
```{r results = F} ```{r results = FALSE}
library(magrittr) library(magrittr)
library(opensensmapr) library(opensensmapr)
all_sensors = osem_boxes() # all_sensors = osem_boxes(cache = '.')
all_sensors = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
``` ```
```{r} ```{r}
summary(all_sensors) summary(all_sensors)
@ -47,11 +48,7 @@ couple of minutes ago.
Another feature of interest is the spatial distribution of the boxes: `plot()` Another feature of interest is the spatial distribution of the boxes: `plot()`
can help us out here. This function requires a bunch of optional dependencies though. can help us out here. This function requires a bunch of optional dependencies though.
```{r message=F, warning=F} ```{r, message=FALSE, warning=FALSE}
if (!require('maps')) install.packages('maps')
if (!require('maptools')) install.packages('maptools')
if (!require('rgeos')) install.packages('rgeos')
plot(all_sensors) plot(all_sensors)
``` ```
@ -81,7 +78,7 @@ We should check how many sensor stations provide useful data: We want only those
boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting
measurements: measurements:
```{r results = F} ```{r results = FALSE, eval=FALSE}
pm25_sensors = osem_boxes( pm25_sensors = osem_boxes(
exposure = 'outdoor', exposure = 'outdoor',
date = Sys.time(), # ±4 hours date = Sys.time(), # ±4 hours
@ -89,6 +86,8 @@ pm25_sensors = osem_boxes(
) )
``` ```
```{r} ```{r}
pm25_sensors = readRDS('pm25_sensors.rds') # read precomputed file to save resources
summary(pm25_sensors) summary(pm25_sensors)
plot(pm25_sensors) plot(pm25_sensors)
``` ```
@ -101,12 +100,16 @@ We could call `osem_measurements(pm25_sensors)` now, however we are focusing on
a restricted area of interest, the city of Berlin. a restricted area of interest, the city of Berlin.
Luckily we can get the measurements filtered by a bounding box: Luckily we can get the measurements filtered by a bounding box:
```{r} ```{r, results=FALSE, message=FALSE}
library(sf) library(sf)
library(units) library(units)
library(lubridate) library(lubridate)
library(dplyr) library(dplyr)
```
Since the API takes quite long to respond with measurements, especially when filtered by space and time, we do not run the following chunks for publication of the package on CRAN.
```{r bbox, results = FALSE, eval=FALSE}
# construct a bounding box: 12 kilometers around Berlin # construct a bounding box: 12 kilometers around Berlin
berlin = st_point(c(13.4034, 52.5120)) %>% berlin = st_point(c(13.4034, 52.5120)) %>%
st_sfc(crs = 4326) %>% st_sfc(crs = 4326) %>%
@ -114,8 +117,6 @@ berlin = st_point(c(13.4034, 52.5120)) %>%
st_buffer(set_units(12, km)) %>% st_buffer(set_units(12, km)) %>%
st_transform(4326) %>% # the opensensemap expects WGS 84 st_transform(4326) %>% # the opensensemap expects WGS 84
st_bbox() st_bbox()
```
```{r results = F}
pm25 = osem_measurements( pm25 = osem_measurements(
berlin, berlin,
phenomenon = 'PM2.5', phenomenon = 'PM2.5',
@ -123,15 +124,19 @@ pm25 = osem_measurements(
to = now() to = now()
) )
```
```{r}
pm25 = readRDS('pm25_berlin.rds') # read precomputed file to save resources
plot(pm25) plot(pm25)
``` ```
Now we can get started with actual spatiotemporal data analysis. Now we can get started with actual spatiotemporal data analysis.
First, let's mask the seemingly uncalibrated sensors: First, let's mask the seemingly uncalibrated sensors:
```{r} ```{r, warning=FALSE}
outliers = filter(pm25, value > 100)$sensorId outliers = filter(pm25, value > 100)$sensorId
bad_sensors = outliers[, drop = T] %>% levels() bad_sensors = outliers[, drop = TRUE] %>% levels()
pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors) pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
``` ```
@ -139,7 +144,7 @@ pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
Then plot the measuring locations, flagging the outliers: Then plot the measuring locations, flagging the outliers:
```{r} ```{r}
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T) st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = TRUE)
``` ```
Removing these sensors yields a nicer time series plot: Removing these sensors yields a nicer time series plot:

File diff suppressed because one or more lines are too long

@ -27,25 +27,25 @@ b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
osem_clear_cache() # clears default cache osem_clear_cache() # clears default cache
osem_clear_cache(getwd()) # clears a custom cache osem_clear_cache(getwd()) # clears a custom cache
## ----data, results='hide'----------------------------------------------------- ## ----data, results='hide', eval=FALSE-----------------------------------------
# first get our example data: # # first get our example data:
measurements = osem_measurements('Windgeschwindigkeit') # measurements = osem_measurements('Windgeschwindigkeit')
## ----serialize_json----------------------------------------------------------- ## ----serialize_json, eval=FALSE-----------------------------------------------
# serializing senseBoxes to JSON, and loading from file again: # # serializing senseBoxes to JSON, and loading from file again:
write(jsonlite::serializeJSON(measurements), 'measurements.json') # write(jsonlite::serializeJSON(measurements), 'measurements.json')
measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json')) # measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
class(measurements_from_file) # class(measurements_from_file)
## ----serialize_attrs---------------------------------------------------------- ## ----serialize_attrs, eval=FALSE----------------------------------------------
# note the toJSON call instead of serializeJSON # # note the toJSON call instead of serializeJSON
write(jsonlite::toJSON(measurements), 'measurements_bad.json') # write(jsonlite::toJSON(measurements), 'measurements_bad.json')
measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json') # measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
class(measurements_without_attrs) # class(measurements_without_attrs)
#
measurements_with_attrs = osem_as_measurements(measurements_without_attrs) # measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
class(measurements_with_attrs) # class(measurements_with_attrs)
## ----cleanup, include=FALSE--------------------------------------------------- ## ----cleanup, include=FALSE, eval=FALSE---------------------------------------
file.remove('measurements.json', 'measurements_bad.json') # file.remove('measurements.json', 'measurements_bad.json')

@ -71,7 +71,7 @@ osem_clear_cache(getwd()) # clears a custom cache
If you want to roll your own serialization method to support custom data formats, If you want to roll your own serialization method to support custom data formats,
here's how: here's how:
```{r data, results='hide'} ```{r data, results='hide', eval=FALSE}
# first get our example data: # first get our example data:
measurements = osem_measurements('Windgeschwindigkeit') measurements = osem_measurements('Windgeschwindigkeit')
``` ```
@ -79,7 +79,7 @@ measurements = osem_measurements('Windgeschwindigkeit')
If you are paranoid and worry about `.rds` files not being decodable anymore If you are paranoid and worry about `.rds` files not being decodable anymore
in the (distant) future, you could serialize to a plain text format such as JSON. in the (distant) future, you could serialize to a plain text format such as JSON.
This of course comes at the cost of storage space and performance. This of course comes at the cost of storage space and performance.
```{r serialize_json} ```{r serialize_json, eval=FALSE}
# serializing senseBoxes to JSON, and loading from file again: # serializing senseBoxes to JSON, and loading from file again:
write(jsonlite::serializeJSON(measurements), 'measurements.json') write(jsonlite::serializeJSON(measurements), 'measurements.json')
measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json')) measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
@ -90,7 +90,7 @@ This method also persists the R object metadata (classes, attributes).
If you were to use a serialization method that can't persist object metadata, you If you were to use a serialization method that can't persist object metadata, you
could re-apply it with the following functions: could re-apply it with the following functions:
```{r serialize_attrs} ```{r serialize_attrs, eval=FALSE}
# note the toJSON call instead of serializeJSON # note the toJSON call instead of serializeJSON
write(jsonlite::toJSON(measurements), 'measurements_bad.json') write(jsonlite::toJSON(measurements), 'measurements_bad.json')
measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json') measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
@ -101,6 +101,6 @@ class(measurements_with_attrs)
``` ```
The same goes for boxes via `osem_as_sensebox()`. The same goes for boxes via `osem_as_sensebox()`.
```{r cleanup, include=FALSE} ```{r cleanup, include=FALSE, eval=FALSE}
file.remove('measurements.json', 'measurements_bad.json') file.remove('measurements.json', 'measurements_bad.json')
``` ```

@ -12,7 +12,7 @@
<meta name="author" content="Norwin Roosen" /> <meta name="author" content="Norwin Roosen" />
<meta name="date" content="2023-02-23" /> <meta name="date" content="2023-03-08" />
<title>Caching openSenseMap Data for Reproducibility</title> <title>Caching openSenseMap Data for Reproducibility</title>
@ -341,7 +341,7 @@ code > span.er { color: #a61717; background-color: #e3d2d2; }
<h1 class="title toc-ignore">Caching openSenseMap Data for <h1 class="title toc-ignore">Caching openSenseMap Data for
Reproducibility</h1> Reproducibility</h1>
<h4 class="author">Norwin Roosen</h4> <h4 class="author">Norwin Roosen</h4>
<h4 class="date">2023-02-23</h4> <h4 class="date">2023-03-08</h4>
@ -407,21 +407,17 @@ storage space and performance.</p>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">serializeJSON</span>(measurements), <span class="st">&#39;measurements.json&#39;</span>)</span> <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">serializeJSON</span>(measurements), <span class="st">&#39;measurements.json&#39;</span>)</span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>measurements_from_file <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">unserializeJSON</span>(readr<span class="sc">::</span><span class="fu">read_file</span>(<span class="st">&#39;measurements.json&#39;</span>))</span> <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>measurements_from_file <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">unserializeJSON</span>(readr<span class="sc">::</span><span class="fu">read_file</span>(<span class="st">&#39;measurements.json&#39;</span>))</span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_from_file)</span></code></pre></div> <span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_from_file)</span></code></pre></div>
<pre><code>## [1] &quot;osem_measurements&quot; &quot;tbl_df&quot; &quot;tbl&quot;
## [4] &quot;data.frame&quot;</code></pre>
<p>This method also persists the R object metadata (classes, <p>This method also persists the R object metadata (classes,
attributes). If you were to use a serialization method that can't attributes). If you were to use a serialization method that can't
persist object metadata, you could re-apply it with the following persist object metadata, you could re-apply it with the following
functions:</p> functions:</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="co"># note the toJSON call instead of serializeJSON</span></span> <div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># note the toJSON call instead of serializeJSON</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">toJSON</span>(measurements), <span class="st">&#39;measurements_bad.json&#39;</span>)</span> <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">toJSON</span>(measurements), <span class="st">&#39;measurements_bad.json&#39;</span>)</span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>measurements_without_attrs <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">fromJSON</span>(<span class="st">&#39;measurements_bad.json&#39;</span>)</span> <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>measurements_without_attrs <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">fromJSON</span>(<span class="st">&#39;measurements_bad.json&#39;</span>)</span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_without_attrs)</span></code></pre></div> <span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_without_attrs)</span>
<pre><code>## [1] &quot;data.frame&quot;</code></pre> <span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a></span>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>measurements_with_attrs <span class="ot">=</span> <span class="fu">osem_as_measurements</span>(measurements_without_attrs)</span> <span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a>measurements_with_attrs <span class="ot">=</span> <span class="fu">osem_as_measurements</span>(measurements_without_attrs)</span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_with_attrs)</span></code></pre></div> <span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_with_attrs)</span></code></pre></div>
<pre><code>## [1] &quot;osem_measurements&quot; &quot;tbl_df&quot; &quot;tbl&quot;
## [4] &quot;data.frame&quot;</code></pre>
<p>The same goes for boxes via <code>osem_as_sensebox()</code>.</p> <p>The same goes for boxes via <code>osem_as_sensebox()</code>.</p>
</div> </div>

Loading…
Cancel
Save