4  Implementation of LGCP in INLA and INLABRU

Having completed an examination of Air Pollutoin modelling in INLA and INLABRU, it is time to move on to modelling the point process

Using data from GBIF.org (10 January 2025) GBIF Occurrence Download https://doi.org/10.15468/dl.p5cy6n

4.1 LGCP in INLA introduction

Following https://www.paulamoraga.com/book-spatial/point-process-modeling.html for the INLA intro.

library(INLA)
Loading required package: Matrix
Loading required package: sp
This is INLA_24.05.01-1 built 2024-05-01 18:49:50 UTC.
 - See www.r-inla.org/contact-us for how to get help.
 - List available models/likelihoods/etc with inla.list.models()
 - Use inla.doc(<NAME>) to access documentation
library(inlabru)
Loading required package: fmesher
library(sf)
Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE
library(ggplot2)
library(viridis)
Loading required package: viridisLite
library(terra)
terra 1.7.78
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:terra':

    intersect, union
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(readr)

TODO: Fix issue reading data. StillIMage is doing something

file_name <- file.path("D:", "data", "gbif_observation_org_butterflies", "gbif_butterfly_observation-org", "gbif_subset_netherlands_lepidoptera.csv")
d <- read_delim(file_name, delim='\t', col_types=cols(infraspecificEpithet=col_character()))
d %>% filter(species=="Lasiommata megera") -> d
dim(d)
[1] 2705   50
sum(is.na(d))
[1] 45001
d <- st_as_sf(d, coords = c("decimalLongitude", "decimalLatitude"))
st_crs(d) <- "EPSG:4326"
st_crs("EPSG:3857")$proj4string
[1] "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"
projMercator <- st_crs("EPSG:3857")$proj4string
projMercator
[1] "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"
# Observed coordinates
d <- st_transform(d, crs = projMercator)
d
Simple feature collection with 2705 features and 48 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 411882.1 ymin: 6577190 xmax: 740274.6 ymax: 7066673
Projected CRS: WGS 84 / Pseudo-Mercator
# A tibble: 2,705 × 49
       gbifID datasetKey    occurrenceID kingdom phylum class order family genus
 *      <dbl> <chr>         <chr>        <chr>   <chr>  <chr> <chr> <chr>  <chr>
 1 4160322107 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
 2 4409008265 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
 3 4462065068 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
 4 4160750394 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
 5 4159129901 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
 6 4410120094 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
 7 4410216900 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
 8 4409871777 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
 9 4409311703 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
10 4158114004 8a863029-f43… https://obs… Animal… Arthr… Inse… Lepi… Nymph… Para…
# ℹ 2,695 more rows
# ℹ 40 more variables: species <chr>, infraspecificEpithet <chr>,
#   taxonRank <chr>, scientificName <chr>, verbatimScientificName <chr>,
#   verbatimScientificNameAuthorship <lgl>, countryCode <chr>, locality <chr>,
#   stateProvince <chr>, occurrenceStatus <chr>, individualCount <dbl>,
#   publishingOrgKey <chr>, coordinateUncertaintyInMeters <dbl>,
#   coordinatePrecision <lgl>, elevation <lgl>, elevationAccuracy <lgl>, …
?st_layers
starting httpd help server ... done
layers <- st_layers(file.path("D:", "data", "maps", "netherlands_bestuurlijkegrenzen_2021", "bestuurlijkegrenzen.gpkg"))
#print(str(layers))
map <- st_read(file.path("D:", "data", "maps", "netherlands_bestuurlijkegrenzen_2021", "bestuurlijkegrenzen.gpkg"), layer = "landsgrens")
Reading layer `landsgrens' from data source 
  `D:\data\maps\netherlands_bestuurlijkegrenzen_2021\bestuurlijkegrenzen.gpkg' 
  using driver `GPKG'
Simple feature collection with 1 feature and 2 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 10425.16 ymin: 306846.2 xmax: 278026.1 ymax: 621876.3
Projected CRS: Amersfoort / RD New
map <- st_union(map)
map <- st_as_sf(map)
plot(map)

# Damn it, there's a little isolated spec in the map!
border_polygon <- st_cast(map, "POLYGON")
border_polygon <- st_as_sfc(border_polygon)
geos <- lapply(border_polygon, function(x) x[1])
for (g in geos){
  plot(st_polygon(g))
}

# Get the border polygon
border_final <- st_polygon(geos[[1]])
# We still need sf object
border_final <- st_sfc(border_final, crs=st_crs(map))
border_final <- st_as_sf(border_final)
plot(border_final)

map <- border_final
map <- st_transform(map, crs = projMercator)
coo <- st_coordinates(d)
ggplot() + geom_sf(data = map) +
  geom_sf(data = d) + coord_sf(datum = projMercator)

# raster grid covering map
grid <- terra::rast(map, nrows = 50, ncols = 50)
# coordinates of all cells
xy <- terra::xyFromCell(grid, 1:ncell(grid))
# transform points to a sf object
dp <- st_as_sf(as.data.frame(xy), coords = c("x", "y"),
               crs = st_crs(map))

# indices points within the map
indicespointswithin <- which(st_intersects(dp, map,
                                           sparse = FALSE))

# points within the map
dp <- st_filter(dp, map)

ggplot() + geom_sf(data = map) +
  geom_sf(data = dp) + coord_sf(datum = projMercator)

coop <- st_coordinates(dp)

Hmm, why do I need such a large unit? Something with mercator? Oh, am I in meters?

map
Simple feature collection with 1 feature and 0 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 368237.9 ymin: 6577255 xmax: 804561.4 ymax: 7090341
Projected CRS: WGS 84 / Pseudo-Mercator
                               x
1 POLYGON ((669987.2 6579597,...
loc.d <- cbind(st_coordinates(map)[, 1], st_coordinates(map)[, 2])
#mesh <- inla.mesh.2d(loc=coo, max.edge = c(50000, 100000))
#mesh <- inla.mesh.2d(loc.domain=loc.d)
mesh <- inla.mesh.2d(loc.domain = loc.d, max.edge = c(50000, 100000), crs=crs(d))
mesh
fm_mesh_2d object:
  CRS:
    LegacyPROJ4:    +proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs
    WKT: (only shown with verbose = TRUE)
  Manifold: R2
  V / E / T:    326 / 930 / 605
  Euler char.:  1
  Constraints:  45 boundary edges (1 group: 0), 56 interior edges (1 group: 0)
  Bounding box: (243337.2,929462.1) x (6452354,7215241)
  Basis d.o.f.: 326
plot(mesh)
points(coo, col = "red")
axis(1)
axis(2)

(nv <- mesh$n)
[1] 326
(n <- nrow(coo))
[1] 2705
spde <- inla.spde2.matern(mesh = mesh, alpha = 2, constr = TRUE)
book.mesh.dual <- function(mesh) {
    if (mesh$manifold=='R2') {
        ce <- t(sapply(1:nrow(mesh$graph$tv), function(i)
            colMeans(mesh$loc[mesh$graph$tv[i, ], 1:2])))
        library(parallel)
        pls <- mclapply(1:mesh$n, function(i) {
            p <- unique(Reduce('rbind', lapply(1:3, function(k) {
            j <- which(mesh$graph$tv[,k]==i)
            if (length(j)>0) 
            return(rbind(ce[j, , drop=FALSE],
            cbind(mesh$loc[mesh$graph$tv[j, k], 1] +
            mesh$loc[mesh$graph$tv[j, c(2:3,1)[k]], 1], 
            mesh$loc[mesh$graph$tv[j, k], 2] +
            mesh$loc[mesh$graph$tv[j, c(2:3,1)[k]], 2])/2))
            else return(ce[j, , drop=FALSE])
            })))
            j1 <- which(mesh$segm$bnd$idx[,1]==i)
            j2 <- which(mesh$segm$bnd$idx[,2]==i)
            if ((length(j1)>0) | (length(j2)>0)) {
            p <- unique(rbind(mesh$loc[i, 1:2], p,
            mesh$loc[mesh$segm$bnd$idx[j1, 1], 1:2]/2 +
            mesh$loc[mesh$segm$bnd$idx[j1, 2], 1:2]/2, 
            mesh$loc[mesh$segm$bnd$idx[j2, 1], 1:2]/2 +
            mesh$loc[mesh$segm$bnd$idx[j2, 2], 1:2]/2))
            yy <- p[,2]-mean(p[,2])/2-mesh$loc[i, 2]/2
            xx <- p[,1]-mean(p[,1])/2-mesh$loc[i, 1]/2
            }
            else {
            yy <- p[,2]-mesh$loc[i, 2]
            xx <- p[,1]-mesh$loc[i, 1]
            }
            Polygon(p[order(atan2(yy,xx)), ])
        })
        return(SpatialPolygons(lapply(1:mesh$n, function(i)
            Polygons(list(pls[[i]]), i))))
    }
    else stop("It only works for R2!")
}
dmesh <- book.mesh.dual(mesh)
plot(dmesh)
axis(1)
axis(2)

We then do something a little tricky. The mesh is larger than the domain that the points were observed in or the study region. So the intersections between the polygons in the mesh and the locations in \(D\) are computed

plot(loc.d)

domain.polys <- Polygons(list(Polygon(loc.d)), '0')
domainSP <- SpatialPolygons(list(domain.polys))
plot(domainSP)

domain_sf <- st_as_sf(domainSP)
crs(domain_sf)
[1] ""
domain_sf <- st_set_crs(domain_sf, projMercator)
st_crs(domain_sf)
Coordinate Reference System:
  User input: +proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs 
  wkt:
PROJCRS["WGS 84 / Pseudo-Mercator",
    BASEGEOGCRS["WGS 84",
        DATUM["World Geodetic System 1984",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4326]],
    CONVERSION["unnamed",
        METHOD["Popular Visualisation Pseudo Mercator",
            ID["EPSG",1024]],
        PARAMETER["Latitude of natural origin",0,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",0,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["False easting",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["(E)",east,
            ORDER[1],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]],
        AXIS["(N)",north,
            ORDER[2],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]]]
plot(domain_sf)

mesh_sf <- st_as_sf(dmesh)
mesh_sf <- st_set_crs(mesh_sf, projMercator)
# Check if the mesh polygons overlap with any of the locations 
w <- sapply(1:length(dmesh), function(i) {
  if(length(st_intersects(mesh_sf[i,], domain_sf)[[1]])>0){
    return(sf::st_area(sf::st_intersection(mesh_sf[i, ], domain_sf)))
  }
  else return(0)
})
sum(w)
[1] 111060620373
st_area(map)
111060620373 [m^2]
plot(mesh)
plot(domain_sf, add=T, col="green")
points(mesh$loc[which(w > 0), 1:2], col = "black", pch = 20)
points(mesh$loc[which(w == 0), 1:2], col = "red", pch = 20)

TODO: January 13th. Okay, now i’m stuck again. Finish later

y.pp <- rep(0:1, c(nv, n))
e.pp <- c(w, rep(0, n))
# Projection matrix for the integration points (mesh vertices)
A.int <- Diagonal(nv, rep(1, nv))
# Projection matrix for observed points (event locations)
A.y <- inla.spde.make.A(mesh = mesh, loc = coo)
# Projection matrix for mesh vertices and event locations
A.pp <- rbind(A.int, A.y)

# We also create the projection matrix Ap.pp for the prediction locations.
Ap.pp <- inla.spde.make.A(mesh = mesh, loc = coop)
# stack for estimation
stk.e.pp <- inla.stack(tag = "est.pp",
data = list(y = y.pp, e = e.pp), 
A = list(1, A.pp),
effects = list(list(b0 = rep(1, nv + n)), list(s = 1:nv)))

# stack for prediction stk.p
stk.p.pp <- inla.stack(tag = "pred.pp",
data = list(y = rep(NA, nrow(coop)), e = rep(0, nrow(coop))),
A = list(1, Ap.pp),
effects = list(data.frame(b0 = rep(1, nrow(coop))),
               list(s = 1:nv)))

# stk.full has stk.e and stk.p
stk.full.pp <- inla.stack(stk.e.pp, stk.p.pp)
formula <- y ~ 0 + b0 + f(s, model = spde)
res <- inla(formula,  family = 'poisson',
  data = inla.stack.data(stk.full.pp),
  control.inla=list(int.strategy = 'grid', strategy="laplace"),
  control.predictor = list(compute = TRUE, link = 1,
    A = inla.stack.A(stk.full.pp)),
    E = inla.stack.data(stk.full.pp)$e)
res$summary.fixed
       mean       sd 0.025quant 0.5quant 0.975quant    mode          kld
b0 1.039501 1.065052  -1.049386 1.039649   3.127548 1.03965 5.801275e-11
res$summary.hyperpar
                  mean          sd 0.025quant   0.5quant 0.975quant       mode
Theta1 for s   8.14040 0.001247988   8.137666   8.140495   8.142519   8.140964
Theta2 for s -11.45034 0.009720901 -11.466856 -11.451076 -11.429055 -11.454715
index <- inla.stack.index(stk.full.pp, tag = "pred.pp")$data
pred_mean <- res$summary.fitted.values[index, "mean"]
pred_ll <- res$summary.fitted.values[index, "0.025quant"]
pred_ul <- res$summary.fitted.values[index, "0.975quant"]
grid$mean <- NA
grid$ll <- NA
grid$ul <- NA

grid$mean[indicespointswithin] <- pred_mean
grid$ll[indicespointswithin] <- pred_ll
grid$ul[indicespointswithin] <- pred_ul

Then plot the predicted intensity

plot(grid)
geom_sf(data=grid, aes(color=mean)) +
  scale_colour_gradient(low = "blue", high = "yellow")

We have to make sure to get the domain for the sampler correct. The INLA code does it above, but inlabru by default does not.

More about that https://inlabru-org.github.io/inlabru/articles/2d_lgcp_plotsampling.html and actually the exact problem here: https://groups.google.com/g/r-inla-discussion-group/c/0bBC9bVV-L4 even though the problem was with preferential sampling. In the mesh-process R section above, you can see the manipulation to get the domains correct for INLA. Even with including sampler=domain_sf here, the estimates are not exactly the same as INLA, but closer than it was before.

TODO: What is this domain stuff for exactly? The integration? Should be equation 3 of Simpson (2016).

# TODO: Make sure I get the same result as inla. Options and mesh are off
# Oh nice we can name the intercept but then need to subtract 1 to get rid of the default intercept
formula_inlabru <- geometry ~ b0(1) - 1 + f(geometry, model = spde)
fit1 <- lgcp(formula_inlabru, data=d, sampler=domain_sf, domain = list(geometry = mesh), 
             options = list(control.inla=list(int.strategy = 'ccd', strategy="laplace")))
#                          control.compute=list(config=TRUE),
#                          control.results=list(return.marginals.random = TRUE,
#                                               return.marginals.predictor = TRUE),
#                          control.predictor = list(compute = TRUE)))

summary(fit1)
inlabru version: 2.12.0
INLA version: 24.05.01-1
Components:
b0: main = linear(1), group = exchangeable(1L), replicate = iid(1L), NULL
f: main = spde(geometry), group = exchangeable(1L), replicate = iid(1L), NULL
Likelihoods:
  Family: 'cp'
    Tag: ''
    Data class: 'sf', 'data.frame'
    Response class: 'numeric'
    Predictor: geometry ~ .
    Used components: effects[b0, f], latent[]
Time used:
    Pre = 0.691, Running = 1.47, Post = 0.175, Total = 2.33 
Fixed effects:
      mean  sd 0.025quant 0.5quant 0.975quant   mode kld
b0 -21.688 1.4    -24.492  -21.662    -19.041 -21.66   0

Random effects:
  Name    Model
    f SPDE2 model

Model hyperparameters:
               mean    sd 0.025quant 0.5quant 0.975quant   mode
Theta1 for f   8.85 0.106       8.66     8.85       9.07   8.83
Theta2 for f -12.74 0.873     -14.74   -12.63     -11.41 -12.07

Deviance Information Criterion (DIC) ...............: -126820.99
Deviance Information Criterion (DIC, saturated) ....: NA
Effective number of parameters .....................: -127802.62

Watanabe-Akaike information criterion (WAIC) ...: 7199.81
Effective number of parameters .................: 2414.72

Marginal log-Likelihood:  -64580.42 
 is computed 
Posterior summaries for the linear predictor and the fitted values are computed
(Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')

TODO: Oh wow, why is b0 so different between inla and inlabru.

predictions1 <- predict(fit1, newdata=dp, formula = ~ b0 + f)
predictions2 <- predict(fit1, newdata=dp, formula = ~ f)
ggplot() +
geom_sf(data=predictions1, aes(color=mean)) +
  scale_colour_gradient(low = "blue", high = "yellow")

# Check the contribution of just the spatial field
ggplot() +
geom_sf(data=predictions2, aes(color=mean)) +
  scale_colour_gradient(low = "blue", high = "yellow")

Then, we can move onto multiple likelihoods and inlabru