Testing the equality of mean densities with an application to climate change in Vietnam

Keywords

Bayes spaces

1 Introduction

2 Reminders

2.1 Reminders on functional one-sample problem

2.2 Reminders on functional analysis of variance

2.3 Reminders on distributional data analysis and Bayes spaces

3 Testing and interpreting mean densities in the Bayes space

3.1 One-sample problem

3.1.1 \(L^2\) approach

3.1.2 PC approach

3.2 Distributional ANOVA

3.2.1 \(\operatorname{L^2B}\) statistic

3.2.2 \(\operatorname{FB}\) statistic

3.2.3 \(\operatorname{CS}\) statistic

3.2.4 \(\operatorname{GPF}\) statistic

3.2.5 \(F_{\max}\) statistic

3.3 Pairwise comparisons

3.4 Interval-wise interpretation

5 Conclusion and perspectives

Acknowledgments

Global one-sample problem

Code
vnt_mean <- mean(c(vnt$t_max))

plot(vnt_mean) +
  geom_hline(yintercept = 1 / diff(vnt$t_max[[1]]$basis$rangeval), linetype = "dotted") +
  scale_y_continuous(n.breaks = 4) +
  labs(
    x = "Temperature (deg. Celsius)",
    y = "Density"
  )

plot(as.fd(as.list(vnt_mean))) +
  scale_y_continuous(n.breaks = 4) +
  labs(
    x = "Temperature (deg. Celsius)",
    y = "clr"
  )
Figure 12: Mean annual slope of the maximum temperature distributions between 1987 and 2016.
Figure 13: Centered log-ratio transform of the mean annual slope of maximum temperature distributions between 1987 and 2016.
Code
kable_format(one_sample(c(vnt$t_max)))
Table 4: Test statistics and \(p\)-values for the global one-sample problem.
Name Statistic p-value
PC 38 3.1e-08 ***
Norm 1.3 1.3e-06 ***
Code
thres0 <- 0.5 * (fmax(vnt_mean, 25, 30) + fmin(vnt_mean, 20, 25))

vnt_mean_thresholds <- tibble(
  x = c(
    fsolve(vnt_mean, fmax(vnt_mean, 25, 30), 15, 20),
    fsolve(vnt_mean, thres0, 15, 22),
    fsolve(vnt_mean, thres0, 22, 30),
    fsolve(vnt_mean, thres0, 30, 35),
    fsolve(vnt_mean, fmin(vnt_mean, 20, 25), 30, 35)
  ),
  y = c(
    fmax(vnt_mean, 25, 30),
    thres0,
    thres0,
    thres0,
    fmin(vnt_mean, 20, 25)
  )
)

vnt_mean_thresholds |> signif(3)
x y
18.2 0.0362
19.6 0.0359
25.4 0.0359
32.9 0.0359
33.4 0.0355
Figure 14: Global trend.
Code
plot(vnt_mean) +
  geom_vline(data = vnt_mean_thresholds, aes(xintercept = x, color = y, linetype = -y)) +
  geom_hline(data = vnt_mean_thresholds, aes(yintercept = y, color = y, linetype = -y)) +
  scale_linetype_binned() +
  scale_y_continuous(n.breaks = 4) +
  theme(legend.position = "none") +
  labs(
    x = "Temperature (deg. Celsius)",
    y = "Density"
  )
Figure 15: Global trend.

Centered regional mean slopes

Code
vnt |>
  select(!province) |>
  mutate(t_max = center(t_max)) |>
  group_by(region, region_name) |>
  summarise(t_max = mean(t_max), .groups = "drop") |>
  mutate(t_max = as.fd(t_max)) |>
  plot_funs(t_max, color = region_name) +
  scale_color_viridis_d(option = "magma", end = 0.8) +
  scale_y_continuous(n.breaks = 4) +
  theme(legend.position = "none") +
  labs(
    x = "Temperature (deg. Celsius)",
    y = "Centered log-ratio transform"
  )
Figure 16: Regional mean slope, centered by the global mean.

Residuals in temporal regression

Code
vnt |>
  select(!rsq) |>
  unnest(ssr_evol) |>
  ggplot(aes(t, ssr, color = region, group = province)) +
  geom_point() +
  geom_line() +
  facet_grid(vars(region))
Figure 17: \(\frac{\| \varepsilon (t) \|^2}{\| y(t) - \overline y (t) \|^2}\).
Code
vietnam_provinces |>
  left_join(vnt, by = "province") |>
  ggplot(aes(fill = rsq)) +
  geom_sf() +
  scale_fill_viridis_c() +
  scale_x_continuous(breaks = seq(from = -180, to = 180, by = 2)) +
  labs(x = "Longitude", y = "Latitude", fill = "R squared")
Figure 18: \(R^2 = 1 - \frac{\sum_t \| \varepsilon (t) \|^2}{\sum_t \| y(t) - \overline y (t) \|^2}\).

Normality assumption

Code
vnt_pc <- pca.fd(c(vnt$t_max), nharm = 5)
pairs(vnt_pc$scores)
mvn(vnt_pc$scores, mvnTest = "mardia")

Homogeneity of variances

Code
vnt_var <- var.fd(c(as.fd(vnt$t_max)))

tt <- seq(
  vnt_var$sbasis$rangeval[1],
  vnt_var$sbasis$rangeval[2],
  length.out = 401
)

z <- eval.bifd(tt, tt, vnt_var) # Covariance
z <- cor.fd(tt, c(as.fd(vnt$t_max))) # Correlation

df <- data.frame(
  x = rep(tt, each = length(tt)),
  y = rep(tt, times = length(tt)),
  z = as.vector(z)
)

ggplot(df, aes(x = x, y = y, z = z)) +
  geom_contour_filled() +
    coord_fixed() +
  labs(
    fill = "Correlation",
    x = "Temperature (deg. Celsius)", y = "Temperature (deg. Celsius)"
  )
Figure 19: Correlation function within the MDR region.

Spatial dependence

Code
library(sf)
library(units)
Code
vnt_sdist <- vnt |>
  left_join(select(vietnam_provinces, !region), by = "province") |>
    st_as_sf() |>
    st_centroid() |>
    st_distance() |>
    set_units("km") |>
    drop_units()
Code
ggplot(reshape2::melt(vnt_sdist), aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_viridis_c(option = "magma") +
    scale_y_reverse() +
  coord_fixed() + labs(fill = "Distance (km)") + theme_void()

# Computing ANOVA residuals
vnt_rmeans <- vnt |>
  select(!province) |>
  group_by(region, region_name) |>
  mutate(t_max = mean(t_max))
fd_obj <- as.fd(c(vnt$t_max - vnt_rmeans$t_max))
n <- nrow(vnt)

# Computing pairwise Bayes distances between residuals
vnt_fdist <- matrix(0, n, n)
for(i in 1:(n-1)){
  for(j in (i+1):n){
    diff_fd <- fd_obj[i] - fd_obj[j]
    vnt_fdist[i,j] <- sqrt(inprod(diff_fd, diff_fd))
    vnt_fdist[j,i] <- vnt_fdist[i,j]
  }
}

ggplot(reshape2::melt(vnt_fdist), aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_viridis_c(option = "magma") +
    scale_y_reverse() +
  coord_fixed() + labs(fill = "Bayes distance") + theme_void()
Figure 20: Spatial distances between centroids of Vietnam provinces, from north to south.
Figure 21: Bayes distances between slope densities of maximum temperature.
Code
library(gstat)
library(dplyr)

variogram_dist <- function(dist_mat, fdist_mat, cutoff = NULL, width = NULL, nbins = 15) {
  n <- nrow(dist_mat)
  
  # Check that both matrices are square and of the same size
  if (!is.matrix(dist_mat) || !is.matrix(fdist_mat) ||
      ncol(dist_mat) != n || nrow(fdist_mat) != n || ncol(fdist_mat) != n) {
    stop("dist_mat and fdist_mat must be square matrices of the same size.")
  }
  
  # Compute gamma matrix from feature-distance matrix
  gamma_mat <- 0.5 * fdist_mat^2
  
  # Default cutoff and bin width similar to gstat
  dmax <- max(dist_mat, na.rm = TRUE)
  if (is.null(cutoff)) cutoff <- dmax / 3
  if (is.null(width)) width <- cutoff / nbins
  
  # Create bin breaks
  breaks <- seq(0, cutoff, by = width)
  if (tail(breaks, 1) < cutoff) breaks <- c(breaks, cutoff)
  
  # Extract upper-triangle indices (i < j)
  idx <- which(upper.tri(dist_mat), arr.ind = TRUE)
  d_ij <- dist_mat[idx]          # spatial distances
  gamma_ij <- gamma_mat[idx]     # computed semivariances
  
  # Assign each pair to a bin
  bin_id <- cut(d_ij, breaks = breaks, include.lowest = TRUE)
  
  # Compute mean gamma per bin
  gamma_means <- tapply(gamma_ij, bin_id, mean, na.rm = TRUE)
  npairs <- tapply(gamma_ij, bin_id, length)
  
  # Bin centers
  bin_centers <- (head(breaks, -1) + tail(breaks, -1)) / 2
  
  # Return as data.frame
  data.frame(
    dist = bin_centers,
    gamma = as.numeric(gamma_means),
    np = as.numeric(npairs)
  )
}

vg <- variogram_dist(vnt_sdist, vnt_fdist)

ggplot(vg, aes(x=dist, y=gamma)) + geom_point() + labs(x = "Distance (km)", y = "Semivariance")
Figure 22: Trace-variogram of group-centered slope densities in the Bayes space.

Reuse

Citation

BibTeX citation:
@article{mondon2025,
  author = {Mondon, Camille and Trinh, Huong Thi and Martín-Fernández,
    Josep Antoni and Thomas-Agnan, Christine},
  title = {Testing the Equality of Mean Densities with an Application to
    Climate Change in {Vietnam}},
  journal = {Stochastic Environmental Research and Risk Assessment},
  date = {2025-12-09},
  langid = {en}
}
For attribution, please cite this work as:
Mondon, Camille, Huong Thi Trinh, Josep Antoni Martín-Fernández, and Christine Thomas-Agnan. 2025. “Testing the Equality of Mean Densities with an Application to Climate Change in Vietnam.” Stochastic Environmental Research and Risk Assessment, December.