## diet.gini.std litters.yr.std bodymass.std range.std
## diet.gini.std 1.00000000 -0.11475751 0.2509206 -0.05533852
## litters.yr.std -0.11475751 1.00000000 -0.2236722 -0.02857648
## bodymass.std 0.25092064 -0.22367218 1.0000000 0.03046890
## range.std -0.05533852 -0.02857648 0.0304689 1.00000000
##
## Pearson's product-moment correlation
##
## data: d$diet.gini.std and d$score_diff_sum.std
## t = -3.9959, df = 159, p-value = 9.837e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4363722 -0.1546375
## sample estimates:
## cor
## -0.3020869
##
## Pearson's product-moment correlation
##
## data: d$litters.yr.std and d$score_diff_sum.std
## t = 3.2542, df = 148, p-value = 0.001409
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1023900 0.4020236
## sample estimates:
## cor
## 0.2584112
##
## Pearson's product-moment correlation
##
## data: d$bodymass.std and d$score_diff_sum.std
## t = -2.7468, df = 161, p-value = 0.006704
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.35379059 -0.05980112
## sample estimates:
## cor
## -0.2115767
##
## Pearson's product-moment correlation
##
## data: d$range.std and d$score_diff_sum.std
## t = 1.5007, df = 160, p-value = 0.1354
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03705508 0.26715576
## sample estimates:
## cor
## 0.1178135
##
## Pearson's product-moment correlation
##
## data: d$range.std and d$score_imp.std
## t = 2.1982, df = 160, p-value = 0.02937
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.01748519 0.31704413
## sample estimates:
## cor
## 0.171219
##
## Pearson's product-moment correlation
##
## data: d$bodymass.std and d$score_imp.std
## t = 1.9347, df = 161, p-value = 0.05478
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.003056719 0.297560323
## sample estimates:
## cor
## 0.1507346
##
## Pearson's product-moment correlation
##
## data: d$litters.yr.std and d$score_imp.std
## t = -1.1232, df = 148, p-value = 0.2632
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.24853805 0.06934473
## sample estimates:
## cor
## -0.09193853
##
## Pearson's product-moment correlation
##
## data: d$range.std and d$p_impbiodiv.std
## t = 3.7501, df = 160, p-value = 0.0002466
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1360112 0.4200312
## sample estimates:
## cor
## 0.2842454
##
## Pearson's product-moment correlation
##
## data: d$bodymass.std and d$p_impbiodiv.std
## t = 2.6806, df = 161, p-value = 0.008115
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.05471297 0.34931661
## sample estimates:
## cor
## 0.206695
##
## Pearson's product-moment correlation
##
## data: d$litters.yr.std and d$p_impbiodiv.std
## t = -0.7157, df = 148, p-value = 0.4753
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2169481 0.1024977
## sample estimates:
## cor
## -0.05872849
##
## Pearson's product-moment correlation
##
## data: d$diet.gini.std and d$p_impbiodiv.std
## t = 0.62169, df = 159, p-value = 0.535
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1062404 0.2023770
## sample estimates:
## cor
## 0.04924363
##
## Pearson's product-moment correlation
##
## data: d$range.std and d$score_imp_ant.std
## t = 1.1452, df = 160, p-value = 0.2538
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06493196 0.24101089
## sample estimates:
## cor
## 0.09016626
##
## Pearson's product-moment correlation
##
## data: d$bodymass.std and d$score_imp_ant.std
## t = 1.2046, df = 161, p-value = 0.2301
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06008204 0.24467708
## sample estimates:
## cor
## 0.09451134
##
## Pearson's product-moment correlation
##
## data: d$litters.yr.std and d$score_imp_ant.std
## t = -1.0479, df = 148, p-value = 0.2964
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.24274116 0.07548152
## sample estimates:
## cor
## -0.08581813
##
## Pearson's product-moment correlation
##
## data: d$diet.gini.std and d$score_imp_ant.std
## t = -0.53126, df = 159, p-value = 0.596
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1954965 0.1133181
## sample estimates:
## cor
## -0.04209453
Let’s start by fitting some models to show how biological tratis might affect the spread of invasive alien mammals, in a backwise approach:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=bodymass.std),
# prior(normal(0,0.5), class=b, coef=diet.gini.std),
# prior(normal(0,0.5), class=b, coef=litters.yr.std),
# prior(normal(0,0.5), class=b, coef=range.std))
#mod_diff_a <- brm(score_diff_sum.std ~ diet.gini.std + litters.yr.std + bodymass.std + range.std,
# data = d, family = skew_normal(link = "identity", link_sigma = "log", link_alpha = "identity"),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx)
summary(mod_diff_a)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: score_diff_sum.std ~ diet.gini.std + litters.yr.std + bodymass.std + range.std
## Data: d (Number of observations: 147)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -0.03 0.08 -0.17 0.12 13100 1.00
## diet.gini.std -0.22 0.07 -0.35 -0.09 13375 1.00
## litters.yr.std 0.17 0.07 0.04 0.30 13119 1.00
## bodymass.std -0.07 0.07 -0.22 0.07 12865 1.00
## range.std 0.07 0.06 -0.05 0.19 13940 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.91 0.06 0.80 1.03 9215 1.00
## alpha 2.79 0.99 1.29 5.08 5157 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_diff_a)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_diff_a)
marginal_effects(mod_diff_a)
bayes_R2(mod_diff_a)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.1398817 0.04293955 0.06065537 0.2284317
Range size does not seem to matter too much. Let’s drop it and compare nested models:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=bodymass.std),
# prior(normal(0,0.5), class=b, coef=diet.gini.std),
# prior(normal(0,0.5), class=b, coef=litters.yr.std))
#mod_diff_b <- brm(score_diff_sum.std ~ diet.gini.std + litters.yr.std + bodymass.std,
# data = d, family = skew_normal(link = "identity", link_sigma = "log", link_alpha = "identity"),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx)
summary(mod_diff_b)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: score_diff_sum.std ~ diet.gini.std + litters.yr.std + bodymass.std
## Data: d (Number of observations: 148)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -0.03 0.07 -0.17 0.12 11715 1.00
## diet.gini.std -0.23 0.07 -0.36 -0.10 13140 1.00
## litters.yr.std 0.18 0.07 0.04 0.30 11169 1.00
## bodymass.std -0.06 0.07 -0.21 0.08 11494 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.90 0.06 0.80 1.03 10216 1.00
## alpha 2.81 0.90 1.40 4.83 8878 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_diff_b)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_diff_b)
marginal_effects(mod_diff_b)
bayes_R2(mod_diff_b)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.1340692 0.04230988 0.05710467 0.2221964
loo(mod_diff_a)
## Warning: Found 1 observations with a pareto_k > 0.7 in model 'mod_diff_a'.
## It is recommended to set 'reloo = TRUE' in order to calculate the ELPD
## without the assumption that these observations are negligible. This
## will refit the model 1 times to compute the ELPDs for the problematic
## observations directly.
##
## Computed from 16000 by 147 log-likelihood matrix
##
## Estimate SE
## elpd_loo -191.8 9.2
## p_loo 6.8 1.3
## looic 383.6 18.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 144 98.0% 2513
## (0.5, 0.7] (ok) 2 1.4% 1804
## (0.7, 1] (bad) 1 0.7% 920
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
loo(mod_diff_b)
##
## Computed from 16000 by 148 log-likelihood matrix
##
## Estimate SE
## elpd_loo -192.1 9.4
## p_loo 5.7 1.1
## looic 384.1 18.7
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 146 98.6% 2624
## (0.5, 0.7] (ok) 2 1.4% 1038
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
Body weight does not seem to matter too much. Let’s drop it and compare nested models:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=diet.gini.std),
# prior(normal(0,0.5), class=b, coef=litters.yr.std))
#mod_diff_c <- brm(score_diff_sum.std ~ diet.gini.std + litters.yr.std,
# data = d, family = skew_normal(link = "identity", link_sigma = "log", link_alpha = "identity"),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx)
summary(mod_diff_c)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: score_diff_sum.std ~ diet.gini.std + litters.yr.std
## Data: d (Number of observations: 148)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -0.03 0.07 -0.17 0.12 10377 1.00
## diet.gini.std -0.24 0.06 -0.37 -0.11 10750 1.00
## litters.yr.std 0.19 0.06 0.06 0.31 10356 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.91 0.06 0.80 1.03 8865 1.00
## alpha 2.98 0.97 1.52 5.28 6903 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_diff_c)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_diff_c)
marginal_effects(mod_diff_c)
bayes_R2(mod_diff_c)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.1234615 0.03966707 0.04955454 0.2030412
loo(mod_diff_b)
##
## Computed from 16000 by 148 log-likelihood matrix
##
## Estimate SE
## elpd_loo -192.1 9.4
## p_loo 5.7 1.1
## looic 384.1 18.7
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 146 98.6% 2624
## (0.5, 0.7] (ok) 2 1.4% 1038
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
loo(mod_diff_c)
## Warning: Found 1 observations with a pareto_k > 0.7 in model 'mod_diff_c'.
## It is recommended to set 'reloo = TRUE' in order to calculate the ELPD
## without the assumption that these observations are negligible. This
## will refit the model 1 times to compute the ELPDs for the problematic
## observations directly.
##
## Computed from 16000 by 148 log-likelihood matrix
##
## Estimate SE
## elpd_loo -191.4 9.3
## p_loo 5.1 1.1
## looic 382.9 18.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 147 99.3% 2873
## (0.5, 0.7] (ok) 0 0.0% <NA>
## (0.7, 1] (bad) 1 0.7% 231
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
Also the number of litters might be dropped. Let’s drop it and compare nested models:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=diet.gini.std))
#mod_diff_d <- brm(score_diff_sum.std ~ diet.gini.std,
# data = d, family = skew_normal(link = "identity", link_sigma = "log", link_alpha = "identity"),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx)
summary(mod_diff_d)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: score_diff_sum.std ~ diet.gini.std
## Data: d (Number of observations: 164)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 0.00 0.08 -0.14 0.16 8620 1.00
## diet.gini.std -0.28 0.06 -0.41 -0.16 9460 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.96 0.06 0.85 1.09 7353 1.00
## alpha 3.46 1.03 1.89 5.89 6335 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_diff_d)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_diff_d)
marginal_effects(mod_diff_d)
bayes_R2(mod_diff_d)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.08406786 0.03273582 0.02627928 0.1535229
loo(mod_diff_c)
## Warning: Found 1 observations with a pareto_k > 0.7 in model 'mod_diff_c'.
## It is recommended to set 'reloo = TRUE' in order to calculate the ELPD
## without the assumption that these observations are negligible. This
## will refit the model 1 times to compute the ELPDs for the problematic
## observations directly.
##
## Computed from 16000 by 148 log-likelihood matrix
##
## Estimate SE
## elpd_loo -191.4 9.3
## p_loo 5.1 1.1
## looic 382.9 18.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 147 99.3% 2873
## (0.5, 0.7] (ok) 0 0.0% <NA>
## (0.7, 1] (bad) 1 0.7% 231
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
loo(mod_diff_d) #litters.yr should be included
##
## Computed from 16000 by 164 log-likelihood matrix
##
## Estimate SE
## elpd_loo -219.3 9.2
## p_loo 3.7 0.8
## looic 438.5 18.4
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
Our best candidate model seems to be the one where diet breadth and the number of litters per year are included as predictors
mod_diff <- mod_diff_c
summary(mod_diff)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: score_diff_sum.std ~ diet.gini.std + litters.yr.std
## Data: d (Number of observations: 148)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -0.03 0.07 -0.17 0.12 10377 1.00
## diet.gini.std -0.24 0.06 -0.37 -0.11 10750 1.00
## litters.yr.std 0.19 0.06 0.06 0.31 10356 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.91 0.06 0.80 1.03 8865 1.00
## alpha 2.98 0.97 1.52 5.28 6903 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
bayes_R2(mod_diff)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.1234615 0.03966707 0.04955454 0.2030412
Let’s add a random-intercept term and a covariance matrix to account for phylogenetic correlations:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=diet.gini.std),
# prior(normal(0,0.5), class=b, coef=litters.yr.std))
#mod_diff_RIautocor <- brm(score_diff_sum.std ~ diet.gini.std + litters.yr.std + (1|sciname),
# data = d, family = skew_normal(link = "identity", link_sigma = "log", link_alpha = "identity"),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx, cov_ranef=list(phylo = m))
summary(mod_diff_RIautocor)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: score_diff_sum.std ~ diet.gini.std + litters.yr.std + (1 | sciname)
## Data: d (Number of observations: 148)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Group-Level Effects:
## ~sciname (Number of levels: 148)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.33 0.18 0.02 0.67 566 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -0.03 0.07 -0.17 0.12 17623 1.00
## diet.gini.std -0.24 0.07 -0.37 -0.11 15570 1.00
## litters.yr.std 0.19 0.06 0.06 0.31 10247 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.82 0.11 0.58 0.99 505 1.01
## alpha 4.29 1.98 1.68 9.22 2220 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_diff_RIautocor)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_diff_RIautocor)
marginal_effects(mod_diff_RIautocor)
bayes_R2(mod_diff_RIautocor)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.2721068 0.1379484 0.08677592 0.5984601
loo(mod_diff)
## Warning: Found 1 observations with a pareto_k > 0.7 in model 'mod_diff'. It
## is recommended to set 'reloo = TRUE' in order to calculate the ELPD without
## the assumption that these observations are negligible. This will refit
## the model 1 times to compute the ELPDs for the problematic observations
## directly.
##
## Computed from 16000 by 148 log-likelihood matrix
##
## Estimate SE
## elpd_loo -191.4 9.3
## p_loo 5.1 1.1
## looic 382.9 18.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 147 99.3% 2873
## (0.5, 0.7] (ok) 0 0.0% <NA>
## (0.7, 1] (bad) 1 0.7% 231
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
loo(mod_diff_RIautocor)
## Warning: Found 30 observations with a pareto_k > 0.7 in model
## 'mod_diff_RIautocor'. With this many problematic observations, it may be
## more appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold
## cross-validation rather than LOO.
##
## Computed from 16000 by 148 log-likelihood matrix
##
## Estimate SE
## elpd_loo -187.3 9.2
## p_loo 32.1 3.4
## looic 374.6 18.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 68 45.9% 245
## (0.5, 0.7] (ok) 50 33.8% 28
## (0.7, 1] (bad) 30 20.3% 15
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
Model fitting improves. We might retain this model as the best candidate one:
summary(mod_diff_RIautocor)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: score_diff_sum.std ~ diet.gini.std + litters.yr.std + (1 | sciname)
## Data: d (Number of observations: 148)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Group-Level Effects:
## ~sciname (Number of levels: 148)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.33 0.18 0.02 0.67 566 1.01
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -0.03 0.07 -0.17 0.12 17623 1.00
## diet.gini.std -0.24 0.07 -0.37 -0.11 15570 1.00
## litters.yr.std 0.19 0.06 0.06 0.31 10247 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.82 0.11 0.58 0.99 505 1.01
## alpha 4.29 1.98 1.68 9.22 2220 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Modelling impacts does not seem to be possible. Predictors have an almost null impact over model predictions:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=bodymass.std),
# prior(normal(0,0.5), class=b, coef=litters.yr.std),
# prior(normal(0,0.5), class=b, coef=range.std))
#mod_imp_a <- brm(score_imp.std ~ range.std + bodymass.std + litters.yr.std,
# data = d, family = skew_normal(link = "identity", link_sigma = "log", link_alpha = "identity"),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx)
summary(mod_imp_a)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: score_imp.std ~ range.std + bodymass.std + litters.yr.std
## Data: d (Number of observations: 149)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 0.02 0.07 -0.10 0.17 7574 1.00
## range.std 0.06 0.04 -0.02 0.13 10069 1.00
## bodymass.std 0.10 0.07 -0.05 0.22 9889 1.00
## litters.yr.std -0.03 0.06 -0.14 0.07 10271 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.91 0.05 0.81 1.02 7172 1.00
## alpha 9.45 2.36 5.45 14.67 9928 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_imp_a)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_imp_a)
marginal_effects(mod_imp_a)
bayes_R2(mod_imp_a) #Insufficient base
## Estimate Est.Error Q2.5 Q97.5
## R2 0.02468841 0.01631327 0.00243705 0.06238457
Again, impacts over human activities do not seem to be predictable, based on covariates of interest:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=bodymass.std),
# prior(normal(0,0.5), class=b, coef=diet.gini.std),
# prior(normal(0,0.5), class=b, coef=litters.yr.std),
# prior(normal(0,0.5), class=b, coef=range.std))
#mod_imp_ant_a <- brm(score_imp_ant.std ~ range.std + bodymass.std + litters.yr.std + diet.gini.std,
# data = d, family = skew_normal(link = "identity", link_sigma = "log", link_alpha = "identity"),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx)
summary(mod_imp_ant_a)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: score_imp_ant.std ~ range.std + bodymass.std + litters.yr.std + diet.gini.std
## Data: d (Number of observations: 147)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 0.06 0.07 -0.07 0.20 9722 1.00
## range.std 0.03 0.04 -0.05 0.09 12991 1.00
## bodymass.std 0.07 0.07 -0.07 0.19 10604 1.00
## litters.yr.std -0.03 0.05 -0.14 0.06 8990 1.00
## diet.gini.std 0.00 0.04 -0.08 0.09 11404 1.00
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 0.87 0.05 0.77 0.98 9190 1.00
## alpha 10.71 2.52 6.28 16.06 9566 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_imp_ant_a)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_imp_ant_a)
marginal_effects(mod_imp_ant_a)
bayes_R2(mod_imp_ant_a) #Insufficient base
## Estimate Est.Error Q2.5 Q97.5
## R2 0.01724659 0.01256932 0.001715464 0.04805096
Let’s try to see if covariates of interest affect the score assigned to the potential impacts on biodiversity:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=bodymass.std),
# prior(normal(0,0.5), class=b, coef=litters.yr.std),
# prior(normal(0,0.5), class=b, coef=range.std),
# prior(normal(0,0.5), class=b, coef=diet.gini.std))
#mod_biodiv_a <- brm(p_impbiodiv ~ range.std + bodymass.std + litters.yr.std + diet.gini.std,
# data = d, family = cumulative( link = "logit", link_disc = "log", threshold = c("flexible")),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx)
summary(mod_biodiv_a)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: p_impbiodiv ~ range.std + bodymass.std + litters.yr.std + diet.gini.std
## Data: d (Number of observations: 147)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1] -5.57 1.22 -8.55 -3.74 5690 1.00
## Intercept[2] -0.10 0.17 -0.43 0.24 15963 1.00
## Intercept[3] 1.28 0.20 0.89 1.69 15358 1.00
## Intercept[4] 2.87 0.35 2.22 3.61 16247 1.00
## range.std 0.41 0.15 0.12 0.71 15872 1.00
## bodymass.std 0.34 0.17 0.00 0.69 14515 1.00
## litters.yr.std -0.01 0.15 -0.30 0.27 14790 1.00
## diet.gini.std -0.01 0.15 -0.30 0.27 14678 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_biodiv_a)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_biodiv_a)
marginal_effects(mod_biodiv_a)
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
bayes_R2(mod_biodiv_a)
## Warning: Predictions are treated as continuous variables in 'bayes_R2'
## which is likely invalid for ordinal families.
## Estimate Est.Error Q2.5 Q97.5
## R2 0.1031117 0.03972968 0.03203424 0.1851804
The level of dietary breadth does not seem to be important. Let’s try to drop it and compare nested models:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=bodymass.std),
# prior(normal(0,0.5), class=b, coef=litters.yr.std),
# prior(normal(0,0.5), class=b, coef=range.std))
#mod_biodiv_b <- brm(p_impbiodiv ~ range.std + bodymass.std + litters.yr.std,
# data = d, family = cumulative( link = "logit", link_disc = "log", threshold = c("flexible")),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx)
summary(mod_biodiv_b)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: p_impbiodiv ~ range.std + bodymass.std + litters.yr.std
## Data: d (Number of observations: 149)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1] -5.55 1.22 -8.48 -3.72 6127 1.00
## Intercept[2] -0.10 0.17 -0.43 0.23 16379 1.00
## Intercept[3] 1.25 0.20 0.86 1.65 15082 1.00
## Intercept[4] 2.86 0.35 2.22 3.58 13778 1.00
## range.std 0.40 0.15 0.11 0.70 13626 1.00
## bodymass.std 0.34 0.16 0.02 0.66 13583 1.00
## litters.yr.std -0.01 0.15 -0.30 0.27 14708 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_biodiv_b)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_biodiv_b)
marginal_effects(mod_biodiv_b)
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
bayes_R2(mod_biodiv_b)
## Warning: Predictions are treated as continuous variables in 'bayes_R2'
## which is likely invalid for ordinal families.
## Estimate Est.Error Q2.5 Q97.5
## R2 0.096462 0.03947579 0.02661379 0.1779594
The number of litters per year does not seem to be important. Let’s try to drop it and compare nested models:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=bodymass.std),
# prior(normal(0,0.5), class=b, coef=range.std))
#mod_biodiv_c <- brm(p_impbiodiv ~ range.std + bodymass.std,
# data = d, family = cumulative( link = "logit", link_disc = "log", threshold = c("flexible")),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx)
summary(mod_biodiv_c)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: p_impbiodiv ~ range.std + bodymass.std
## Data: d (Number of observations: 162)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1] -5.69 1.22 -8.57 -3.85 5627 1.00
## Intercept[2] -0.10 0.16 -0.42 0.22 15816 1.00
## Intercept[3] 1.28 0.19 0.91 1.66 15032 1.00
## Intercept[4] 2.85 0.34 2.23 3.56 13015 1.00
## range.std 0.45 0.15 0.16 0.75 14171 1.00
## bodymass.std 0.37 0.16 0.06 0.68 12444 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_biodiv_c)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_biodiv_c)
marginal_effects(mod_biodiv_c)
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
bayes_R2(mod_biodiv_c)
## Warning: Predictions are treated as continuous variables in 'bayes_R2'
## which is likely invalid for ordinal families.
## Estimate Est.Error Q2.5 Q97.5
## R2 0.1052031 0.03952858 0.03304565 0.1860023
loo(mod_biodiv_a)
## Warning: Found 1 observations with a pareto_k > 0.7 in model
## 'mod_biodiv_a'. It is recommended to set 'reloo = TRUE' in order to
## calculate the ELPD without the assumption that these observations are
## negligible. This will refit the model 1 times to compute the ELPDs for the
## problematic observations directly.
##
## Computed from 16000 by 147 log-likelihood matrix
##
## Estimate SE
## elpd_loo -182.2 9.0
## p_loo 9.0 2.1
## looic 364.3 18.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 146 99.3% 2649
## (0.5, 0.7] (ok) 0 0.0% <NA>
## (0.7, 1] (bad) 1 0.7% 107
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
loo(mod_biodiv_b)
## Warning: Found 1 observations with a pareto_k > 0.7 in model
## 'mod_biodiv_b'. It is recommended to set 'reloo = TRUE' in order to
## calculate the ELPD without the assumption that these observations are
## negligible. This will refit the model 1 times to compute the ELPDs for the
## problematic observations directly.
##
## Computed from 16000 by 149 log-likelihood matrix
##
## Estimate SE
## elpd_loo -184.4 9.3
## p_loo 8.6 2.5
## looic 368.8 18.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 148 99.3% 3387
## (0.5, 0.7] (ok) 0 0.0% <NA>
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 1 0.7% 11
## See help('pareto-k-diagnostic') for details.
loo(mod_biodiv_c)
## Warning: Found 1 observations with a pareto_k > 0.7 in model
## 'mod_biodiv_c'. It is recommended to set 'reloo = TRUE' in order to
## calculate the ELPD without the assumption that these observations are
## negligible. This will refit the model 1 times to compute the ELPDs for the
## problematic observations directly.
##
## Computed from 16000 by 162 log-likelihood matrix
##
## Estimate SE
## elpd_loo -196.0 9.5
## p_loo 7.5 2.0
## looic 392.1 18.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 161 99.4% 2718
## (0.5, 0.7] (ok) 0 0.0% <NA>
## (0.7, 1] (bad) 1 0.6% 57
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
The best candidate model seems to be the one including range size, body mass and the number of litters per year:
mod_biodiv <- mod_biodiv_b
summary(mod_biodiv)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: p_impbiodiv ~ range.std + bodymass.std + litters.yr.std
## Data: d (Number of observations: 149)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1] -5.55 1.22 -8.48 -3.72 6127 1.00
## Intercept[2] -0.10 0.17 -0.43 0.23 16379 1.00
## Intercept[3] 1.25 0.20 0.86 1.65 15082 1.00
## Intercept[4] 2.86 0.35 2.22 3.58 13778 1.00
## range.std 0.40 0.15 0.11 0.70 13626 1.00
## bodymass.std 0.34 0.16 0.02 0.66 13583 1.00
## litters.yr.std -0.01 0.15 -0.30 0.27 14708 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Let’s add a random intercept and a covariate matrix, representing phylogenetic correlations among species:
#prior.idx <- c(prior(normal(0,0.5), class=b, coef=bodymass.std),
# prior(normal(0,0.5), class=b, coef=litters.yr.std),
# prior(normal(0,0.5), class=b, coef=range.std))
#mod_biodiv_RIautocor <- brm(p_impbiodiv ~ range.std + bodymass.std + litters.yr.std + (1|sciname),
# data = d, family = cumulative( link = "logit", link_disc = "log", threshold = c("flexible")),
# chains=4, cores=4, iter=5000, warmup=1000, inits="0",
# control = list(adapt_delta = 0.9999, max_treedepth=11),
# prior=prior.idx, cov_ranef=list(phylo = m))
summary(mod_biodiv_RIautocor)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: p_impbiodiv ~ range.std + bodymass.std + litters.yr.std + (1 | sciname)
## Data: d (Number of observations: 149)
## Samples: 4 chains, each with iter = 5000; warmup = 1000; thin = 1;
## total post-warmup samples = 16000
##
## Group-Level Effects:
## ~sciname (Number of levels: 149)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 5.37 3.11 1.27 12.81 524 1.00
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1] -14.72 7.44 -33.57 -5.85 567 1.00
## Intercept[2] -0.31 0.68 -1.86 0.92 3365 1.00
## Intercept[3] 4.19 2.18 1.52 9.60 608 1.00
## Intercept[4] 8.90 4.43 3.44 19.58 565 1.00
## range.std 0.71 0.34 0.04 1.41 4120 1.00
## bodymass.std 0.50 0.35 -0.18 1.19 6294 1.00
## litters.yr.std -0.06 0.34 -0.74 0.61 7419 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_biodiv_RIautocor)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
plot(mod_biodiv_RIautocor)
marginal_effects(mod_biodiv_RIautocor)
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default which is likely invalid for ordinal families.
## Please set 'categorical' to TRUE.
bayes_R2(mod_biodiv_RIautocor)
## Warning: Predictions are treated as continuous variables in 'bayes_R2'
## which is likely invalid for ordinal families.
## Estimate Est.Error Q2.5 Q97.5
## R2 0.7605584 0.1501727 0.3393737 0.9333137
There seems to be a major improvement in model accuracy:
loo(mod_biodiv)
## Warning: Found 1 observations with a pareto_k > 0.7 in model 'mod_biodiv'.
## It is recommended to set 'reloo = TRUE' in order to calculate the ELPD
## without the assumption that these observations are negligible. This
## will refit the model 1 times to compute the ELPDs for the problematic
## observations directly.
##
## Computed from 16000 by 149 log-likelihood matrix
##
## Estimate SE
## elpd_loo -184.4 9.3
## p_loo 8.6 2.5
## looic 368.8 18.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 148 99.3% 3387
## (0.5, 0.7] (ok) 0 0.0% <NA>
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 1 0.7% 11
## See help('pareto-k-diagnostic') for details.
loo(mod_biodiv_RIautocor)
## Warning: Found 144 observations with a pareto_k > 0.7 in model
## 'mod_biodiv_RIautocor'. With this many problematic observations, it may be
## more appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold
## cross-validation rather than LOO.
##
## Computed from 16000 by 149 log-likelihood matrix
##
## Estimate SE
## elpd_loo -166.3 7.5
## p_loo 105.9 5.9
## looic 332.5 14.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 0 0.0% <NA>
## (0.5, 0.7] (ok) 5 3.4% 66
## (0.7, 1] (bad) 143 96.0% 4
## (1, Inf) (very bad) 1 0.7% 3
## See help('pareto-k-diagnostic') for details.
Also, R-squared, calculated as suggested here (https://link.springer.com/article/10.1007/BF01102759), seems to improve massively:
hist(bayes_R2_mz(mod_biodiv))
## Estimate Est.Error Q2.5 Q97.5
## R2_mz 0.09900468 0.04471913 0.02652411 0.1995771
median(bayes_R2_mz(mod_biodiv))
## Estimate Est.Error Q2.5 Q97.5
## R2_mz 0.09900468 0.04471913 0.02652411 0.1995771
## [1] 0.09438552
hist(bayes_R2_mz(mod_biodiv_RIautocor))
## Estimate Est.Error Q2.5 Q97.5
## R2_mz 0.8326469 0.1518792 0.3922102 0.9803348
median(bayes_R2_mz(mod_biodiv_RIautocor))
## Estimate Est.Error Q2.5 Q97.5
## R2_mz 0.8326469 0.1518792 0.3922102 0.9803348
## [1] 0.8794856