#Housekeeping

##Packages

library(readr)
library(tidyverse)
library(lme4)
library(MASS)
library(MuMIn)
library(vegan)
library(fitdistrplus)
library(car)
library(bbmle)
library(arm)
library(corrplot)
library(ade4)
library(lmtest)
library(RColorBrewer)
library(plyr)
library(ggplot2)
library(gridExtra)
library(caret)
library(glmnet)

Session info

SI<-sessionInfo()
print(SI, locale = FALSE)
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] glmnet_4.0-2       caret_6.0-86       gridExtra_2.3      plyr_1.8.6        
##  [5] RColorBrewer_1.1-2 lmtest_0.9-37      zoo_1.8-8          ade4_1.7-15       
##  [9] corrplot_0.84      arm_1.11-2         bbmle_1.0.23.1     car_3.0-9         
## [13] carData_3.0-4      fitdistrplus_1.1-1 survival_3.1-12    vegan_2.5-6       
## [17] lattice_0.20-41    permute_0.9-5      MuMIn_1.43.17      MASS_7.3-51.6     
## [21] lme4_1.1-23        Matrix_1.2-18      forcats_0.5.0      stringr_1.4.0     
## [25] dplyr_1.0.2        purrr_0.3.4        tidyr_1.1.1        tibble_3.0.3      
## [29] ggplot2_3.3.2      tidyverse_1.3.0    readr_1.3.1       
## 
## loaded via a namespace (and not attached):
##  [1] minqa_1.2.4          colorspace_1.4-1     class_7.3-17        
##  [4] ellipsis_0.3.1       rio_0.5.16           htmlTable_2.0.1     
##  [7] base64enc_0.1-3      fs_1.5.0             rstudioapi_0.11     
## [10] prodlim_2019.11.13   fansi_0.4.1          mvtnorm_1.1-1       
## [13] lubridate_1.7.9      xml2_1.3.2           codetools_0.2-16    
## [16] splines_4.0.2        knitr_1.29           Formula_1.2-3       
## [19] jsonlite_1.7.0       nloptr_1.2.2.2       pROC_1.16.2         
## [22] broom_0.7.0          cluster_2.1.0        dbplyr_1.4.4        
## [25] png_0.1-7            compiler_4.0.2       httr_1.4.2          
## [28] backports_1.1.9      assertthat_0.2.1     cli_2.0.2           
## [31] htmltools_0.5.0      tools_4.0.2          coda_0.19-3         
## [34] gtable_0.3.0         glue_1.4.1           reshape2_1.4.4      
## [37] Rcpp_1.0.5           cellranger_1.1.0     vctrs_0.3.2         
## [40] nlme_3.1-148         iterators_1.0.12     timeDate_3043.102   
## [43] gower_0.2.2          xfun_0.16            openxlsx_4.1.5      
## [46] rvest_0.3.6          lifecycle_0.2.0      statmod_1.4.34      
## [49] scales_1.1.1         ipred_0.9-9          hms_0.5.3           
## [52] parallel_4.0.2       yaml_2.2.1           curl_4.3            
## [55] bdsmatrix_1.3-4      rpart_4.1-15         latticeExtra_0.6-29 
## [58] stringi_1.4.6        foreach_1.5.0        checkmate_2.0.0     
## [61] boot_1.3-25          zip_2.1.0            shape_1.4.4         
## [64] lava_1.6.7           rlang_0.4.7          pkgconfig_2.0.3     
## [67] evaluate_0.14        recipes_0.1.13       htmlwidgets_1.5.1   
## [70] tidyselect_1.1.0     magrittr_1.5         R6_2.4.1            
## [73] generics_0.0.2       Hmisc_4.4-1          DBI_1.1.0           
## [76] pillar_1.4.6         haven_2.3.1          foreign_0.8-80      
## [79] withr_2.2.0          mgcv_1.8-31          abind_1.4-5         
## [82] nnet_7.3-14          modelr_0.1.8         crayon_1.3.4        
## [85] rmarkdown_2.3        jpeg_0.1-8.1         grid_4.0.2          
## [88] readxl_1.3.1         data.table_1.13.0    blob_1.2.1          
## [91] ModelMetrics_1.2.2.2 reprex_0.3.0         digest_0.6.25       
## [94] numDeriv_2016.8-1.1  munsell_0.5.0

Import

Abundance and richness data

Combined <- read.csv("Combined.csv")
row.names(Combined)<-Combined$Site
Combined$Site<-NULL
Combined.species<-Combined[19,]
Combined<-Combined[-c(19),]
Combined<-mutate_all(Combined, function(x) as.numeric(as.character(x)))

Distance matrix

# from previous work, calculated from plot to plot. 
distmat <- matrix(c(0,5.17,1.53,3.05,1.44,3.27,0.73,0.93,0.37,3.85,20.43,2.36,3.02,6.04,7.06,8.44,16.98,8.41,5.17,0,5.67,3.42,4.27,2.94,5.81,5.40,5.33,3.22,17.43,4.74,2.14,3.11,2.03,5.31,14.07,6.57,1.53,5.67,0,2.69,1.41,3.09,1.19,0.60,1.16,3.44,19.56,1.41,3.58,7.16,7.34,9.63,16.12,9.79,3.05,3.42,2.69,0,1.63,0.48,3.38,2.71,2.97,0.8,17.43,1.43,1.89,5.79,4.78,8.27,13.98,8.99,1.44,4.27,1.41,1.63,0,1.90,1.78,1.18,1.34,2.43,19.01,1.15,2.17,5.86,6.00,8.35,15.55,8.68,3.27,2.94,3.09,0.48,1.90,0,3.67,3.04,3.23,0.71,17.34,1.89,1.56,5.39,4.32,7.85,13.88,8.64,0.73,5.81,1.19,3.38,1.78,3.67,0,0.73,0.48,4.18,20.60,2.39,3.66,6.76,7.66,9.16,17.16,9.09,0.93,5.40,0.6,2.71,1.18,3.04,0.73,0,0.57,3.51,19.87,1.66,3.26,6.68,7.17,9.14,16.42,9.24,0.37,5.33,1.16,2.97,1.34,3.23,0.48,0.57,0,3.77,20.28,2.12,3.17,6.36,7.18,8.77,16.83,8.77,3.85,3.22,3.44,0.8,2.43,0.71,4.18,3.51,3.77,0,16.66,2.09,2.21,5.91,4.27,8.32,13.21,9.22,20.43,17.43,19.56,17.43,19.01,17.34,20.60,19.87,20.28,16.66,0,18.21,18.40,20.22,15.84,21.46,3.45,23.39,2.36,4.74,1.41,1.43,1.15,1.89,2.39,1.66,2.12,2.09,18.21,0,2.87,6.76,6.21,9.27,14.77,9.74,3.02,2.14,3.58,1.89,2.17,1.56,3.66,3.26,3.17,2.21,18.40,2.87,0,3.95,4.05,6.43,14.99,7.12,6.04,3.11,7.16,5.79,5.86,5.39,6.76,6.68,6.36,5.91,20.22,6.76,3.95,0,4.36,2.51,16.92,3.46,7.06,2.03,7.34,4.78,6,4.32,7.66,7.17,7.18,4.27,15.84,6.21,4.05,4.36,0,5.92,12.57,7.63,8.44,5.31,9.63,8.27,8.35,7.85,9.16,9.14,8.77,8.32,21.46,9.27,6.43,2.51,5.92,0,18.29,2.13,16.98,14.07,16.12,13.98,15.55,13.88,17.16,16.42,16.83,13.21,3.45,14.77,14.99,16.92,12.57,18.29,0,20.16,8.41,6.57,9.79,8.99,8.68,8.64,9.09,9.24,8.77,9.22,23.39,9.74,7.12,3.46,7.63,2.13,20.16,0), nrow = 18, ncol = 18, byrow = TRUE)

Native and non native assignment

non.native_USDA <- read.csv("non-native_USDA.csv")

Explanatory variables

Make DB

Percent_area_Zone <- read.csv("Percent_area_Zone.csv")
totalA <- aggregate(area~descriptio,Percent_area_Zone,sum)
Percent_area_Zone<-merge(Percent_area_Zone,totalA, by="descriptio")
Percent_area_Zone$Per_area<-(Percent_area_Zone$area.x/Percent_area_Zone$area.y)*100
Percent_area_Zone$Name<-NULL
Percent_area_Zone$area.x<-NULL
Percent_area_Zone$area.y<-NULL
Percent_area_Zone$perimeter<-NULL
Zone_cover<-tidyr::spread(Percent_area_Zone,key="CLASS",value= "Per_area")
Zone_cover[is.na(Zone_cover)] <- 0
colnames(Zone_cover) <- c( "Site", "Airport_cover", "Commercial_cover", "Rural_cover", "Environment_cover", "Institution_cover","Minning_cover","Park_cover","Residential_cover")


Dist_to_Roads<- read.csv("Dist_to_roads.csv")
Dist_to_Roads<-tidyr::spread(Dist_to_Roads,key="id_2",value= "distance")
colnames(Dist_to_Roads)<-c("Site","Great_Northern_Rd","HW_17","HW_75","Meeting_Point","Trunk_Road")


IBT_data_1 <- read.csv("IBT_Data.csv")
IBT_data_1$Site<-as.factor(IBT_data_1$Site)

IBT_data_year <- read.csv("Island_surrounding_build_year_kat.csv")
colnames(IBT_data_year)[2]<-"Site"
IBT_data_year$Site<-as.factor(IBT_data_year$Site)
length(IBT_data_year$YearBuilt2)
## [1] 3287
IBT_data_year %>% tidyr::drop_na()->IBT_data_year
length(IBT_data_year$YearBuilt2)
## [1] 2984
IBT_data_year %>% dplyr::group_by(Site) %>% dplyr::summarise(Newest = max(YearBuilt2), Oldest= min(YearBuilt2),Average_building_age= mean(YearBuilt2), SD_building_age = sd(YearBuilt2) ,DiffYears=length(unique(YearBuilt2)))->IBT_data_events
## `summarise()` ungrouping output (override with `.groups` argument)
IBT_data_1[is.na(IBT_data_1$PopulationDensity2),]$PopulationDensity2<-0
IBT_data_1$Av_PopDens<-(IBT_data_1$PopulationDensity2+IBT_data_1$PopulationDensity1)/2
IBT_data_1$PopulationDensity1<-NULL
IBT_data_1$PopulationDensity2<-NULL



IBT_data<-IBT_data_1
IBT_data<-merge(IBT_data_1,Zone_cover,by="Site")
IBT_data<-merge(IBT_data,IBT_data_events,by="Site")

Var selection

colnames(IBT_data)
##  [1] "Site"                 "Name"                 "Site_number"         
##  [4] "Area_m2"              "Perimeter_m"          "Distance_m"          
##  [7] "Closest_Road"         "sp_rich"              "Diversity"           
## [10] "Evenness"             "Exotic_Native_Ratio"  "Native"              
## [13] "Exotic"               "per_exo"              "sw_div"              
## [16] "per_nat"              "Av_PopDens"           "Airport_cover"       
## [19] "Commercial_cover"     "Rural_cover"          "Environment_cover"   
## [22] "Institution_cover"    "Minning_cover"        "Park_cover"          
## [25] "Residential_cover"    "Newest"               "Oldest"              
## [28] "Average_building_age" "SD_building_age"      "DiffYears"
IBT_data$P_A_ratio<-IBT_data$Perimeter_m/IBT_data$Area_m2
IBT_data$Site<-rownames(IBT_data)

Y<-IBT_data[c(4,5:7,17,19,20,21,25,28,31)]
covar_cor = cor(Y)
write.csv(covar_cor,"covar_cor.csv")
mydata.rcorr = Hmisc::rcorr(as.matrix(Y))
covar_cor.p = mydata.rcorr$P
corrplot.mixed(covar_cor, upper = "ellipse", lower = "number")

IBT_PCA = dudi.pca(df = Y, scannf = FALSE, nf = 2, center = TRUE, scale = TRUE)
IBT_PCA$co
##                            Comp1       Comp2
## Area_m2              -0.83055978 -0.23118749
## Perimeter_m          -0.92049970 -0.10852220
## Distance_m            0.39930461 -0.77326984
## Closest_Road         -0.50651299 -0.73943756
## Av_PopDens           -0.62280650  0.29502854
## Commercial_cover      0.22103993 -0.20630908
## Rural_cover          -0.91910930  0.10673178
## Environment_cover    -0.58573112  0.24184569
## Residential_cover     0.85690525  0.18060601
## Average_building_age -0.07559943  0.82093034
## P_A_ratio             0.86303462  0.02170061
s.corcircle(IBT_PCA$co)