require(RODBC) sws <- odbcConnect("swsartifex") # Tunnel to artifex port 3306 require(lattice) require(sp) require(ade4) source("http://anthony.darrouzet-nardi.net/works/adnlattice.R") source("/Users/anthony/sci/sws/code/R Code/veg.sum.R") source("/Users/anthony/sci/sws/code/R Code/veg.ord.R") require(randomForest) # the objects vs.ord, vr.ord and vs.ord, and vr.ord must be in there panel.xylo = function(x,y,...) { panel.xyplot(x,y,pch = 16, cex = 0.5, col = ...) panel.loess(x,y, col = "black", lwd = 1.5) } panel.xylog = function(x,y,...) { panel.xyplot(x,y,pch = c(16,17,15,16,16), col = c("black","forestgreen","red","purple", "brown"), ...) panel.loess(x,y, col = "red", lwd = 1.5) } panel.xylog = function(x,y,...) { panel.xyplot(x,y,pch = c(16,2,8,3,9), col = "black", cex = 1.15, ...) panel.loess(x,y, col = "black", lwd = 1.5) } logtick <- function(y=NULL,x=NULL,add = c(1,1)) { if (!is.null(y)) mi.y <- min(y,na.rm = T) if (!is.null(y)) ma.y <- max(y,na.rm = T) if (!is.null(x)) mi.x <- min(x,na.rm = T) if (!is.null(x)) ma.x <- max(x,na.rm = T) li.y <- NULL if (!is.null(y)) li.y <- list(y = list(at = c(mi.y,(ma.y-mi.y)/3+mi.y,(ma.y-mi.y)/3*2+mi.y,ma.y), labels = signif(exp(c(mi.y,(ma.y-mi.y)/3+mi.y,(ma.y-mi.y)/3*2+mi.y,ma.y))-add[1],2))) li.x <- NULL if (!is.null(x)) li.x <- list(x = list(at = c(mi.x,(ma.x-mi.x)/3+mi.x,(ma.x-mi.x)/3*2+mi.x,ma.x), labels = signif(exp(c(mi.x,(ma.x-mi.x)/3+mi.x,(ma.x-mi.x)/3*2+mi.x,ma.x))-add[2],2))) c(li.y,li.x) } #creating data table sb <- sqlQuery(sws, "SELECT * FROM sb") levels(sb$cover) <- c("open", "tree", "shrub") sb <- merge(sb, vs.sum, all = T) sb <- merge(sb, vs.ord, all = T) sb$net_min[sb$net_min < -1 & is.na(sb$net_min) == F] <- -0.740 # 3rd lowest value, not outlier sb$net_min[sb$net_min > 4 & is.na(sb$net_min) == F] <- 3.720 # 3rd highest value, not outlier sb$lognet_min <- log(sb$net_min+1.74) sb$lognet_nit <- log(sb$net_nit+1.946) sb$logNH4_jul07 <- log(sb$NH4_jul07+1) sb$logNO3_jul07 <- log(sb$NO3_jul07+1) sb$logIN_jul07 <- log(sb$IN_jul07+1) sb$logNH4_709 <- log(sb$NH4_709+1) sb$logNO3_709 <- log(sb$NO3_709+0.1) sb$logIN_709 <- log(sb$IN_709+1) sb$logNH4_730 <- log(sb$NH4_730+1) sb$logNO3_730 <- log(sb$NO3_730+0.1) sb$logIN_730 <- log(sb$IN_730+1) #downsampling trees and willows sb.ds <- sb[c(which(sb$cover == "tree")[round(1:(length(which(sb$cover == "tree"))/1.3)*1.3)], which(sb$cover == "shrub")[round(1:(length(which(sb$cover == "shrub"))/2.2)*2.2)], which(sb$cover == "open")),] res <- sqlQuery(sws, "SELECT * FROM res") res$cover[res$cover == 'willow'] <- "shrub" ; res$cover <- ordered(res$cover, c("open", "tree", "shrub")) res <- merge(res, vr.sum, all = T) res <- merge(res, vr.ord, all = T) res.tc <- (res$Picea_engelmannii+res$Abies_lasiocarpa+res$Pinus_flexilis) res$unvegetated[res$cover == "tree" & !is.na(res$cover)] <- ((res$total_cover-res.tc+100)*-0.9556+194.6022)[res$cover == "tree" & !is.na(res$cover)] res$unvegetated[res$unvegetated < 0] <- 0 res$logNO3 <- log(res$mean_NO3 + 1) res$logNH4 <- log(res$mean_NH4 + 1) res$logIN <- log(res$mean_IN + 1) #downsampling trees and willows res.ds <- res[res$resin %in% which(res$cover == "tree")[round(1:(length(which(res$cover == "tree"))/1.8)*1.8)] | res$resin %in% which(res$cover == "shrub")[round(1:(length(which(res$cover == "shrub"))/6)*6)] | res$cover == "open",] # functions for random forest calculations and reporting Nstats <- function(da,ra,pa, ntree = 500, important = FALSE) { results <- list() for (t in 1:length(da)) { mat <- matrix(nrow = length(ra), ncol = length(names(pa)), dimnames = list(response = ra, predictors = names(pa))) for (u in 1:length(ra)) { for (v in 1:length(pa)) { dat <- na.omit(da[[t]][,c(match(ra[[u]], names(da[[t]])),match(pa[[v]], names(da[[t]])))]) p <- pa[[v]] f <- as.formula(paste(ra[[u]], " ~ ", paste(pa[[v]], collapse=" + "))) rf <- randomForest(f,data=dat,ntree=ntree,importance = T) if (sum(importance(rf)[,1] > 1) > 3) {p <- row.names(importance(rf)[importance(rf)[,1] > 1,]) rf <- tuneRF(dat[,match(p, names(dat))], dat[,1], ntreeTry=ntree, stepFactor=1.5, improve = 0.001, trace = FALSE, doBest = TRUE, importance = T)} mat[u,v] <- round(rf$rsq[rf$ntree]*100,1) if (important) { cat("\n") cat("\n") cat(paste(names(da)[[t]], ": ", ra[[u]])) cat("\n") cat(paste("mtry: ", rf$mtry)) cat("\n") cat(paste("sample size: ", length(rf$y))) cat("\n") cat(paste("% Var explained: ",round(rf$rsq[rf$ntree]*100,1))) cat("\n") print(importance(rf)[rev(order(importance(rf)[,1])),]) } }} results[[t]] <- mat names(results)[t] <- names(da)[t] for (t in 1:length(results)) { results[[t]][results[[t]] < 0] <- 0 } } results } ## VARIABLES #All, open, tree data frames sbds.aot <- list(all = sb.ds, open = sb[sb$cover == "open",], trees = sb[sb$cover == "trees",]) res.aot <- list(all = res.ds, open = res[res$cover == "open",], trees = res[res$cover == "tree",]) sbds.a <- list(all = sb.ds) resds.a <- list(all = res.ds) sbds.aot <- list(all = sb, open = sb[sb$cover == "open",], trees = sb[sb$cover == "trees",]) res.aot <- list(all = res, open = res[res$cover == "open",], trees = res[res$cover == "tree",]) sb.a <- list(all = sb) res.a <- list(all = res) #response COREjul07 <- list( "lognet_min", "lognet_nit","logNH4_jul07", "logNO3_jul07", "logIN_jul07") CORE730 <- list("percentn", "singlecoreCN", "logNH4_730", "logNO3_730", "logIN_730") CORE709 <- list( "logNH4_709", "logNO3_709", "logIN_709") COMPOSITE <- list("total_percentn", "heavy_pn_core", "light_pn_core","total_del15n", "heavy_del15n", "light_del15n", "total_C_to_N", "heavy_C_to_N", "light_C_to_N") RES <- list("logNO3", "logNH4", "logIN") #abiotic topo <- c("slope", "aspect", "elevation") core_abiotic_field <- c(topo, "temp_resid_field", "moist_krige", "snow_depth", "pH") core_abiotic_krige <- c(topo, "temp_resid_krige", "moist_krige", "snow_depth", "pH") core_abiotic_709 <- c(topo, "temp_resid_field", "moist_field_709", "snow_depth", "pH") core_abiotic_730 <- c(topo, "temp_resid_field", "moist_field_730", "snow_depth", "pH") hydro_field <- c("temp_resid_field", "moist_krige", "snow_depth") hydro_krige <- c("temp_resid_krige", "moist_krige", "snow_depth") hydro_709 <- c("temp_resid_field", "moist_field_709", "snow_depth") hydro_730 <- c("temp_resid_field", "moist_field_730", "snow_depth") #biotic cover <- "cover" jacc2 <- c("cover", "unvegetated", "ANJ2.1", "ANJ2.2") canb2 <- c("cover", "unvegetated", "ANC2.1", "ANC2.2") plants <- c("Vaccinium_sp", "Carex_rupestris", "Caltha_leptosepala", "Geum_rossii", "Deschampsia_caespitosa", "Juncus_drummondii", "Minuartia_obtusiloba", "Oreoxis_alpina", "Selaginella_densa", "Trifolium_dasyphyllum", "Danthonia_intermedia", "Erigeron_peregrinus", "moss", "Trollius_laxus", "Achillea_millefolium", "Phlox_pulvinata", "Salix_sp", "Kobresia_myosuroides", "Carex_scopulorum", "Antennaria_alpina", "Sibbaldia_procumbens", "Arenaria_fendleri", "Silene_acaulis", "Arnica_cordifolia", "Trifolium_parryi", "Potentilla_diversifolia", "Carex_RsixteenB", "Tetraneuris_acaulis", "Artemisia_pattersonii", "Polygonum_sp", "Dasiphora_fruticosa", "Solidago_multiradiata", "Campanula_sp", "Calamagrostis_purpurascens", "Poa_sp_C", "Poa_sp_B", "Ribes_montigenum", "Artemisia_scopulorum", "Arctostaphylos_uva_ursi", "Erigeron_simplex", "Viola_adunca", "Carex_rossii", "Carex_aquatilis", "Festuca_brachyphylla", "Veronica_wormskjoldii", "Carex_siccata", "Lewisia_pygmaea", "unvegetated", "cover") #correlation circles cor.circle <- function(df,r) { pca.pred <- dudi.pca(na.omit(df[,c(match(r, names(df)))])) s.corcircle(pca.pred$co)} # RUNNING MODELS # Table 1. Main results Nstats(da = sbds.a, ra = COMPOSITE, pa = list( topo = topo, hydro = hydro_krige, cover = cover, jacc2 = jacc2, canb2 = canb2, abiotic = core_abiotic_krige, plants = plants, both = c(core_abiotic_krige,plants), both_cn = c(core_abiotic_krige,plants,"total_percentn","total_C_to_N"))) Nstats(da = sbds.a, ra = COREjul07, pa = list( topo = topo, hydro = hydro_field, cover = cover, jacc2 = jacc2, canb2 = canb2, abiotic = core_abiotic_field, plants = plants, both = c(core_abiotic_field,plants), both_cn = c(core_abiotic_field,plants,"total_percentn","total_C_to_N"))) Nstats(da = sb.a, ra = CORE730, pa = list( topo = topo, hydro = hydro_730, cover = cover, jacc2 = jacc2, canb2 = canb2, abiotic = core_abiotic_730, plants = plants, both = c(core_abiotic_730,plants), both_cn = c(core_abiotic_730,plants,"percentn","singlecoreCN"))) Nstats(da = sb.a, ra = CORE709, pa = list( topo = topo, hydro = hydro_709, cover = cover, jacc2 = jacc2, canb2 = canb2, abiotic = core_abiotic_field, plants = plants, both = c(core_abiotic_field,plants), both_cn = c(core_abiotic_709,plants,"percentn","singlecoreCN"))) Nstats(da = resds.a, ra = RES, pa = list( topo = topo, hydro = hydro_krige, cover = cover, jacc2 = jacc2, canb2 = canb2, abiotic = core_abiotic_krige, plants = plants, both = c(core_abiotic_krige,plants), both_cn = c(core_abiotic_krige,plants,"percentn","C_to_N")),important = F) Nstats(da = sbds.a, ra = COMPOSITE, pa = list( both_cn = c(core_abiotic_krige,plants,"total_percentn","total_C_to_N"))) Nstats(da = sbds.a, ra = COREjul07, pa = list( both_cn = c(core_abiotic_field,plants,"total_percentn","total_C_to_N"))) Nstats(da = sb.a, ra = CORE730, pa = list( both_cn = c("percentn"))) Nstats(da = sb.a, ra = CORE709, pa = list( both_cn = c(core_abiotic_709,plants,"percentn","singlecoreCN"))) Nstats(da = resds.a, ra = RES, pa = list( both_cn = c(core_abiotic_krige,plants,"percentn","C_to_N"))) Nstats(da = sb.a, ra = list("logNO3_730"), pa = list( topo = topo, hydro = hydro_730, cover = cover, jacc2 = jacc2, canb2 = canb2, abiotic = core_abiotic_730, plants = plants, both = c(core_abiotic_730,plants), both_cn = c(core_abiotic_730,plants,"percentn","singlecoreCN"))) Nstats(da = sb.a, ra = list("logNO3_709"), pa = list( topo = topo, hydro = hydro_709, cover = cover, jacc2 = jacc2, canb2 = canb2, abiotic = core_abiotic_field, plants = plants, both = c(core_abiotic_field,plants), both_cn = c(core_abiotic_709,plants,"percentn","singlecoreCN"))) ## FOLLOWUPS #explaining total C to N in all areas with abiotic factors xyplot(total_C_to_N~elevation|equal.count(temp_resid_krige,3)*equal.count(moist_krige,2), g = cover, data = sb) #explaining heavy percentn N boxplot(heavy_pn_core~ordered(cover, levels = c("open","willows","trees")),data = sb.ds, ylab = "Heavy %N") # importance of unvegetated in explaining total percent n (composite) xyplot(total_percentn~unvegetated, sb.ds,g = cover, panel = panel.xylo, subset = cover == "open", main = "Open areas") # importance of soil moisture in explaining total percent n (single core) Rm <- sb$cover levels(Rm) <- c(levels(Rm),"Rm","SK") Rm[sb$Ribes_montigenum > 0] <- "Rm" Rm[sb$Kobresia_myosuroides > 2] <- "SK" Rm[sb$Selaginella_densa > 2] <- "SK" xyplot(percentn~moist_field_730|equal.count(elevation,3), sb, g = Rm, panel = panel.xylog) # top 5 factors used to classify resin-available NO3. Lp <- res.ds$cover levels(Lp) <- c(levels(Lp),"Lp") Lp[res.ds$Lewisia_pygmaea > 2] <- "Lp" xyplot(logNO3~elevation|equal.count(moist_krige,3) *equal.count(C_to_N,2),res.ds,panel = panel.xylog,g = Lp) # specific plants in resin data res.plant <- res[match(c("logNH4","logNO3","logIN","logNH4_resid","species1", "species2","species3","cover"),names(res),)] res.plant <- reshape(res.plant, varying = list(c("species1","species2","species3")), direction = "long") dotplot(reorder(species1,logNH4,median,na.rm = T)~logNH4,res.plant,g = cover, pch = c(16,17,15), col = c("black","forestgreen","red")) dotplot(reorder(species1,logIN,median,na.rm = T)~logIN,res.plant,g = cover, pch = c(16,17,15), col = c("black","forestgreen","red"), subset = res.plant$species1 %in% names(summary(res.plant$species1)[summary(res.plant$species1) > 6])) # percentn vs. logNH4730 xyplot(logNH4_730~percentn,sb, panel = panel.xylog,g = cover) # explaining total d15N xyplot(total_del15n ~ total_C_to_N,sb.ds,panel = panel.xylog,g=cover) xyplot(total_del15n ~ slope|cover,sb.ds,panel = panel.xylog,g=cover,layout = c(2,1,1)) #dot plot of 7/30 NH4 with specific plants rf1 <- randomForest(logNH4_730~slope+percentn+singlecoreCN+unvegetated,data = sb,na.action = na.omit) sb$logNH4_730_resid <- NA sb$logNH4_730_resid[as.numeric(names(rf1$y-rf1$predicted))] <- (rf1$y-rf1$predicted) dotplot(reorder(plant_class_730,logNH4_730_resid,median,na.rm = T)~logNH4_730_resid,data = sb, xlab = "residual log-transformed KCl-extractable NH4+") ## SCATTERPLOT FIGURES # Figure 1. a NH4 vs SOM N b. NH4 vs C:N c. NO3 vs SOM N xyplot(logNH4_730~percentn,sb, panel = panel.xylog,g = cover, xlab = "SOM %N (single cores)", ylab = "KCl-extractable NH4+ (log scale)", scales = logtick(sb$logNH4_730)) xyplot(logNH4_730~singlecoreCN,sb, panel = panel.xylog,g = cover, subset = singlecoreCN < 30, xlab = "SOM C:N ratio (single cores)", ylab = "KCl-extractable NH4+ (log scale)", scales = logtick(sb$logNH4_730)) xyplot(logNO3_730~percentn,sb, panel = panel.xylog,g = cover, xlab = "SOM %N (single cores)", ylab = "log-transformed KCl-extractable NO3-", scales = logtick(sb$logNO3_730,add = 0.1)) # Figure 2. KCl IN on two different dates xyplot(logIN_709~logIN_730,sb, panel = panel.xylog,g = cover, xlab = "KCl-extractable TIN, 7/30/09 (log scale)", ylab = "KCl-extractable TIN, 7/9/09 (log scale)", scales = logtick(sb$logIN_730,sb$logIN_709)) # Figure 3. dot plots of species specific KCl data (a) raw data (b) residuals after subtract %N rf1 <- randomForest(logNH4_730~slope+percentn+singlecoreCN+unvegetated,data = sb,na.action = na.omit) sb$logNH4_730_resid <- NA sb$logNH4_730_resid[as.numeric(names(rf1$y-rf1$predicted))] <- (rf1$y-rf1$predicted) sb.plant <- sb[match(c("logNH4_730","logNO3_730","logIN_730","logNH4_730_resid","species1_730", "species2_730","species3_730","cover"),names(sb),)] sb.plant <- reshape(sb.plant, varying = list(c("species1_730","species2_730","species3_730")), direction = "long") sb.plant <- sb.plant[!is.na(sb.plant$species1_730),] dotplot(reorder(species1_730,logNH4_730,median,na.rm = T)~logNH4_730,data = sb.plant,g = cover, pch = c(16,2,8),col = "black", scales = logtick(x=sb$logNH4_730), xlab = "KCl-extractable NH4+ (log scale)") dotplot(reorder(species1_730,logNH4_730_resid,median,na.rm = T)~logNH4_730_resid, data = sb.plant,g = cover, pch = c(16,2,8), col = "black", xlab = "residual KCl-extractable NH4+ (log scale)") # Figure 4. Resin data against %N xyplot(logNH4~percentn|cover,data=res,g=cover,panel=panel.xylog, scales = logtick(y=res$logNH4), xlab = "SOM %N (interpolated from soil core composites", ylab = "resin-available NH4+ (log scale)") # Figure 5. Resin NO3 trellis plot Lp <- res.ds$cover levels(Lp) <- c(levels(Lp),"Lp") Lp[res.ds$Lewisia_pygmaea > 2] <- "Lp" xyplot(logNO3~elevation|equal.count(moist_krige,3) *equal.count(C_to_N,2),res.ds,panel = panel.xylog,g = Lp, scales = logtick(y=res.ds$logNO3), xlab = "log-transformed resin-available NO3-") # Figure 6. Resin by plant species res.plant <- res[match(c("logNH4","logNO3","logIN","species1", "species2","species3","cover"),names(res),)] res.plant <- reshape(res.plant, varying = list(c("species1","species2","species3")), direction = "long") dotplot(reorder(species1,logIN,median,na.rm = T)~logIN,res.plant,g = cover, pch = c(16,2,8), col = "black", subset = res.plant$species1 %in% names(summary(res.plant$species1)[summary(res.plant$species1) > 6]), scales = logtick(x=res.plant$logIN), xlab = "log-transformed resin-available TIN") # Figure 7. Single core %N by moisture with cover and indicator species marked Rm <- sb$cover levels(Rm) <- c(levels(Rm),"Rm","SK") Rm[sb$Ribes_montigenum > 0] <- "Rm" Rm[sb$Kobresia_myosuroides > 2] <- "SK" Rm[sb$Selaginella_densa > 2] <- "SK" xyplot(percentn~moist_field_730*100, sb, g = Rm, panel = panel.xylog, xlab = "% soil moisture", ylab = "SOM %N (single cores)") # Figure 8. Total %N vs. unvegetated xyplot(total_percentn~unvegetated, sb.ds,g = cover, panel = panel.xylo, subset = cover == "open", main = "Open areas", xlab = "% unvegetated ground", ylab = "SOM %N (soil core composites)") # Figure 9. d15N vs (a) total %N adn (b) C:N xyplot(total_del15n ~ total_percentn,sb.ds,panel = panel.xylog,g=cover, xlab = "SOM %N (soil core composites)", ylab = "SOM d15N (soil core composites)") xyplot(total_del15n ~ total_C_to_N,sb.ds,panel = panel.xylog,g=cover, xlab = "SOM C:N ratio (soil core composites)", ylab = "SOM d15N (soil core composites)") # Figure 10. d15N vs. slope xyplot(total_del15n ~ slope|cover,sb.ds,panel = panel.xylog,g=cover,layout = c(2,1,1), xlab = "slope", ylab = "SOM d15N (soil core composites)")