rm(list=ls()) library(reshape) library(pls) ############################################## FUNCTIONS VIPjh=function(object, j, h) { if (object$method != "oscorespls") stop("Only implemented for orthogonal scores algorithm. Refit with 'method = \"oscorespls\"'") if (nrow(object$Yloadings) > 1) stop("Only implemented for single-response models") b=c(object$Yloadings)[1:h] T=object$scores[,1:h, drop = FALSE] SS=b^2 * colSums(T^2) W=object$loading.weights[,1:h, drop = FALSE] Wnorm2=colSums(W^2) VIP=sqrt(nrow(W) * sum(SS * W[j,]^2 / Wnorm2) / sum(SS)) return(VIP) } ############################################## DEFAULTS allData=read.csv("") saveDir="" names(allData) dim(allData) inVar="" dim(allData) inBands=names(allData)[grep("^",names(allData))] iForm=paste(inBands,collapse="+") iForm=as.formula(paste(inVar,"~",iForm)) iForm ############################################## FIND COMPONENTS ## RANDOMIZE TO GET OPTIMAL NCOMP ## USE nCut OF DATA TO BUILD MODELS ## USE nsims CVs nCut=floor(.*nrow(allData)) nComps= nsims= outMat=matrix(data=NA,nrow=nsims,ncol=nComps) for (nsim in seq(nsims)) { print(nsim) flush.console() allData$RAND=order(runif(nrow(allData))) subData=allData[allData["RAND"]=nCut,] nSub=nrow(subData) resX=plsr(iForm,data=subData,ncomp=nComps,method="oscorespls") resS=plsr(iForm,data=subData,ncomp=nComps,method="oscorespls",scale=T) # GET COEFFICIENTS (raw and standardized) coefs=as.vector(coef(resX,ncomp=nComps,intercept=T)) zcoef=as.vector(coef(resS,ncomp=nComps,intercept=T)) coefMat[nsim,]=coefs coefStd[nsim,]=zcoef # GET VIP vip=c() for (j in seq(length(inBands))) { vip=c(vip,VIPjh(resS,j,nComps)) } vipMat[,nsim]=vip # GET FITS fitX=as.vector(unlist(resX$fitted.values[,1,nComps])) preX=as.vector(unlist(predict(resX,ncomp=nComps,newdata=tstData))) fitBias=mean(subData[,inVar]-fitX) valBias=mean(tstData[,inVar]-preX) fitR2=summary(lm(subData[,inVar]~fitX))$adj.r.squared valR2=summary(lm(tstData[,inVar]~preX))$adj.r.squared fitRMSE=sqrt(mean((subData[,inVar]-fitX)^2)) valRMSE=sqrt(mean((tstData[,inVar]-preX)^2)) outVec=c(fitR2,fitRMSE,fitBias,valR2,valRMSE,valBias) statMat[nsim,]=outVec } ######################################################## GET PREDICTIONS statMat=as.data.frame(statMat) names(statMat)=c("fitR2","fitRMSE","fitBias","valR2","valRMSE","valBias") specMat=allData[,inBands] specMat=cbind(rep(1,nrow(specMat)),specMat) specMat=as.matrix(specMat) dim(specMat) predMat=specMat%*%t(coefMat) predMean=apply(predMat,FUN=mean,MAR=1) predStdv=apply(predMat,FUN=sd,MAR=1) modSum=summary(lm(allData[,inVar]~predMean)) modR2=modSum$adj.r.squared modRMSE=sqrt(mean((allData[,inVar]-predMean)^2)) modBias=mean(allData[,inVar]-predMean) # WRITE OUT STATISTICS write.csv(statMat,paste(saveDir,inVar,"_fitModelStats.csv",sep=""),row.names=FALSE) allData[,paste("predMean_",inVar,sep="")]=predMean allData[,paste("predStdv_",inVar,sep="")]=predStdv write.csv(allData,paste(saveDir,inVar,"_fitModelData.csv",sep=""),row.names=FALSE) png(paste(saveDir,inVar,"Fit_fitModels.png",sep=""),width=5,height=5,units="in",res=200) plot(allData[,inVar],predMean,pch=16,cex=2,xlab="observed",ylab="predicted",main=inVar,ylim=c(min(predMean-predStdv),max(predMean+predStdv))) abline(0,1,col="red",lwd=2,lty=2) abline(lm(predMean~allData[,inVar]),col="black",lwd=2) arrows(allData[,inVar],predMean,allData[,inVar],predMean+predStdv,angle=90,length=0.1) arrows(allData[,inVar],predMean,allData[,inVar],predMean-predStdv,angle=90,length=0.1) points(allData[,inVar],predMean,pch=16,cex=2) dev.off() ######################################################## GET VIPs, COEFFICIENTS # WRITE OUT VIPs vipAggr=as.data.frame(t(apply(vipMat,MAR=1,FUN=quantile,probs=c(0.05,0.5,0.95)))) vipMean=apply(vipMat,MAR=1,FUN=mean) vipStdv=apply(vipMat,MAR=1,FUN=sd) vipAggr$mean=vipMean vipAggr$stdv=vipStdv vipAggr$BandName=inBands write.table(vipAggr,paste(saveDir,inVar,"_output_VIP.csv",sep=""),sep=",",col.names=T,row.names=F) # WRITE OUT COEFFICIENTS RAW coefAggr=as.data.frame(t(apply(coefMat,MAR=2,FUN=quantile,probs=c(0.05,0.5,0.95)))) dim(coefAggr) coefMean=apply(coefMat,MAR=2,FUN=mean) coefStdv=apply(coefMat,MAR=2,FUN=sd) coefAggr$mean=coefMean coefAggr$stdv=coefStdv coefAggr$BandName=c("Intercept",inBands) write.table(coefAggr,paste(saveDir,inVar,"_output_COEF_RAW.csv",sep=""),sep=",",col.names=T,row.names=F) write.table(coefMat,paste(saveDir,inVar,"_output_COEFmat.csv",sep=""),sep=",",col.names=T,row.names=F) # WRITE OUT COEFFICIENTS STD coefAggrS=as.data.frame(t(apply(coefStd,MAR=2,FUN=quantile,probs=c(0.05,0.5,0.95)))) dim(coefAggrS) coefMeanS=apply(coefStd,MAR=2,FUN=mean) coefStdvS=apply(coefStd,MAR=2,FUN=sd) coefAggrS$mean=coefMeanS coefAggrS$stdv=coefStdvS coefAggrS$BandName=c("Intercept",inBands) write.table(coefAggrS,paste(saveDir,inVar,"_output_COEF_STD.csv",sep=""),sep=",",col.names=T,row.names=F) ########################################################