############################################### #Bayesian Regression Example #Air Data #2/16/06 ############################################### setwd("U:\\Teaching\\Stat694\\Computing\\") library(MASS) #Read in data air.data <- read.table("air.dat",header=T) N <- dim(air.data)[1] ############# #Some EDA... ############# #Assess normality of the response hist(air.data[,1]) y <- log(air.data[,1]) #use log of SO2 #Determine explanatory variables X <- as.matrix(air.data[,-1]) #Pairs plot pairs(cbind(y,X)) #Remove manufacturing X.2 <- X[,-2] #Center the remaining variables X.c <- X.2-matrix(rep(apply(X.2,2,mean),N),N,dim(X.2)[2],byrow=T) cov.names <- names(air.data[,c(-1,-3)]) #Add a column of 1s for the intercept X.int <- cbind(rep(1,N),X.c) #Determine the number of explanatory variables P <- dim(X.int)[2] ############################################# #Classical Regression Analysis ############################################# lm.fit <- lm(y ~ X.c) summary(lm.fit) plot(lm.fit$fitted.values,lm.fit$residuals,ylim=c(-1.2,1.2),xlab="Fitted Values",ylab="Residuals") abline(h=0) abline(h=2*summary(lm.fit)$sigma,lty=2) abline(h=-2*summary(lm.fit)$sigma,lty=2) ############################################# #Bayesian Regression Analysis #w/ non-informative prior ############################################# #Function for sampling from p(beta|sigma2,X,y) sample.beta <- function(sigma2,X,y){ V.beta <- chol2inv(chol(t(X)%*%X)) beta.hat <- V.beta%*%t(X)%*%y return(mvrnorm(1,beta.hat,sigma2*V.beta)) } #Function for sampling from p(sigma2|X,y) sample.sigma2 <- function(X,y,N,P){ V.beta <- chol2inv(chol(t(X)%*%X)) beta.hat <- V.beta%*%t(X)%*%y s2 <- t(y-X%*%beta.hat)%*%(y-X%*%beta.hat)/(N-P) return(1/rgamma(1,shape=(N-P)/2,scale=1/(s2/2))) } #Set number of samples K <- 1000 #Initialize variables for the posterior samples beta.post.samples = matrix(0,K,P) sigma2.post.samples = rep(0,K) #Draw samples from the posterior of the betas and sigma2 for(i in 1:K){ sigma2.post.samples[i] <- sample.sigma2(X.int,y,N,P-1) beta.post.samples[i,] <- sample.beta(sigma2.post.samples[i],X.int,y) } #Examine boxplots of the posterior samples hist(sigma2.post.samples) boxplot(as.data.frame(beta.post.samples[,-1]),names=cov.names) abline(h=0) #Posterior summaries post.summary <- data.frame(round(cbind(apply(beta.post.samples,2,mean),apply(beta.post.samples,2,sd),t(apply(beta.post.samples,2,function(x){quantile(x,c(.025,.975),names=F)}))),4)) names(post.summary) <- c("mean","sd","q2.5","q97.5") row.names(post.summary) <- c("Intercept",cov.names) post.summary #Look at the correlation between the betas pairs(beta.post.samples,labels=c("Intercept",cov.names)) #Find the posterior probability that each of the parameters is greater than 0 apply(beta.post.samples > 0, 2,mean)