###################################################### ############ Handout #8 for ST476/576 ############## ################ Factor Analysis ################### ###################################################### ############################################### ################### Example 1 ################# #### PC and MLE solution on simulated data #### ############################################### set.seed(22) ## Define true Factor Model Parameters ## mu <- c(10,5,2,5,0) L <- rbind(c(1,0), c(2,0), c(2,0), c(0,2), c(0,2)) Psi <- diag(c(1,2,1,2,1)) Sigma <- L%*%t(L) + Psi ## Generate the Data ## n <- 50 p <- length(mu) decomp <- eigen(Sigma) Lambda <- diag(decomp$values) P <- decomp$vectors Sigma.sqrt <- P%*%sqrt(Lambda)%*%t(P) Z <- matrix(rnorm(p*n), nrow=n, ncol=p) M <- matrix(rep(mu, times=n), nrow=p, ncol=n) X <- Z%*%Sigma.sqrt + t(M) pairs(X) ## Perform PC factor analysis on the data ## X_pca <- princomp(X, cor=TRUE) ##How many factors summary(X_pca) screeplot(X_pca, type='lines') nfac <- 2 ##Obtain Factor Loadings X_pca$loadings[1:p,1:nfac] L_pc <- X_pca$loadings[1:p,1:nfac]%*%diag(X_pca$sdev[1:nfac]) L_pc <- X_pca$loadings[1:p,1:nfac]*X_pca$sdev[1:nfac] ## if nfac=1, no rotation if nfac=1 L_pc ##Estimate Specific Variances Psi_pc <- diag(diag(cor(X)-L_pc%*%t(L_pc))) Psi_pc ##Rotate Factors rot_pc <- varimax(L_pc) L_rot_pc <- L_pc%*%rot_pc$rotmat L_rot_pc ##make the negative loadings positive L_rot_pc <- L_rot_pc%*%cbind(c(-1,0),c(0,1)) L_rot_pc ##Plot Factor Loadings plot(L_pc, xlim=c(-1,1), ylim=c(-1,1)) lines(c(-2,2), c(0,0), lty=2) lines(c(0,0), c(-2,2), lty=2) points(L_rot_pc, pch='*', col='red') ##Plot Factor Scores scores_pc <- (X-rep(1,nrow(X))%x%t(colMeans(X)))%*%L_rot_pc%*%solve(t(L_rot_pc)%*%L_rot_pc) plot(scores_pc) ## Perform MLE factor analysis on the data ## X_mle <- factanal(X, factors=nfac, scores="regression") X_mle ##Factor Loadings L_mle <- X_mle$loadings[1:p,1:nfac] L_mle ##Specific Variances Psi_mle <- diag(X_mle$unique) Psi_mle ##Rotate Factors rot_mle <- varimax(L_mle) L_rot_mle <- L_mle%*%rot_mle$rotmat L_rot_mle ## Plot PC Factor Scores vs. MLE factor scores scores_mle <- X_mle$scores par(mfrow=c(1,2)) plot(scores_mle[,1], scores_pc[,1]) plot(scores_mle[,2], scores_pc[,2]) par(mfrow=c(1,1)) ## Just for fun compare estimated Correlation matrices ##Corr matrix estimate from PC solution R_pc <- L_pc%*%t(L_pc) + Psi_pc R_pc ##Corr matrix estimate from MLE solution R_mle <- L_mle%*%t(L_mle) + Psi_mle R_mle ##Unconstrained Corr matrix estimate R <- cor(X) R ##Actual Corr matrix V <- diag(diag(1/sqrt(Sigma))) Rho <- V %*% Sigma %*% V Rho sum((R_pc-Rho)^2) sum((R_mle-Rho)^2) sum((R-Rho)^2) ################################################## ################# Example 2 ###################### #### Factor Analysis for Stock-Price data ######## ################################################## #### Read in data #### ex.data <- read.table(file="W:/teaching/stat4765762012/data/stocks_data.txt", header=T) pairs(ex.data) ## Perform PC factor analysis on the data ## stock_pca <- princomp(~ jpmorgan + citibank + wellsfargo + royaldutch + exxon, data=ex.data, cor=TRUE) summary(stock_pca) screeplot(stock_pca, type='lines') p<-5 nfac <- 2 ##Obtain Factor Loadings stock_pca$loadings[1:p,1:nfac] L_pc <- stock_pca$loadings[1:p,1:nfac]%*%diag(stock_pca$sdev[1:nfac]) L_pc <- stock_pca$loadings[1:p,1:nfac]*stock_pca$sdev[1:nfac] #if nfac=1 L_pc ##page 498, Talbe 9.3### ##make the negative loadings positive L_pc<-L_pc%*%cbind(c(-1,0),c(0,-1)) ##Estimate Specific Variances X <- as.matrix(ex.data) Psi_pc <- diag(diag(cor(X)-L_pc%*%t(L_pc))) Psi_pc ##Rotate Factors rot_pc <- varimax(L_pc) L_rot_pc <- L_pc%*%rot_pc$rotmat L_rot_pc ##Plot Factor Scores scores_pc <- (X-rep(1,nrow(X))%x%t(colMeans(X)))%*%L_rot_pc%*%solve(t(L_rot_pc)%*%L_rot_pc) plot(scores_pc) ## Perform MLE factor analysis on the data## X_mle <- factanal(X, factors=nfac, scores="regression") X_mle ##Factor Loadings L_mle <- X_mle$loadings[1:p,1:nfac] L_mle ##Specific Variances Psi_mle <- diag(X_mle$unique) Psi_mle ##Rotate, page 510, Table 9.8 ##### rot_mle <- varimax(L_mle) L_rot_mle <- L_mle%*%rot_mle$rotmat L_rot_mle ## Plot PC Factor Scores vs. MLE factor scores scores_mle <- X_mle$scores par(mfrow=c(1,2)) plot(scores_mle[,1], scores_pc[,1]) plot(scores_mle[,2], scores_pc[,2]) par(mfrow=c(1,1))