時系列データのPCAが目的 4変数(カテゴリ数2)に対して、PCAとRobust PCAの2通り。 その後、Robust PCA の結果を density plot
require(data.table) # 4 変数 sl1 = c(5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1) sw1 = c(3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8) pl1 = c(1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5) pw1 = c(0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3) type1 = rep(1, length(sl1)) # 別の 4 変数 sl2 = c(6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9) sw2 = c(3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0) pl2 = c(5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1) pw2 = c(2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8) type2 = rep(2, length(sl2)) # 4変数からデータフレームを作り、。。。 require(data.table) T <-data.table(sl=c(sl1, sl2), sw=c(sw1, sw2), pl=c(pl1, pl2), pw=c(pw1, pw2)) str(T) # 主成分分析 (PCA) で 2変数を取り出す。つまり (sl, sw, pl, pw) -> (pcX, pcY) prcomp.obj <- prcomp(T, scale=TRUE) # 主成分分析 pcX <- prcomp.obj$x[,1] # 第一主成分得点 pcY <- prcomp.obj$x[,2] # 第二主成分得点 plot(pcX, pcY, col=as.factor( c(type1, type2) ) ) # robust PCA require(MASS) pc <- princomp(T, covmat=cov.mcd(T)) pcX <- pc$scores[,1] # 第一主成分得点 pcY <- pc$scores[,2] # 第二主成分得点 plot(pcX, pcY, col=as.factor( c(type1, type2) ) ) # # pcX, pcY のデータを density plot. # require(gregmisc) require(lattice) require(ggplot2) # ★ここを設定して下さい pcX1 <- pcX[1:length(sl1)] pcY1 <- pcY[1:length(sl1)] pcX2 <- pcX[(length(sl1)+1):(length(sl1)+length(sl2))] pcY2 <- pcY[(length(sl1)+1):(length(sl1)+length(sl2))] # 1枚目の画像 (赤) h2d <- hist2d(pcX1, pcY1, show=FALSE, same.scale=TRUE) zzz <- h2d$counts xxx <- h2d$x yyy <- h2d$y dataX <- matrix(nrow=(length(xxx)*length(yyy)), ncol=3) for(i in 1:length(xxx)){ for(j in 1:length(yyy)){ dataX[i+(length(xxx)*(j-1)), 1] <- xxx[i] dataX[i+(length(xxx)*(j-1)), 2] <- yyy[j] dataX[i+(length(xxx)*(j-1)), 3] <- zzz[i,j] } } data3 <- as.data.table(dataX) sun <- c("#110000","#660011","#bb0000","#C93211","#D76423","#F0BB43","#ffee55","#FFF5A3","#FFFCE8") sea <- c("#002046","#0066ff","#11aadd","#44ccee","#00ddff","#77ffff","#bbeeee","#ffffee") ggplot(data3, aes(x=data3[,1], y=data3[,2], fill=data3[,3]/max(data3[,3]))) + geom_tile() + scale_fill_gradientn("frequency",limits = c(0, 0.000625) , colours = sun) # 2枚目の画像 (青) h2d <- hist2d(pcX2, pcY2, show=FALSE, same.scale=TRUE) zzz <- h2d$counts xxx <- h2d$x yyy <- h2d$y dataX <- matrix(nrow=(length(xxx)*length(yyy)), ncol=3) for(i in 1:length(xxx)){ for(j in 1:length(yyy)){ dataX[i+(length(xxx)*(j-1)), 1] <- xxx[i] dataX[i+(length(xxx)*(j-1)), 2] <- yyy[j] dataX[i+(length(xxx)*(j-1)), 3] <- zzz[i,j] } } data3 <- as.data.table(dataX) sun <- c("#110000","#660011","#bb0000","#C93211","#D76423","#F0BB43","#ffee55","#FFF5A3","#FFFCE8") sea <- c("#002046","#0066ff","#11aadd","#44ccee","#00ddff","#77ffff","#bbeeee","#ffffee") ggplot(data3, aes(x=data3[,1], y=data3[,2], fill=data3[,3]/max(data3[,3]))) + geom_tile() + scale_fill_gradientn("frequency",limits = c(0, 0.000625) , colours = sea)