2

How can I compute the importance of the components in a geographically weighted principal component analysis (GWPCA)? That is, I want to reduce the dimension according to the cumulative proportion of variance explained by the components.

Here is the PCA and GWPCA code from the Journal of Statistical Software paper on the GWmodel package:

require(GWmodel)

# Load the "DubVoter" data set; the code below uses the `Dub.voter` object
# it is expected to provide.
data("DubVoter")

# Standardise columns 4 to 11 of the attribute table (scale() centres each
# column and divides it by its standard deviation) before running any PCA.
Data.scaled <- scale(as.matrix(Dub.voter@data[,4:11]))

# Global (non-spatial) PCA on the standardised data.
pca.basic <- princomp(Data.scaled, cor = FALSE) 
# Percentage of total variance carried by each global component.
(pca.basic$sdev^2 / sum(pca.basic$sdev^2))*100

# Loadings of the global components.
pca.basic$loadings

# Robust global PCA: princomp() is given the covMcd() result through `covmat`
# instead of estimating the covariance itself.
# NOTE(review): covMcd() is not defined in this snippet; presumably it comes
# from robustbase attached alongside GWmodel -- confirm.
R.COV <- covMcd(Data.scaled, cor = FALSE, alpha = 0.75)
pca.robust <- princomp(Data.scaled, covmat = R.COV, cor = FALSE)
# Proportion (not percentage: there is no *100 here) of variance per component.
pca.robust$sdev^2 / sum(pca.robust$sdev^2)

# Loadings of the robust global components.
pca.robust$loadings

# Build a spatial points data frame from the X/Y columns and the scaled data,
# which is the input passed to the GWPCA functions below.
Coords <- as.matrix(cbind(Dub.voter$X, Dub.voter$Y)) 
Data.scaled.spdf <- SpatialPointsDataFrame(Coords, as.data.frame(Data.scaled))

# Bandwidth selection for the basic GWPCA, using all scaled variables and
# k = 3; the selected bandwidth is printed afterwards.
# NOTE(review): with adaptive = TRUE the bandwidth is presumably a number of
# nearest neighbours rather than a distance -- confirm in the GWmodel manual.
bw.gwpca.basic <- bw.gwpca(Data.scaled.spdf, 
                           vars = colnames(Data.scaled.spdf@data), k = 3, robust = FALSE, adaptive = TRUE) 
bw.gwpca.basic

# Same bandwidth selection for the robust GWPCA (robust = TRUE).
bw.gwpca.robust <- bw.gwpca(Data.scaled.spdf, 
                            vars = colnames(Data.scaled.spdf@data), k = 3, robust = TRUE, adaptive = TRUE) 
bw.gwpca.robust

# Fit the basic GWPCA with the bandwidth selected above. k = 8 equals the
# number of input variables (columns 4:11), so nothing is truncated here.
gwpca.basic <- gwpca(Data.scaled.spdf, 
                     vars = colnames(Data.scaled.spdf@data), bw = bw.gwpca.basic, k = 8, 
                     robust = FALSE, adaptive = TRUE)

# Fit the robust GWPCA with its own selected bandwidth.
gwpca.robust <- gwpca(Data.scaled.spdf, 
                      vars = colnames(Data.scaled.spdf@data), bw = bw.gwpca.robust, k = 8, 
                      robust = TRUE, adaptive = TRUE)

#' Percentage of total variance (PTV) explained by the leading local components.
#'
#' @param gwpca.obj A GWPCA result; only its `var` element is read, a matrix
#'   with one row per location and one column per local component.
#' @param n.components Number of leading components to accumulate.
#' @return Numeric vector with one percentage (0-100) per row of `var`.
prop.var <- function(gwpca.obj, n.components) {
    local.var <- gwpca.obj$var
    # drop = FALSE keeps a matrix even when a single component (or a single
    # location) is selected; without it rowSums() receives a bare vector and
    # stops with an error.
    leading <- local.var[, seq_len(n.components), drop = FALSE]
    (rowSums(leading) / rowSums(local.var)) * 100
}

# Percentage of total variance (PTV) explained by local components 1 to 3,
# for the basic and the robust GWPCA fits.
var.gwpca.basic <- prop.var(gwpca.basic, 3) 
var.gwpca.robust <- prop.var(gwpca.robust, 3)

# Attach the PTV values to the spatial object so they can be mapped.
Dub.voter$var.gwpca.basic <- var.gwpca.basic 
Dub.voter$var.gwpca.robust <- var.gwpca.robust

# Eight-colour sequential palette for the PTV maps.
# NOTE(review): brewer.pal() is not defined in this snippet and no package
# providing it is loaded here (presumably RColorBrewer) -- confirm.
mypalette.4 <-brewer.pal(8, "YlGnBu")

# Map of the local PTV for the basic GWPCA.
# NOTE(review): `map.layout` is never defined in the visible code; it must be
# created beforehand or these spplot() calls will fail.
spplot(Dub.voter, "var.gwpca.basic", key.space = "right", 
       col.regions = mypalette.4, cuts = 7, 
       main = "PTV for local components 1 to 3 (basic GW PCA)", 
       sp.layout = map.layout)

# Map of the local PTV for the robust GWPCA.
spplot(Dub.voter, "var.gwpca.robust", key.space = "right", 
       col.regions = mypalette.4, cuts = 7,
       main = "PTV for local components 1 to 3 (robust GW PCA)", 
       sp.layout = map.layout)

# "Winning" variable per location: the column with the largest absolute
# loading on the first local component. max.col() breaks ties at random by
# default, so tied rows are not reproducible without a seed.
loadings.pc1.basic <- gwpca.basic$loadings[,,1] 
win.item.basic <- max.col(abs(loadings.pc1.basic))

loadings.pc1.robust <- gwpca.robust$loadings[,,1] 
win.item.robust <- max.col(abs(loadings.pc1.robust))

# Attach the winning-variable indices to the spatial object for mapping.
Dub.voter$win.item.basic <- win.item.basic 
Dub.voter$win.item.robust <- win.item.robust

# One colour per input variable (eight variables, indices 1 to 8).
mypalette.5 <- c("lightpink", "blue", "grey", "purple",
                 "orange", "green", "brown", "yellow")

# Categorical maps of the winning variable; the nine break points in `at`
# delimit the eight integer classes 1..8.
spplot(Dub.voter, "win.item.basic", key.space = "right", 
       col.regions = mypalette.5, at = c(1, 2, 3, 4, 5, 6, 7, 8, 9),
       main = "Winning variable: highest abs. loading on local Comp.1 (basic)",
       colorkey = FALSE, sp.layout = map.layout)

spplot(Dub.voter, "win.item.robust", key.space = "right", 
       col.regions = mypalette.5, at = c(1, 2, 3, 4, 5, 6, 7, 8, 9),
       main = "Winning variable: highest abs. loading on local Comp.1 (robust)",
       colorkey = FALSE, sp.layout = map.layout)

The global PCA (pca.basic) indicates that we can keep components 1 through 8, because the cumulative proportion of variance at Comp.8 is higher than 85%. How can I carry out the same kind of selection with GWPCA?

Some related materials :

How can I conduct Geographically Weighted Principal Component Analysis using ArcGIS, Python and SPSS/R?

Harris P, Clarke A, Juggins S, et al. Enhancements to a Geographically Weighted Principal Component Analysis in the Context of an Application to an Environmental Data Set[J]. Geographical Analysis, 2015, 47(2): 146-172.

seifer_08ms
  • 239
  • 1
  • 8

1 Answer

3

Here are some functions to solve this issue.

I use the "Cumulative Proportion" as a guide to how many local principal components to keep. Just as in global PCA, I choose a target percentage of variance and then, at each location, keep the local components that cumulatively account for at least that share (85% in this example) of the variance.

#' Cumulative proportion of variance explained by the leading local components.
#'
#' @param gwpca.obj A GWPCA result; only its `var` element is read, a matrix
#'   with one row per location and one column per local component.
#' @param n.components Number of leading components to accumulate.
#' @param ... Further arguments forwarded to both `rowSums()` calls
#'   (for example `na.rm`).
#' @return Numeric vector with one proportion (0-1) per row of `var`.
cum..prop.var <- function(gwpca.obj, n.components, ...) {
    local.var <- gwpca.obj$var
    # drop = FALSE keeps a matrix even when a single component (or a single
    # location) is selected; without it rowSums() receives a bare vector and
    # stops with an error.
    leading <- local.var[, seq_len(n.components), drop = FALSE]
    rowSums(leading, ...) / rowSums(local.var, ...)
}
#' Proportion of variance explained by each local component.
#'
#' @param gwpca.obj A GWPCA result; only its `var` element is read, a matrix
#'   with one row per location and one column per local component.
#' @param ... Further arguments forwarded to `rowSums()` (for example `na.rm`).
#' @return Matrix shaped like `gwpca.obj$var` in which every entry is divided
#'   by the total variance of its row, so each row sums to 1.
prop.var <- function(gwpca.obj, ...) {
    local.var <- gwpca.obj$var
    # The row totals must come from the object passed in, not from the global
    # `gwpca.basic`; otherwise every other fit is scaled by the wrong totals.
    # A length-nrow vector recycles down the columns, so entry [i, j] is
    # divided by the total of row i.
    local.var / rowSums(local.var, ...)
}
#' Cumulative proportion of variance for every number of local components.
#'
#' @param gwpca.obj A GWPCA result; only its `var` element is read, a matrix
#'   with one row per location and one column per local component.
#' @param ... Further arguments forwarded to `rowSums()` (for example
#'   `na.rm`); they do not affect the cumulative sums themselves.
#' @return Matrix shaped like `gwpca.obj$var`; entry [i, j] is the share of
#'   row i's total variance carried by components 1..j.
cum.prop.var.all <- function(gwpca.obj, ...) {
    local.var <- gwpca.obj$var
    # apply() yields the per-location cumulative sums location by location
    # (a k x n matrix, or a bare vector when there is only one component).
    # Refilling by row restores the n x k layout in both cases, which a plain
    # t() does not do for the single-component case.
    cum.var <- matrix(apply(local.var, 1, cumsum),
                      nrow = nrow(local.var), ncol = ncol(local.var),
                      byrow = TRUE, dimnames = dimnames(local.var))
    cum.var / rowSums(local.var, ...)
}

# Cumulative proportion of variance of the first three local components (PTV).
cum.var.gwpca.basic <- cum..prop.var(gwpca.basic, 3, na.rm = FALSE)
cum.var.gwpca.robust <- cum..prop.var(gwpca.robust, 3, na.rm = FALSE)
# Cumulative proportion of variance for every possible number of components.
cum.var.gwpca.basic.all <- cum.prop.var.all(gwpca.basic, na.rm = FALSE)
cum.var.gwpca.robust.all <- cum.prop.var.all(gwpca.robust, na.rm = FALSE)

# How many PCs should be retained at each location (85% or more of the
# variance). The first index reaching the threshold is taken row by row, so
# the result is always one integer per location. The earlier
# sapply(apply(..., 1, which), min) form only works while apply() returns a
# list; it silently returns garbage when every row has the same number of
# TRUEs and apply() simplifies to a matrix.
local.index.basic <- apply(cum.var.gwpca.basic.all >= 0.85, 1,
                           function(reached) which(reached)[1L])
local.index.robust <- apply(cum.var.gwpca.robust.all >= 0.85, 1,
                            function(reached) which(reached)[1L])
# robust local components contain more information about original data
# Loadings of every component that is retained at one location or more.
# drop = FALSE keeps the 3-d layout even if a single component is enough.
local.loadings.basic <-
    gwpca.basic$loadings[, , seq_len(max(local.index.basic)), drop = FALSE]
local.loadings.robust <-
    gwpca.robust$loadings[, , seq_len(max(local.index.robust)), drop = FALSE]
# Retained local components per location: a list with one element per
# location holding the loadings of that location's retained components.
# lapply() keeps this a list even if all locations retain the same number.
local.loadings.robust.retain <- lapply(
    seq_along(local.index.robust),
    function(x) local.loadings.robust[x, , seq_len(local.index.robust[x])]
)
seifer_08ms
  • 239
  • 1
  • 8
  • How exactly do these functions assess "importance" of components? This is a key question because there are many different ways to determine "importance" in PCA (and most of them are controversial). – whuber Jul 30 '15 at 15:56
  • Yes, you're right. I use the "Cumulative Proportion" as a guide to how many local principal components to keep. Just as in global PCA, I choose a target percentage of variance and then keep the local components that cumulatively account for at least 85% of the variance in the example data. – seifer_08ms Jul 31 '15 at 00:32