##### 2段階推定法による階層潜在クラスモデルの推定プログラム ##### # このプログラムにはパッケージ"gtools"が必要. library(gtools) ### 階層潜在クラスモデルオブジェクト(条件付確率表なし:ネットワーク構造の情報のみ)"structure"の定義. ### # structure$hidden$adj: 潜在変数間の構造を示す隣接行列. # structure$hidden$dim: 各潜在変数の次元. # structure$observed$parent: 各観測変数の親である潜在変数の番号. # structure$observed$dim: 各観測変数の次元. ### 階層潜在クラスモデルオブジェクト"hlc"の定義. ### # hlc$hidden.structure: 潜在変数の構造に関する情報. # hlc$root.node: ルートノード番号. # hlc$hidden.structure$adj: 潜在変数間の構造を示す隣接行列. # hlc$hidden.structure$dim: 各潜在変数の次元. # hlc$hidden.structure$n.var: 潜在変数の数. # hlc$hidden.structure$cpt: 各潜在変数の条件付確率表(CPT)を表す3次元配列. #  P(Ti|Pa..Ti) = cpt[i,p(Ti|Pa..Ti),Pa..Ti]. # dim(cpt) = c(n.var,max(hidden.dim),max(hidden.dim)). # hlc$observed.structure: 観測変数の構造に関する情報. # hlc$observed.structure$parent: 各観測変数の親である潜在変数の番号. # hlc$observed.structure$dim: 各観測変数の次元. # hlc$observed.structure$n.var: 観測変数の数. # hlc$observed.structure$cpt: 各観測変数の条件付確率表(CPT)を表す3次元配列. # P(Xi|Pa..Tj) = cpt[i,p(Xi|Pa..Tj),Pa..Tj]. # dim(cpt) = c(n.var,max(observed.dim),max(hidden.dim)). ### 階層潜在クラスモデルで確率伝搬法を適用するためのオブジェクト"hlc.bp"の定義. ### # hlc.bp$hlc: hlcオブジェクト. # hlc.bp$n.evidence: エビデンスの数. # hlc.bp$evidences: エビデンス行列. # hlc.bp$hidden: 潜在変数のπ&λ分布・メッセージの情報. # hlc.bp$hidden$lambda: 各エビデンスに対する各潜在変数のλ分布を表す4次元配列. # Lambda(Ti|evidence..k) = lambda[k,i,lambda..distribution(Ti)]. # dim(lambda) = c(n.evidence,n.var,max(dim)). # hlc.bp$hidden$pi: 各エビデンスに対する各潜在変数のπ分布を表す4次元配列. # Pi(Ti|evidence..k) = pi[k,i,pi..distribution(Ti)]. # dim(pi) = c(n.evidence,n.var,max(dim)). # hlc.bp$hidden$lambda.m: 各エビデンスに対する各潜在変数のλメッセージを表す4次元配列. # Lambda.message(Ti->Pa..Ti|evidence..k) = lambda.m[k,i,lambda.m..distribution(Pa..Ti)]. # dim(lambda.m) = c(n.evidence,n.var,max(dim)). # hlc.bp$hidden$pi.m: 各エビデンスに対する各潜在変数のπメッセージを表す4次元配列. # Pi.message(Ti->Ch..Ti|evidence..k) = pi.m[k,i,Ch..Ti,pi.m..distribution(Ti)]. # dim(pi.m) = c(n.evidence,n.var,n.var,max(dim)). # hlc.bp$observed$lambda.m: 各エビデンスに対する各観測変数のλメッセージを表す4次元配列. 
# Lambda.message(Xi->Pa..Tj|evidence..k) = lambda.m[k,i,lambda.m..distribution(Pa..Tj)].
# dim(lambda.m) = c(n.evidence,observed.n.var,max(hidden.dim))
# hlc.bp$BP.order: propagation order for belief propagation.

### Marginalization: sum out the i-th dimension of a multi-dimensional array.
### Faster than marginal.table(x,index[-i]): permutes dim i to the front,
### flattens to a matrix and sums each column with a single matrix product.
# x: numeric array; i: index of the dimension to sum out.
# Returns an array with dim(x)[-i].
marginalize <- function(x,i){
  index <- 1:length(dim(x))
  x2 <- aperm(x,c(i,index[-i]))                 # move dim i to the front
  x3 <- matrix(x2,nrow=dim(x)[i])               # columns = all remaining cells
  e <- matrix(1,nrow=1,ncol=dim(x)[i])
  y <- array(e%*%x3,dim=dim(x)[-i])             # ones-vector product = column sums
  return(y)
}

### Get the observation numbers belonging to a given cluster of an hclust object.
# hcl: hclust-like object with a $merge matrix; c.num: cluster (row) number of hcl$merge.
# Negative entries of $merge denote leaves, positive entries earlier merges;
# the function recurses over both children and concatenates the leaf numbers.
get.subtree.vars <- function(hcl,c.num){
  if(c.num < 0) return(-c.num)                  # negative value = leaf observation
  else{
    x <- hcl$merge[c.num,]
    subtree.vars <- c(get.subtree.vars(hcl,x[1]),get.subtree.vars(hcl,x[2]))
    return(subtree.vars)
  }
}

### Build an "hlc" object (with randomly initialized CPTs) from a "structure" object.
# structure$hidden$adj/dim and structure$observed$parent/dim as documented in the
# file header.  The root is taken to be the hidden node with the smallest
# column sum of the adjacency matrix (i.e. no parent).
# NOTE: the parameter name "structure" masks base::structure inside this function.
set.hlc <- function(structure){
  hidden.adj <- structure$hidden$adj
  hidden.dim <- structure$hidden$dim
  observed.dim <- structure$observed$dim
  h.n.var <- length(hidden.dim)
  o.n.var <- length(observed.dim)
  o.parent <- structure$observed$parent
  h.parent <- apply(structure$hidden$adj,2,which.max)
  root <- which.min(apply(hidden.adj,2,sum))    # node with no incoming edge
  # Conditional probability tables of the observed variables (random values).
  observed.cpt <- array(0,dim=c(o.n.var,max(observed.dim),max(hidden.dim)))
  for(i in 1:o.n.var){
    a <- runif(hidden.dim[o.parent[i]]*observed.dim[i],min=0,max=1)
    a <- array(a,dim=c(observed.dim[i],hidden.dim[o.parent[i]]))
    observed.cpt[i,1:observed.dim[i],1:hidden.dim[o.parent[i]]] <- prop.table(a,2)
  }
  # Conditional probability tables of the hidden variables (random values).
  hidden.cpt <- array(0,dim=c(h.n.var,max(hidden.dim),max(hidden.dim)))
  for(i in 1:h.n.var){
    if(i==root){
      # The root has no parent: store its marginal in parent slot 1.
      a <- runif(hidden.dim[i],min=0,max=1)
      a <- array(a,dim=c(hidden.dim[i]))
      hidden.cpt[i,1:hidden.dim[i],1] <- prop.table(a)
    }else{
      a <- runif(hidden.dim[i]*hidden.dim[h.parent[i]],min=0,max=1)
      a <- array(a,dim=c(hidden.dim[i],hidden.dim[h.parent[i]]))
      hidden.cpt[i,1:hidden.dim[i],1:hidden.dim[h.parent[i]]] <- prop.table(a,2)
    }
  }
  hlc <- list()
  hlc$hidden.structure$root.node <- root
  hlc$names <- as.character(1:o.n.var)
  hlc$hidden.structure$adj <- hidden.adj; hlc$hidden.structure$dim <- hidden.dim
  hlc$hidden.structure$n.var <- h.n.var; hlc$hidden.structure$cpt <- hidden.cpt
  hlc$observed.structure$parent <- o.parent; hlc$observed.structure$dim <- observed.dim
  hlc$observed.structure$n.var <- o.n.var; hlc$observed.structure$cpt <- observed.cpt
  # Set the S3 class.
  class(hlc) <- "hlc"
  return(hlc)
}

### S3 print method for "hlc" objects.
print.hlc <- function(hlc){
  cat("Hierarchical latent class model.\n")
  cat("*** number of hidden variables:",hlc$hidden.structure$n.var,"\n")
  cat("*** number of observed variables:",hlc$observed.structure$n.var,"\n")
  cat("*** number of parameters:",get.num.of.param(hlc),"\n")
}

### Count the free parameters of an hlc object.
# Each observed variable contributes (dim-1)*dim(parent) parameters; each
# hidden variable contributes (dim-1)*dim(parent), except the root, whose
# "parent dimension" is 1 (it only has a marginal distribution).
get.num.of.param <- function(hlc){
  hidden.dim <- hlc$hidden.structure$dim
  hidden.parent <- apply(hlc$hidden.structure$adj,2,which.max)
  observed.dim <- hlc$observed.structure$dim
  observed.parent <- hlc$observed.structure$parent
  n.par1 <- (observed.dim-1)*hidden.dim[observed.parent]
  hidden.parent.dim <- hidden.dim[hidden.parent]
  # BUGFIX: the root node number is stored at hlc$hidden.structure$root.node
  # (see set.hlc); hlc$root.node is NULL, which made this assignment a silent
  # no-op and over-counted the root's parameters.
  hidden.parent.dim[hlc$hidden.structure$root.node] <- 1
  n.par2 <- (hidden.dim-1)*hidden.parent.dim
  return(sum(n.par1)+sum(n.par2))
}

### Build an hlc.bp object from an hlc object and an evidence (data) matrix.
set.hlc.bp <- function(hlc,evidences){
  # Build an hlc.bp object (belief-propagation workspace) from an hlc object
  # and an evidence matrix (rows = evidences, columns = observed variables;
  # entry 0 means "no evidence" for that variable).
  hidden.dim <- hlc$hidden.structure$dim
  observed.dim <- hlc$observed.structure$dim
  hidden.n.var <- hlc$hidden.structure$n.var
  observed.n.var <- hlc$observed.structure$n.var
  hidden.adj <- hlc$hidden.structure$adj
  hidden.parent <- apply(hidden.adj,2,which.max)
  # NOTE(review): hlc has no "observed" element; this relies on R's `$`
  # partial matching to resolve hlc$observed to hlc$observed.structure.
  observed.parent <- hlc$observed$parent
  root <- hlc$hidden.structure$root.node
  hidden.parent[root] <- 0
  n.evidence <- dim(evidences)[1]
  # Initialize lambda/pi distributions and messages of the hidden variables
  # to uniform distributions.
  h.lambda <- array(0,dim=c(n.evidence,hidden.n.var,max(hidden.dim)))
  for(i in 1:hidden.n.var) h.lambda[,i,1:hidden.dim[i]] <- 1/hidden.dim[i]
  h.pi <- h.lambda
  h.pi.m <- array(0,dim=c(n.evidence,hidden.n.var,hidden.n.var,max(hidden.dim)))
  for(i in 1:hidden.n.var){
    children <- which(hlc$hidden.structure$adj[i,]==1)
    if(!identical(children,integer(0))) h.pi.m[,i,children,1:hidden.dim[i]] <- 1/hidden.dim[i]
  }
  h.lambda.m <- array(0,dim=c(n.evidence,hidden.n.var,max(hidden.dim)))
  for(i in (1:hidden.n.var)[-root]) h.lambda.m[,i,1:hidden.dim[hidden.parent[i]]] <- 1/hidden.dim[hidden.parent[i]]
  # Encode the evidence matrix as 0/1 indicator arrays per observed variable.
  evidences2 <- array(0,dim=c(n.evidence,observed.n.var,max(observed.dim),max(hidden.dim)))
  for(k in 1:n.evidence){
    for(i in 1:observed.n.var){
      # if there is not evidence, evidences[k,i] == 0.
      if(evidences[k,i]==0){
        #evidences2[k,i,,] = hlc$observed.structure$cpt[i,,]
        evidences2[k,i,,] <- 1
      }else evidences2[k,i,as.numeric(evidences[k,i]),1:hidden.dim[observed.parent[i]]] <- 1
    }
  }
  # Determine the propagation order by a breadth-first traversal from the
  # root, then reverse it (leaves first for the upward lambda pass).
  BP.order <- NULL; BP.que <- root
  while(length(BP.que)!=0){
    BP.order <- append(BP.order,BP.que[1])
    children <- which(hidden.adj[BP.que[1],]==1)
    BP.que = BP.que[-1]
    if(!identical(children,integer(0))) BP.que <- append(BP.que,children)
  }
  BP.order <- rev(BP.order)
  # hlc.bp$observed$lambda.m is not set here; it is set inside HLC.BP.
  hlc.bp <- hlc
  hlc.bp$BP.order <- BP.order
  hlc.bp$hidden$lambda <- h.lambda; hlc.bp$hidden$pi <- h.pi
  hlc.bp$hidden$lambda.m <- h.lambda.m; hlc.bp$hidden$pi.m <- h.pi.m
  # NOTE(review): because `$` partial-matches on read but not on write, this
  # line copies observed.structure into a new top-level element "observed",
  # which HLC.BP later uses to store the observed lambda messages.
  hlc.bp$observed$lambda.m <- NULL
  hlc.bp$evidences2 <- evidences2; hlc.bp$n.evidence <- n.evidence
  # Set class "hlc.bp", inheriting from "hlc".
  class(hlc.bp) <- "hlc.bp"
  # NOTE(review): inherits() only tests class membership and its result is
  # discarded here — this is a no-op; class(hlc.bp) <- c("hlc.bp","hlc")
  # appears to have been intended.
  inherits(hlc.bp,"hlc")
  return(hlc.bp)
}

### Compute posterior probabilities in a hierarchical latent class model via
### belief propagation.  If data is NULL, the marginal distribution of each
### variable is computed instead.
HLC.BP <- function(hlc,data=NULL){
  if(identical(data,NULL)) data <- 1 %o% rep(0,hlc$observed.structure$n.var)
  else if(identical(dim(data),NULL)) data <- 1 %o% data
  # Product of several lambda messages; computed in log space to combine the
  # per-child messages along dimension 2.
  lambda.prod = function(lambda.ms){
    if(length(dim(lambda.ms))==2) lambda <- lambda.ms
    else{
      lambda <- exp(marginalize(log(lambda.ms),2))
    }
    return(lambda)
  }
  # Compute the lambda distribution of hidden variable Ti from the lambda
  # messages of its hidden and observed children.
  calc.lambda <- function(hlc.bp,i){
    hidden.dim <- hlc.bp$hidden.structure$dim
    hidden.n.var <- hlc.bp$hidden.structure$n.var
    hidden.adj <- hlc.bp$hidden.structure$adj
    h.children <- which(hidden.adj[i,]==1)
    o.children <- which(hlc.bp$observed.structure$parent==i)
    if(!identical(o.children,integer(0))){
      o.lambda.ms <- hlc.bp$observed$lambda.m[,o.children,1:hidden.dim[i]]
      # With a single evidence the evidence dimension was dropped; restore it.
      if(hlc.bp$n.evidence==1) o.lambda.ms <- 1 %o% o.lambda.ms
      if(length(o.children)==1) o.lambda <- o.lambda.ms
      else o.lambda <- lambda.prod(o.lambda.ms)
      if(identical(h.children,integer(0))) h.lambda <- 1
      else{
        h.lambda.ms <- hlc.bp$hidden$lambda.m[,h.children,1:hidden.dim[i]]
        if(hlc.bp$n.evidence==1) h.lambda.ms <- 1 %o% h.lambda.ms
        h.lambda <- lambda.prod(h.lambda.ms)
      }
      lambda <- o.lambda * h.lambda
    }else{
      h.lambda.ms <- hlc.bp$hidden$lambda.m[,h.children,1:hidden.dim[i]]
      if(hlc.bp$n.evidence==1) h.lambda.ms <- 1 %o% h.lambda.ms
      lambda <- lambda.prod(h.lambda.ms)
    }
    hlc.bp$hidden$lambda[,i,1:hidden.dim[i]] <- lambda
    return(hlc.bp)
  }
  # Compute the pi distribution of hidden variable Ti from its parent's pi
  # message and its CPT (for the root, from its marginal CPT alone).
  calc.pi <- function(hlc.bp,i){
    hidden.dim <- hlc.bp$hidden.structure$dim
    hidden.n.var <- hlc.bp$hidden.structure$n.var
    hidden.adj <- hlc.bp$hidden.structure$adj
    n.evidence <- hlc.bp$n.evidence
    parent <- which(hidden.adj[,i]==1)
    if(identical(parent,integer(0))){
      # Root: its pi distribution is its prior, replicated per evidence.
      pi <- hlc.bp$hidden.structure$cpt[i,1:hidden.dim[i],]
      pi <- rep(1,n.evidence)%o%margin.table(pi,1)
    }else{
      cpt <- hlc.bp$hidden.structure$cpt[i,1:hidden.dim[i],1:hidden.dim[parent]]
      pi.m <- hlc.bp$hidden$pi.m[,parent,i,1:hidden.dim[parent]]
      if(hlc.bp$n.evidence == 1) pi.m <- 1 %o% pi.m
      # Sum_{pa} P(Ti|pa) * pi.message(pa).
      pi <- rep(1,n.evidence)%o%cpt * aperm(rep(1,hidden.dim[i])%o%pi.m,c(2,1,3))
      pi <- margin.table(pi,c(1,2))
    }
    hlc.bp$hidden$pi[,i,1:hidden.dim[i]] <- pi
    return(hlc.bp)
  }
  # Compute Ti's lambda message to its parent from Ti's lambda distribution
  # and CPT.  No-op for the root (no parent).
  calc.lambda.m <- function(hlc.bp,i){
    hidden.dim <- hlc.bp$hidden.structure$dim
    hidden.n.var <- hlc.bp$hidden.structure$n.var
    hidden.adj <- hlc.bp$hidden.structure$adj
    n.evidence <- hlc.bp$n.evidence
    parent <- which(hidden.adj[,i]==1)
    if(!identical(parent,integer(0))){
      cpt <- hlc.bp$hidden.structure$cpt[i,1:hidden.dim[i],1:hidden.dim[parent]]
      lambda <- hlc.bp$hidden$lambda[,i,1:hidden.dim[i]]
      if(hlc.bp$n.evidence==1) lambda <- 1 %o% lambda
      # Sum_{ti} lambda(ti) * P(ti|pa) for each parent state.
      lambda.m <- (lambda%o%rep(1,max(hidden.dim[parent])))*(rep(1,n.evidence)%o%cpt)
      lambda.m <- marginalize(lambda.m,2)
      hlc.bp$hidden$lambda.m[,i,1:hidden.dim[parent]] <- lambda.m
    }
    return(hlc.bp)
  }
  # Compute Ti's pi messages to each hidden child: pi(Ti) times the product
  # of all incoming lambda messages except the one from that child (the
  # division by h.lambda.ms removes the child's own message; 0/0 -> 0).
  calc.pi.m <- function(hlc.bp,i){
    hidden.dim <- hlc.bp$hidden.structure$dim
    hidden.n.var <- hlc.bp$hidden.structure$n.var
    hidden.adj <- hlc.bp$hidden.structure$adj
    n.evidence <- hlc.bp$n.evidence
    h.children <- which(hidden.adj[i,]==1)
    o.children <- which(hlc.bp$observed.structure$parent==i)
    if(!identical(h.children,integer(0))){
      h.lambda.ms <- hlc.bp$hidden$lambda.m[,h.children,1:hidden.dim[i]]
      o.lambda.ms <- hlc.bp$observed$lambda.m[,o.children,1:hidden.dim[i]]
      pi <- hlc.bp$hidden$pi[,i,1:hidden.dim[i]]
      if(hlc.bp$n.evidence==1){
        h.lambda.ms <- 1 %o% h.lambda.ms
        o.lambda.ms <- 1 %o% o.lambda.ms
        pi = 1 %o% pi
      }
      if(identical(o.children,integer(0))) pi.ms <- pi * lambda.prod(h.lambda.ms)
      else pi.ms <- pi * lambda.prod(h.lambda.ms) * lambda.prod(o.lambda.ms)
      # Replicate per hidden child before dividing out each child's message.
      if(length(h.children)!=1) pi.ms <- aperm(pi.ms%o%rep(1,length(h.children)),c(1,3,2))
      pi.ms <- pi.ms / h.lambda.ms; pi.ms[is.nan(pi.ms)] <- 0
      hlc.bp$hidden$pi.m[,i,h.children,1:hidden.dim[i]] <- pi.ms
    }
    return(hlc.bp)
  }
  # Build the hlc.bp workspace from the hlc object and the data matrix.
  hlc.bp <- set.hlc.bp(hlc,data)
  hidden.dim <- hlc$hidden.structure$dim
  observed.dim <- hlc$observed.structure$dim
  root <- hlc$hidden.structure$root.node
  n.evidence <- hlc.bp$n.evidence
  observed.parent <- hlc$observed.structure$parent
  ### Start of the belief propagation computation. ###
  p.time1 <- proc.time()
  # Upward (leaves -> root) lambda message passing; the observed variables'
  # lambda messages come from their CPTs restricted to the evidence.
  observed.post.cpt <- rep(1,hlc.bp$n.evidence) %o% hlc$observed.structure$cpt * hlc.bp$evidences2
  o.lambda.m <- marginalize(observed.post.cpt,3)
  hlc.bp$observed$lambda.m <- o.lambda.m
  for(i in hlc.bp$BP.order){
    hlc.bp <- calc.lambda(hlc.bp,i)
    hlc.bp <- calc.lambda.m(hlc.bp,i)
  }
  # Downward (root -> leaves) pi message passing.
  for(i in rev(hlc.bp$BP.order)){
    hlc.bp <- calc.pi(hlc.bp,i)
    hlc.bp <- calc.pi.m(hlc.bp,i)
  }
  # Posterior (or marginal) of each hidden variable from pi and lambda.
  hidden.posterior <- prop.table(hlc.bp$hidden$lambda*hlc.bp$hidden$pi,c(1,2))
  # Marginal probability of the evidence P(e), from the root's pi*lambda.
  lambda.pi.root <- hlc.bp$hidden$lambda[,root,]*hlc.bp$hidden$pi[,root,]
  if(hlc.bp$n.evidence==1) lambda.pi.root <- 1 %o% lambda.pi.root
  p.evidence <- marginalize(lambda.pi.root,2)
  # Posterior conditional P(Ti|Pa(Ti),e) and posterior joint P(Ti,Pa(Ti)|e).
  hidden.post.CPT <- (rep(1,n.evidence) %o% hlc$hidden.structure$cpt) * (hlc.bp$hidden$lambda %o% rep(1,max(hidden.dim)))
  hidden.post.CPT <- prop.table(hidden.post.CPT,c(1,2,4))
  hidden.post.CPT[is.na(hidden.post.CPT)] <- 0
  hidden.post.joint <- hidden.post.CPT
  hidden.parent <- apply(hlc$hidden.structure$adj,2,which.max); hidden.parent <- hidden.parent[-root]
  # The two branches below differ only in the aperm permutation: with exactly
  # two hidden variables, dropping the root collapses the variable dimension.
  if(hlc.bp$hidden.structure$n.var==2){
    hpC <- hidden.post.CPT[,-root,,]
    hidden.posterior.hp <- hidden.posterior[,hidden.parent,1:max(hidden.dim)]
    if(hlc.bp$n.evidence==1){hpC <- 1%o% hpC; hidden.posterior.hp <- 1 %o% hidden.posterior.hp}
    hidden.post.joint[,-root,,] <- hpC * aperm(hidden.posterior.hp%o%rep(1,max(hidden.dim)),c(1,3,2))
  }else if(hlc.bp$hidden.structure$n.var > 2){
    hpC <- hidden.post.CPT[,-root,,]
    hidden.posterior.hp <- hidden.posterior[,hidden.parent,1:max(hidden.dim)]
    if(hlc.bp$n.evidence==1){hpC <- 1%o% hpC; hidden.posterior.hp <- 1 %o% hidden.posterior.hp}
    hidden.post.joint[,-root,,] <- hpC * aperm(hidden.posterior.hp%o%rep(1,max(hidden.dim)),c(1,2,4,3))
  }
  # Posterior joint of each observed variable with its parent.
  hidden.posterior.op <- hidden.posterior[,observed.parent,]
  if(hlc.bp$n.evidence==1) hidden.posterior.op <- 1 %o% hidden.posterior.op
  observed.post.joint <- observed.post.cpt*aperm(rep(1,max(observed.dim))%o%hidden.posterior.op,c(2,3,1,4))
  p.time2 <- proc.time()
  result <- list()
  result$hidden$posterior <- hidden.posterior; result$hidden$post.CPT <- hidden.post.CPT
  result$hidden$post.joint <- hidden.post.joint; result$observed$post.joint <- observed.post.joint
  result$n.evidence <- hlc.bp$n.evidence; result$BP.order <- hlc.bp$BP.order
  result$p.evidence <- p.evidence
  result$System.time <- p.time2 - p.time1
  # set class.
  class(result) <- "bp.hlc"
  return(result)
}

### S3 print method for "bp.hlc" objects.
print.bp.hlc <- function(bp.hlc){
  cat("Result of Belief Propagation.\n")
  cat("*** number of evidences:",bp.hlc$n.evidence,"\n")
  cat("*** calculation time:",bp.hlc$System.time,"\n")
}

### Compute mutual-information-based distances for every pair of observed
### variables under the empirical distribution Q used by the IB-EM method.
# MI.d: mutual information distance of [Cover & Thomas, 1991].
# MI.D: mutual information distance of [Li et al., 2002].
# Returns the upper-triangular distances as vectors with a "Size" attribute
# (dist-like layout).  NOTE: "entoropy" below is a (misspelled) local name.
calc.MI.distance <- function(Q.X..Y,Q.Y,H.X){
  o.n.var <- dim(Q.X..Y)[1]
  max.observed.dim <- dim(Q.X..Y)[2]
  # Compute d(Xi,Xj) and D(Xi,Xj) for every pair of variables.
  mutual.information <- matrix(1,ncol=o.n.var,nrow=o.n.var)
  MI.d <- mutual.information
  MI.D <- mutual.information
  joint.entoropy <- mutual.information
  Q.Y <- rep(1,max.observed.dim)%o%rep(1,max.observed.dim)%o%Q.Y
  for(i in 1:(o.n.var-1)){
    for(j in (i+1):o.n.var){
      Q.Xi..Y <- aperm(Q.X..Y[i,,]%o%rep(1,max.observed.dim),c(1,3,2))
      Q.Xj..Y <- aperm(Q.X..Y[j,,]%o%rep(1,max.observed.dim),c(3,1,2))
      Q.XiXj <- marginalize(Q.Xi..Y*Q.Xj..Y*Q.Y,3)
      # 0*log(0) is treated as 0.
      joint.entoropy[i,j] <- -sum(ifelse(Q.XiXj==0,0,Q.XiXj*log(Q.XiXj)))
      mutual.information[i,j] <- H.X[i] + H.X[j] - joint.entoropy[i,j]
    }
  }
  MI.d <- joint.entoropy - mutual.information
  MI.D <- MI.d / joint.entoropy
  # Keep only the filled (upper) triangle, column-wise, as a flat vector.
  MI.d <- t(MI.d); MI.d <- MI.d[MI.d!=0]
  MI.D <- t(MI.D); MI.D <- MI.D[MI.D!=0]
  attr(MI.d,"Size") <- o.n.var
  attr(MI.D,"Size") <- o.n.var
  return(list("MI.d"=MI.d,"MI.D"=MI.D))
}

### Convert a data frame into the internal data structures used by HLC.IBEM.
# Input "data" is a data frame; the returned "data" is a numeric matrix with
# factors converted to numbers.  Y denotes the instance identity (each unique
# row pattern).  Used only inside HLC.IBEM.
# NOTE(review): relies on get.alpha.table(), defined elsewhere in this file.
get.dataset <- function(hlc,data,old.dataset=NULL){
  hidden.dim <- hlc$hidden.structure$dim
  observed.dim <- hlc$observed.structure$dim
  h.n.var <- hlc$hidden.structure$n.var
  o.n.var <- hlc$observed.structure$n.var
  n.data <- dim(data)[1]
  observed.alpha <- get.alpha.table(hlc,prior="database",alpha=1)$observed.alpha
  if(identical(old.dataset,NULL)){
    ### Build the Y representation (Y is the instance identity): each row is
    ### encoded as a comma-separated string and tabulated.
    data.Y <- unlist(data[,1])
    for(i in 2:o.n.var) data.Y <- paste(data.Y,unlist(data[,i]),sep=",")
    table.Y <- table(data.Y); dim.Y <- length(table.Y)
  }else{
    data.Y <- old.dataset$data.Y; table.Y <- old.dataset$table.Y
    dim.Y <- old.dataset$dim.Y
  }
  data <- do.call("cbind",data) # translate data.frame to numeric matrix.
  # data2 is equivalent to representing P(Xi|Pa(Xi),D) as 0/1 indicators.
  d2 <- data%o%rep(1,max(observed.dim))%o%rep(1,max(hidden.dim))
  d2.2 <- array(1:max(observed.dim),dim=c(max(observed.dim),o.n.var,max(hidden.dim),n.data))
  d2.2 <- aperm(d2.2,c(4,2,1,3))
  data2 <- ifelse(d2==d2.2,1,0)
  # When NA (missing value) is given as evidence, set P(Xi|Pa(Xi),D) to the
  # (uniform) prior distribution instead.
  data2[is.na(data2)] <- (rep(1,n.data) %o% observed.alpha)[is.na(data2)]
  # data.Y2 is a data matrix of the form {xi1[y],xi2[y],...,xin[y]}.
  ty <- do.call("rbind",strsplit(dimnames(table.Y)[[1]],","))
  ty[ty=="NA"] <- 0
  ty <- matrix(as.numeric(ty),nrow=dim.Y)
  ty[ty==0] <- NA
  data.Y2 <- ty
  # data.Y3 is equivalent to representing P(Xi|Pa(Xi),Y) as indicators.
  dy <- aperm(data.Y2%o%rep(1,max(observed.dim))%o%rep(1,max(hidden.dim)),c(2,3,4,1))
  dy2 <- array(1:max(observed.dim),dim=c(max(observed.dim),o.n.var,max(hidden.dim),dim.Y))
  dy2 <- aperm(dy2,c(2,1,3,4))
  data.Y3 <- ifelse(dy==dy2,1,0)
  data.Y3[is.na(data.Y3)] <- (observed.alpha %o% rep(1,dim.Y))[is.na(data.Y3)]
  result <- list("data"=data,"data2"=data2,"data.Y"=data.Y,"table.Y"=table.Y,"dim.Y"=dim.Y,
    "data.Y2"=data.Y2,"data.Y3"=data.Y3,"n.data"=n.data)
  return(result)
}

### Compute the BDe score.
calc.BDe <- function(observed.table,hidden.table,observed.alpha,hidden.alpha){ lgamma2 <- function(x){x[x!=0] = lgamma(x[x!=0]); return(x)} observed.alpha.ij <- marginalize(observed.alpha,2) observed.table.ij <- marginalize(observed.table,2) o.lnp.D....m.1 <- lgamma2(observed.alpha.ij) - lgamma2(observed.alpha.ij+observed.table.ij) o.lnp.D....m.2 <- marginalize(lgamma2(observed.alpha+observed.table)-lgamma2(observed.alpha),2) observed.BDe <- sum(o.lnp.D....m.1 + o.lnp.D....m.2) hidden.alpha.ij <- marginalize(hidden.alpha,2) hidden.table.ij <- marginalize(hidden.table,2) h.lnp.D....m.1 <- lgamma2(hidden.alpha.ij) - lgamma2(hidden.alpha.ij+hidden.table.ij) h.lnp.D....m.2 <- marginalize(lgamma2(hidden.alpha+hidden.table)-lgamma2(hidden.alpha),2) hidden.BDe <- sum(h.lnp.D....m.1 + h.lnp.D....m.2) return(observed.BDe+hidden.BDe) } ### Cheeseman-Stutzスコアを計算する関数. # EM.result: 関数calc.EMが返すリスト. # dataset: HLC.IBEM内で用いられるdatasetオブジェクト. calc.CS <- function(EM.result,dataset){ n.data <- dataset$n.data data2 <- dataset$data2 observed.table <- EM.result$observed.table; hidden.table = EM.result$hidden.table observed.alpha <- EM.result$observed.alpha; hidden.alpha = EM.result$hidden.alpha # 期待対数尤度E[log likelihood]とBDeスコアを計算. exp.loglikelihood <- EM.result$E..Q.lnP.XT * dataset$n.data BDe <- calc.BDe(observed.table,hidden.table,observed.alpha,hidden.alpha) # 推定パラメタの下での周辺対数尤度P(D|θ',m)を確率伝搬法により計算. evidences <- dataset$data.Y2 evidences[is.na(evidences)] <- 0 BP.result <- HLC.BP(EM.result$hlc,evidences) marginal.lnp.D..t.m <- sum(log(BP.result$p.evidence)*dataset$table.Y) CS <- BDe - exp.loglikelihood + marginal.lnp.D..t.m #print(c(marginal.lnp.D..t.m)) return(list("marginal.loglikelihood"=marginal.lnp.D..t.m,"exp.loglikelihood"=exp.loglikelihood, "BDe"=BDe,"CS"=CS)) } ### 関数HLC.IBEM内で分布Qにおける相互情報量と相互情報量距離を計算するための関数. 
calc.Q..MI.distances <- function(hlc,MI.TY,Q.X..Y,Q.T..Y,Q.TY,Q.Y,H.X){
  # Mutual information I(X;T) and related distances under the variational
  # distribution Q, plus I(T;Y) statistics assembled from the given MI.TY.
  # Returned layout: MI.XT[type, variable] with type MI=1, MI.d=2, MI.D=3;
  # MI.TY[type, hidden variable] with 4 rows (MI, MI/H, H-MI, 1-MI/H).
  observed.dim <- hlc$observed.structure$dim
  hidden.dim <- hlc$hidden.structure$dim
  o.n.var <- hlc$observed.structure$n.var
  h.n.var <- hlc$hidden.structure$n.var
  observed.parent <- hlc$observed.structure$parent
  # Align Q(X|Y) and Q(Pa(X)|Y) into a common 4-d layout and combine into the
  # joint Q(X,T,Y); then sum out Y to get Q(X,T).
  Q.X..Y <- aperm(rep(1,max(hidden.dim))%o%Q.X..Y,c(2,3,1,4))
  Q.T..Y.o <- aperm(rep(1,max(observed.dim))%o%Q.T..Y[observed.parent,,],c(2,1,3,4))
  Q.XT..Y <- Q.X..Y * Q.T..Y.o
  Q.XTY <- Q.XT..Y * rep(1,o.n.var)%o%rep(1,max(observed.dim))%o%rep(1,max(hidden.dim))%o%Q.Y
  Q.XT <- marginalize(Q.XTY,4); Q.X..T = prop.table(Q.XT,c(1,3))
  # Conditional and joint entropies (0*log(0) treated as 0).
  H.X..T <- -margin.table(ifelse(Q.XT==0,0,Q.XT*log(Q.X..T)),1)
  H.XT <- -margin.table(ifelse(Q.XT==0,0,Q.XT*log(Q.XT)),1)
  MI.XT <- H.X - H.X..T; MI.d.XT = H.XT - MI.XT; MI.D.XT = MI.d.XT/H.XT
  H.TY <- -margin.table(ifelse(Q.TY==0,0,Q.TY*log(Q.TY)),1)
  MI.XT.data <- array(0,dim=c(3,o.n.var)); MI.TY.data <- array(0,dim=c(4,h.n.var))
  MI.TY.data[1,] <- MI.TY; MI.TY.data[2,] = MI.TY / H.TY
  MI.TY.data[3,] <- H.TY-MI.TY; MI.TY.data[4,] <- 1-(MI.TY/H.TY)
  MI.XT.data <- array(0,dim=c(3,o.n.var))
  MI.XT.data[1,] <- MI.XT; MI.XT.data[2,] <- MI.d.XT; MI.XT.data[3,] <- MI.D.XT
  return(list("MI.XT"=MI.XT.data,"MI.TY"=MI.TY.data))
}

### Compute mutual information and MI distances under distribution P, for use
### inside HLC.IBEM.
calc.P..MI.distances <- function(hlc,P.XT,P.TPaT,P.T,H.X){
  # P.XT: joint P(X,T); P.TPaT: joint P(T,Pa(T)); P.T: marginals of hidden
  # variables; H.X: entropies of the observed variables.
  o.n.var <- hlc$observed.structure$n.var
  h.n.var <- hlc$hidden.structure$n.var
  # Mutual information I(X;T), using the model CPTs as P(X|T).
  P.X..T <- hlc$observed.structure$cpt
  H.X..T <- -margin.table(ifelse(P.XT==0,0,P.XT*log(P.X..T)),1)
  H.XT <- -margin.table(ifelse(P.XT==0,0,P.XT*log(P.XT)),1)
  # Clamp tiny negative values caused by numerical error.
  MI.XT <- H.X - H.X..T; MI.XT[MI.XT<0] <- 0
  MI.XT.d <- H.XT - MI.XT; MI.XT.D <- MI.XT.d/H.XT
  # Mutual information I(T;Pa(T)); degenerate when there is a single hidden
  # variable (no parent pairs exist).
  if(hlc$hidden.structure$n.var!=1){
    P.T..PaT <- hlc$hidden.structure$cpt
    H.T <- -margin.table(ifelse(P.T==0,0,P.T*log(P.T)),1)
    H.T..PaT <- -margin.table(ifelse(P.T..PaT==0,0,P.TPaT*log(P.T..PaT)),1)
    H.TPaT <- -margin.table(ifelse(P.T..PaT==0,0,P.TPaT*log(P.TPaT)),1)
    MI.TPaT <- H.T - H.T..PaT; MI.TPaT[MI.TPaT<0] <- 0
    MI.TPaT.d <- H.TPaT - MI.TPaT; MI.TPaT.D <- MI.TPaT.d/H.TPaT
  }else{
    H.T <- -sum(ifelse(P.T==0,0,P.T*log(P.T)))
    MI.TPaT <- 0; MI.TPaT.d <- 0; MI.TPaT.D <- 0
    H.TPaT <- 0; H.T..PaT <- 0
  }
  # MI....data[type,variable index]. type: MI = 1, MI.d = 2, MI.D = 3.
  # H.XT.data[type,variable index]. type: H.XT = 1, H.X..T = 2.
  MI.XT.data <- array(0,dim=c(3,o.n.var))
  H.XT.data <- array(0,dim=c(2,o.n.var)); H.TPaT.data <- array(0,dim=c(2,h.n.var))
  MI.TPaT.data <- array(0,dim=c(3,h.n.var)); MI.TY.data <- MI.TPaT.data
  MI.XT.data[1,] <- MI.XT; MI.XT.data[2,] <- MI.XT.d; MI.XT.data[3,] <- MI.XT.D
  MI.TPaT.data[1,] <- MI.TPaT; MI.TPaT.data[2,] <- MI.TPaT.d; MI.TPaT.data[3,] <- MI.TPaT.D
  H.XT.data[1,] <- H.XT; H.XT.data[2,] <- H.X..T
  H.TPaT.data[1,] <- H.TPaT; H.TPaT.data[2,] <- H.T..PaT
  result = list("MI.XT"=MI.XT.data, "MI.TPaT"=MI.TPaT.data, "H.XT"=H.XT.data,
    "H.TPaT"=H.TPaT.data, "H.T"=H.T)
  return(result)
}

### Compute EP(t.i,y) for the E-step of the IB-EM method.
HLC.IBEM.EP <- function(hlc,Q.T..Y,P.xi..Ti){
  # Q.T..Y: variational distribution Q(Ti|Y), layout [hidden var, state, y].
  # P.xi..Ti: P(xi|Ti) restricted to the observed instance values.
  # Returns EP[Ti, state, y] (= E[ln P(X,T)] terms used in the E-step).
  observed.parent <- hlc$observed.structure$parent
  o.parent.nodes <- unique(observed.parent)
  h.n.var <- hlc$hidden.structure$n.var
  root <- hlc$hidden.structure$root.node
  hidden.dim <- hlc$hidden.structure$dim
  hidden.parent <- apply(hlc$hidden.structure$adj,2,which.max)
  h.parent.nodes <- unique(hidden.parent[-root])
  dim.Y <- dim(Q.T..Y)[3]
  # Compute E[ln(P(Ti|Pa(Ti)))].
  h.cpt <- hlc$hidden.structure$cpt %o% rep(1,dim.Y)
  Q.T..Y2 <- Q.T..Y[hidden.parent,,]
  if(length(hidden.parent)==1) Q.T..Y2 <- 1%o%Q.T..Y2
  Q.T..Y2 <- aperm(Q.T..Y2%o%rep(1,max(hidden.dim)),c(1,4,2,3))
  # 0 cells keep value 0 instead of log(0).
  E.lnP.Ti..PaTi <- marginalize(ifelse(h.cpt==0,h.cpt,log(h.cpt))*Q.T..Y2,3)
  # The root has no parent: use its marginal (stored in parent slot 1).
  E.lnP.Ti..PaTi[root,,] <- ifelse(h.cpt[root,,1,]==0,h.cpt[root,,1,],log(h.cpt[root,,1,]))
  # Compute E[ln(P(Ch(Ti)|Ti))] and/or ln(P(Xi|Ti)).
  hidden.parent[root] <- 0
  Q.T..Y3 <- aperm(Q.T..Y%o%rep(1,max(hidden.dim)),c(1,2,4,3))
  E.lnP.ChTi..Ti0 <- marginalize(ifelse(h.cpt==0,h.cpt,log(h.cpt))*Q.T..Y3,2)
  E.lnP.ChTi..Ti <- array(0,dim=dim(E.lnP.ChTi..Ti0))
  # Accumulate, for each hidden parent node, the contributions of its hidden
  # children (single-child case needs the dropped dimension restored).
  for(hp in h.parent.nodes){
    h.vars.hp <- which(hidden.parent==hp)
    if(length(h.vars.hp)==1){
      E.lnP.ChTi..Ti[hp,1:hidden.dim[hp],] <- marginalize(1%o%E.lnP.ChTi..Ti0[h.vars.hp,1:hidden.dim[hp],],1)
    }else{
      E.lnP.ChTi..Ti[hp,1:hidden.dim[hp],] <- marginalize(E.lnP.ChTi..Ti0[h.vars.hp,1:hidden.dim[hp],],1)
    }
  }
  # Add the observed children's ln P(Xi|Ti) contributions; -Inf from log(0)
  # is replaced by 0 so empty cells do not contribute.
  lnP.Xi..Ti <- log(marginalize(P.xi..Ti,2)); lnP.Xi..Ti[lnP.Xi..Ti==-Inf] <- 0
  for(op in o.parent.nodes){
    o.vars.op <- which(observed.parent==op)
    if(length(o.vars.op)==1) a <- lnP.Xi..Ti[o.vars.op,1:hidden.dim[op],]
    else a <- marginalize(lnP.Xi..Ti[o.vars.op,1:hidden.dim[op],],1)
    E.lnP.ChTi..Ti[op,1:hidden.dim[op],] <- E.lnP.ChTi..Ti[op,1:hidden.dim[op],] + a
  }
  # EP(Ti,Y) (= E[ln(P(X,T))]).
  EP <- E.lnP.Ti..PaTi + E.lnP.ChTi..Ti
  return(EP)
}

### Compute the Q-expectation of P(X,Pa(X)) needed for the M-step sufficient
### statistics of the IB-EM method.
HLC.EXP.JOINT <- function(hlc,Q.T..Y,P.xi..Ti){ observed.parent <- hlc$observed.structure$parent o.parent.nodes <- unique(observed.parent) h.n.var <- hlc$hidden.structure$n.var root <- hlc$hidden.structure$root.node hidden.dim <- hlc$hidden.structure$dim hidden.parent <- apply(hlc$hidden.structure$adj,2,which.max) h.parent.nodes <- unique(hidden.parent[-root]) dim.Y <- dim(Q.T..Y)[3] h.cpt <- hlc$hidden.structure$cpt %o% rep(1,dim.Y) hidden.parent2 <- hidden.parent; hidden.parent2[root] <- 0 Q.T..Y3 <- aperm(Q.T..Y%o%rep(1,max(hidden.dim)),c(1,2,4,3)) E.P.ChTi..Ti <- marginalize(h.cpt*Q.T..Y3,2) ln.E.P.ChTi..Ti0 <- ifelse(E.P.ChTi..Ti==0,E.P.ChTi..Ti,log(E.P.ChTi..Ti)) ln.E.P.ChTi..Ti <- array(0,dim=dim(ln.E.P.ChTi..Ti0)) for(hp in h.parent.nodes){ h.vars.hp <- which(hidden.parent2==hp) if(length(h.vars.hp)==1){ ln.E.P.ChTi..Ti[hp,1:hidden.dim[hp],] <- marginalize(1%o%ln.E.P.ChTi..Ti0[h.vars.hp,1:hidden.dim[hp],],1) }else{ ln.E.P.ChTi..Ti[hp,1:hidden.dim[hp],] <- marginalize(ln.E.P.ChTi..Ti0[h.vars.hp,1:hidden.dim[hp],],1) } } lnP.Xi..Ti <- log(marginalize(P.xi..Ti,2)); lnP.Xi..Ti[lnP.Xi..Ti==-Inf] <- 0 for(op in o.parent.nodes){ o.vars.op <- which(observed.parent==op) if(length(o.vars.op)==1) a <- lnP.Xi..Ti[o.vars.op,1:hidden.dim[op],] else a <- marginalize(lnP.Xi..Ti[o.vars.op,1:hidden.dim[op],],1) ln.E.P.ChTi..Ti[op,1:hidden.dim[op],] <- ln.E.P.ChTi..Ti[op,1:hidden.dim[op],] + a } E.P.ChTi..Ti <- ifelse(ln.E.P.ChTi..Ti==0,ln.E.P.ChTi..Ti,exp(ln.E.P.ChTi..Ti)) E.P.ChTi..Ti <- aperm(E.P.ChTi..Ti%o%rep(1,max(hidden.dim)),c(1,2,4,3)) Q.PaTi..Y <- Q.T..Y[hidden.parent,,] if(length(hidden.parent)==1) Q.PaTi..Y <- 1%o%Q.PaTi..Y Q.PaTi..Y <- aperm(Q.PaTi..Y%o%rep(1,max(hidden.dim)),c(1,4,2,3)) exp.jointP.h <- prop.table(E.P.ChTi..Ti * h.cpt * Q.PaTi..Y,c(1,4)) exp.jointP.h[root,,-1,] <- 0 #exp.jointP.h[root,,1,] <- prop.table(E.P.ChTi..Ti[root,,1,]*h.cpt[root,,1,],2) exp.jointP.h[root,,1,] <- Q.T..Y[root,,] return(exp.jointP.h) } ### 与えられたγの値の下で,IB-EM法によるパラメタ推定計算を行う関数. 
calc.EM = function(Q.T..Y,Q.Y,hlc,gamma,prior,alpha,dataset,EM.eps){
  # IB-EM parameter estimation at a fixed gamma.  Iterates E-step (update
  # Q(T|Y)) and M-step (re-estimate CPTs from expected sufficient statistics
  # plus Dirichlet pseudo-counts) until the Lagrangian's relative change is
  # below EM.eps.  NOTE(review): relies on get.alpha.table(), defined
  # elsewhere in this file.
  n.data <- dataset$n.data; data.Y3 <- dataset$data.Y3; dim.Y <- dataset$dim.Y
  observed.dim <- hlc$observed.structure$dim; hidden.dim <- hlc$hidden.structure$dim
  o.n.var <- hlc$observed.structure$n.var; h.n.var <- hlc$hidden.structure$n.var
  observed.parent <- hlc$observed.structure$parent
  alpha.table <- get.alpha.table(hlc,prior,alpha)
  observed.alpha <- alpha.table$observed.alpha
  hidden.alpha <- alpha.table$hidden.alpha
  rate <- -Inf; old.Lagrangian <- Inf; EM.count <- 1
  while(rate < (1-EM.eps)){
    ### E-step: compute Q(T|Y). #####
    Q.TY <- Q.T..Y*(rep(1,h.n.var)%o%rep(1,max(hidden.dim))%o%Q.Y)
    Q.T <- marginalize(Q.TY,3)
    # Compute EP(ti,y) and update Q(T|Y); the (1-gamma)/gamma powers blend
    # the prior Q(T) with the model term exp(gamma*EP).
    P.xi..Ti <- hlc$observed.structure$cpt %o% rep(1,dim.Y) * data.Y3
    EP <- HLC.IBEM.EP(hlc,Q.T..Y,P.xi..Ti)
    new.Q.T..Y <- (Q.T^(1-gamma))%o%rep(1,dim.Y) * ifelse(EP==0,0,exp(gamma*EP))
    new.Q.T..Y <- prop.table(new.Q.T..Y,c(1,3)); new.Q.T..Y[is.nan(new.Q.T..Y)] <- 0
    ### M-step: find the distribution P minimizing the Lagrangian with Q fixed.
    new.Q.T..Y.o <- new.Q.T..Y[observed.parent,,]
    exp.jointP.o <- data.Y3*aperm(new.Q.T..Y.o%o%rep(1,max(observed.dim)),c(1,4,2,3))
    # E[P(Ti,Pa(Ti))] <- E..Q(Ch(Ti)|y)[P(Ch(Ti)|Ti)]*P(Ti|Pa(Ti))*Q(Pa(Ti)|y).
    exp.jointP.h <- HLC.EXP.JOINT(hlc,Q.T..Y,P.xi..Ti)
    # Estimate the parameters of P(Ti|Pa..Ti) and P(X|T) from the expected
    # sufficient statistics (scaled back to counts via n.data).
    Q.Y.h <- rep(1,h.n.var)%o%rep(1,max(hidden.dim))%o%rep(1,max(hidden.dim))%o%Q.Y
    Q.Y.o <- rep(1,o.n.var)%o%rep(1,max(observed.dim))%o%rep(1,max(hidden.dim))%o%Q.Y
    hidden.table <- marginalize(exp.jointP.h*Q.Y.h,4) * n.data
    hidden.table.alpha <- hidden.table + hidden.alpha
    new.hidden.cpt <- prop.table(hidden.table.alpha,c(1,3))
    observed.table <- marginalize(exp.jointP.o*Q.Y.o,4) * n.data
    observed.table.alpha <- observed.table + observed.alpha
    new.observed.cpt <- prop.table(observed.table.alpha,c(1,3))
    new.hidden.cpt[is.nan(new.hidden.cpt)] <- 0; new.observed.cpt[is.nan(new.observed.cpt)] <- 0
    # Evaluate the Lagrangian and test relative convergence.
    exp.ll.hidden <- sum(ifelse(new.hidden.cpt!=0,hidden.table*log(new.hidden.cpt),0))
    exp.ll.observed <- sum(ifelse(new.observed.cpt!=0,observed.table*log(new.observed.cpt),0))
    E..Q.lnP.XT <- (exp.ll.hidden + exp.ll.observed)/n.data
    result.Lagrangian <- calc.Lagrangian(Q.T,Q.T..Y,Q.TY,E..Q.lnP.XT,gamma)
    Lagrangian <- result.Lagrangian$Lagrangian
    rate <- Lagrangian / old.Lagrangian
    if(is.nan(rate)) rate <- 1
    old.Lagrangian <- Lagrangian
    # update CPTs.
    hlc$hidden.structure$cpt <- new.hidden.cpt
    hlc$observed.structure$cpt <- new.observed.cpt
    Q.T..Y <- new.Q.T..Y
    EM.count <- EM.count + 1
  }
  result <- list("hlc"=hlc,"Q.T..Y"=Q.T..Y,"Q.TY"=Q.TY,"result.Lagrangian"=result.Lagrangian,"EM.count"=EM.count,
    "E..Q.lnP.XT"=E..Q.lnP.XT,"observed.table"=observed.table,"hidden.table"=hidden.table)
  result$observed.alpha <- observed.alpha; result$hidden.alpha <- hidden.alpha
  return(result)
}

### Compute the IB-EM Lagrangian (mean-field approximation version):
### sum_i I_Q(Ti;Y) - gamma*(E_Q[ln P(X,T)] + sum_i H(Ti)).
calc.Lagrangian <- function(Q.T,Q.T..Y,Q.TY,E..Q.lnP.XT,gamma){
  # Compute sum_i(I..Q(Ti;Y)) from entropies (0*log(0) treated as 0).
  H.Ti <- -margin.table(ifelse(Q.T==0,0,Q.T*log(Q.T)),1)
  H.Ti..Y <- -margin.table(ifelse(Q.T..Y==0,0,Q.TY*log(Q.T..Y)),1)
  MI.TiY <- H.Ti - H.Ti..Y
  # Compute the Lagrangian.
  Lagrangian <- sum(MI.TiY) - gamma*(E..Q.lnP.XT + sum(H.Ti))
  result <- list("Lagrangian"=Lagrangian,"MI.TiY"=MI.TiY)
  return(result)
}

### Estimate the number of classes of the hidden variables after
### [Elidan & Friedman '06]: test whether each hidden variable's classes are
### effectively used; if they are (fully utilized), add a new "spare" class.
adjust.cardinality <- function(hlc,Q.T..Y,EM.result,dataset,display){
  observed.table <- EM.result$observed.table; hidden.table <- EM.result$hidden.table
  observed.alpha <- EM.result$observed.alpha; hidden.alpha <- EM.result$hidden.alpha
  # Decide whether hidden variable Ti has an effective number of classes
  # (i.e. is sufficiently "utilized"): if merging any pair of its classes
  # improves the local BDe score, it is NOT fully utilized.
  is.effective.cardinality <- function(index,hlc,observed.table,hidden.table,observed.alpha,hidden.alpha){
    hidden.dim <- hlc$hidden.structure$dim
    observed.dim <- hlc$observed.structure$dim
    h.n.var <- hlc$hidden.structure$n.var
    o.n.var <- hlc$observed.structure$n.var
    hidden.adj <- hlc$hidden.structure$adj
    observed.parent <- hlc$observed.structure$parent
    # Extract the local structure (Ti, its parent and its children) into
    # local count/alpha tables.
    Ti.hidden.children <- which(hlc$hidden.structure$adj[index,]==1)
    Ti.observed.children <- which(hlc$observed.structure$parent==index)
    Ti.h.c.table <- hidden.table[Ti.hidden.children,,]
    Ti.o.c.table <- observed.table[Ti.observed.children,,]
    Ti.h.c.alpha <- hidden.alpha[Ti.hidden.children,,]
    Ti.o.c.alpha <- observed.alpha[Ti.observed.children,,]
    n.h.c <- length(Ti.hidden.children)
    n.o.c <- length(Ti.observed.children)
    Pa.Ti <- which(hidden.adj[,index]==1)
    Ti.dim <- hidden.dim[index]
    Ti.Pa.table <- 1 %o% hidden.table[index,,]
    Ti.Pa.alpha <- 1 %o% hidden.alpha[index,,]
    if(n.h.c==0){
      Ti.Ch.table <- Ti.o.c.table
      Ti.Ch.alpha <- Ti.o.c.alpha
      if(n.o.c==1){
        # Restore the variable dimension dropped by single-index subsetting.
        Ti.Ch.table <- 1 %o% Ti.Ch.table
        Ti.Ch.alpha <- 1 %o% Ti.Ch.alpha
      }
    }else{
      # Stack observed and hidden children into one table, padded to the
      # common maximum state dimension.
      n.c <- n.h.c + n.o.c
      max.dim <- max(c(hidden.dim,observed.dim))
      Ti.Ch.table <- array(0,dim=c(n.h.c+n.o.c,max.dim,max(hidden.dim)))
      Ti.Ch.alpha <- Ti.Ch.table
      Ti.Ch.table[1:n.o.c,1:max(observed.dim),1:max(hidden.dim)] <- Ti.o.c.table
      Ti.Ch.table[(n.o.c+1):n.c,1:max(hidden.dim),1:max(hidden.dim)] <- Ti.h.c.table
      Ti.Ch.alpha[1:n.o.c,1:max(observed.dim),1:max(hidden.dim)] <- Ti.o.c.alpha
      Ti.Ch.alpha[(n.o.c+1):n.c,1:max(hidden.dim),1:max(hidden.dim)] <- Ti.h.c.alpha
    }
    # BDe of Ti's local structure without merging any classes.
    BDe.notMerge <- calc.BDe(Ti.Ch.table,Ti.Pa.table,Ti.Ch.alpha,Ti.Pa.alpha)
    # BDe of Ti's local structure with classes i and j merged, for all pairs.
    ec.flag <- FALSE
    for(i in 1:(Ti.dim-1)){
      for(j in (i+1):Ti.dim){
        merged.Ti.Ch.table <- Ti.Ch.table; merged.Ti.Pa.table <- Ti.Pa.table
        merged.Ti.Ch.alpha <- Ti.Ch.alpha; merged.Ti.Pa.alpha <- Ti.Pa.alpha
        # Merge class j into class i, then move the last class into slot j
        # and zero the last slot (compacting the tables).
        merged.Ti.Ch.table[,,i] <- Ti.Ch.table[,,i] + Ti.Ch.table[,,j]
        merged.Ti.Ch.alpha[,,i] <- Ti.Ch.alpha[,,i] + Ti.Ch.alpha[,,j]
        merged.Ti.Pa.table[,i,] <- Ti.Pa.table[,i,] + Ti.Pa.table[,j,]
        merged.Ti.Pa.alpha[,i,] <- Ti.Pa.alpha[,i,] + Ti.Pa.alpha[,j,]
        merged.Ti.Ch.table[,,j] <- merged.Ti.Ch.table[,,Ti.dim]
        merged.Ti.Pa.table[,j,] <- merged.Ti.Pa.table[,Ti.dim,]
        merged.Ti.Ch.table[,,Ti.dim] <- 0; merged.Ti.Pa.table[,Ti.dim,] <- 0
        merged.Ti.Ch.alpha[,,j] <- merged.Ti.Ch.alpha[,,Ti.dim]
        merged.Ti.Pa.alpha[,j,] <- merged.Ti.Pa.alpha[,Ti.dim,]
        merged.Ti.Ch.alpha[,,Ti.dim] <- 0; merged.Ti.Pa.alpha[,Ti.dim,] <- 0
        BDe.Merge <- calc.BDe(merged.Ti.Ch.table,merged.Ti.Pa.table,merged.Ti.Ch.alpha,merged.Ti.Pa.alpha)
        # If BDe.Merge > BDe.notMerge, Ti is judged not yet fully "utilized".
        if(BDe.Merge > BDe.notMerge){ec.flag = TRUE; break}
      }
      if(ec.flag) break
    }
    return(ec.flag)
  }
  # Add a new "spare" class to hidden variable Ti and grow the CPT arrays
  # and Q(Ti|Y) accordingly.
  add.spare.state <- function(index,hlc,Q.T..Y,dataset,display){
    ### Add a new "spare" class.
    new.hlc <- hlc
    hidden.dim <- hlc$hidden.structure$dim
    observed.dim <- hlc$observed.structure$dim
    Pa.Ti <- which(hlc$hidden.structure$adj[,index]==1)
    Ti.dim <- hidden.dim[index]
    h.n.var <- hlc$hidden.structure$n.var
    o.n.var <- hlc$observed.structure$n.var
    Ti.hidden.children <- which(hlc$hidden.structure$adj[index,]==1)
    Ti.observed.children <- which(hlc$observed.structure$parent==index)
    hidden.cpt <- hlc$hidden.structure$cpt
    observed.cpt <- hlc$observed.structure$cpt
    new.hlc$hidden.structure$dim[index] <- Ti.dim + 1
    new.hidden.dim <- new.hlc$hidden.structure$dim
    if(display) cat("S",sep="")
    # Grow the CPT arrays if the new class exceeds the current maximum
    # hidden dimension.
    if(max(hidden.dim) < (Ti.dim+1)){
      new.hidden.cpt <- array(0,dim=c(h.n.var,Ti.dim+1,Ti.dim+1))
      new.hidden.cpt[,1:max(hidden.dim),1:max(hidden.dim)] <- hidden.cpt
      new.observed.cpt <- array(0,dim=c(o.n.var,max(observed.dim),Ti.dim+1))
      new.observed.cpt[,1:max(observed.dim),1:max(hidden.dim)] <- observed.cpt
      new.Q.T..Y <- array(0,dim=c(h.n.var,Ti.dim+1,dataset$dim.Y))
      new.Q.T..Y[,1:max(hidden.dim),] <- Q.T..Y
    }else{
      new.hidden.cpt <- hidden.cpt
      new.observed.cpt <- observed.cpt
      new.Q.T..Y <- Q.T..Y
    }
    # Set the new CPT row P(Ti|Pa(Ti)) (spare class gets mass 1/(dim+1),
    # then the column is renormalized).
    if(identical(Pa.Ti,integer(0))) new.hidden.cpt[index,Ti.dim+1,1] <- 1/(Ti.dim+1)
    else new.hidden.cpt[index,Ti.dim+1,1:hidden.dim[Pa.Ti]] <- 1/(Ti.dim+1)
    new.hidden.cpt[index,,] <- prop.table(new.hidden.cpt[index,,],2)
    new.hidden.cpt[is.nan(new.hidden.cpt)] <- 0
    # Set the new Q(Ti|Y) slice the same way.
    new.Q.T..Y[index,Ti.dim+1,] <- 1/(Ti.dim+1)
    new.Q.T..Y[index,,] <- prop.table(new.Q.T..Y[index,,],2)
    # Set new CPT columns P(Ch(Ti)|new class) to random distributions.
    for(ch.ti in Ti.hidden.children){
      # initialize P(Ch(Ti)|new.ti).
      #new.hidden.cpt[ch.ti,1:hidden.dim[ch.ti],Ti.dim+1] <- 1/hidden.dim[ch.ti]
      new.hidden.cpt[ch.ti,1:hidden.dim[ch.ti],Ti.dim+1] <- prop.table(runif(hidden.dim[ch.ti],min=0,max=1))
    }
    for(ch.ti in Ti.observed.children){
      # initialize P(Ch(Ti)|new.ti).
      #new.observed.cpt[ch.ti,1:observed.dim[ch.ti],Ti.dim+1] <- 1/observed.dim[ch.ti]
      new.observed.cpt[ch.ti,1:observed.dim[ch.ti],Ti.dim+1] <- prop.table(runif(observed.dim[ch.ti],min=0,max=1))
    }
    new.hlc$hidden.structure$cpt <- new.hidden.cpt
    new.hlc$observed.structure$cpt <- new.observed.cpt
    result <- list("hlc"=new.hlc,"Q.T..Y"=new.Q.T..Y)
    return(result)
  }
  # Test each hidden variable (smallest cardinality first) and add a spare
  # class to every variable judged fully utilized (ac == FALSE means the
  # merge test found no improvement, i.e. all classes are needed).
  h.n.var <- hlc$hidden.structure$n.var
  ac <- 1:h.n.var
  for(i in order(hlc$hidden.structure$dim)){
    ac[i] <- is.effective.cardinality(i,hlc,observed.table,hidden.table,observed.alpha,hidden.alpha)
  }
  for(i in which(ac==FALSE)){
    ass.result <- add.spare.state(i,hlc,Q.T..Y,dataset,display)
    hlc <- ass.result$hlc
    Q.T..Y <- ass.result$Q.T..Y
  }
  return(list("hlc"=hlc,"Q.T..Y"=Q.T..Y,"ac"=ac))
}

### Estimate the hierarchy among hidden variables with a method improving
### [Elidan & Friedman '06] by exploiting topology information.
# hclust.result: return value of the agnes function.
# NOTE(review): this function continues beyond this excerpt of the file.
adjust.hierarchy <- function(hlc,Q.T..Y,Q.Y,MI.P.data,sep.array,hclust.result,goal.clusters,display){
  o.n.var <- hlc$observed.structure$n.var
  h.n.var <- hlc$hidden.structure$n.var
  hidden.dim <- hlc$hidden.structure$dim
  observed.parent <- hlc$observed.structure$parent
  o.parent.nodes <- unique(observed.parent)
  ah <- rep(TRUE,h.n.var)
  Q.T <- marginalize(Q.T..Y*(rep(1,h.n.var)%o%rep(1,max(hidden.dim))%o%Q.Y),3)
  n <- length(MI.P.data[,"H.XT"])
  H.XT <- MI.P.data[,"H.XT"][[n]][2,]
  # Visit parents with the most observed children first.
  for(op in order(table(observed.parent),decreasing=TRUE)){
    h.n.var <- hlc$hidden.structure$n.var
    hidden.dim <- hlc$hidden.structure$dim
    observed.parent <- hlc$observed.structure$parent
    hidden.parent <- unique(observed.parent)
    sep1 <- which(sep.array[op,1,-1]==1)
    sep2 <- which(sep.array[op,2,-1]==1)
    # Choose the high/low-entropy split candidate clusters based on the
    # topology information; skip splits matching an already-reached goal
    # cluster.
    gcl.flag <- FALSE
    for(i in 1:length(goal.clusters)){
      if(length(goal.clusters[[i]])!=length(c(sep1,sep2))) next
      else if(all(sort(goal.clusters[[i]])==sort(c(sep1,sep2)))){
        gcl.flag <- TRUE
        break
      }
    }
    if(gcl.flag) next
    # Order so that H.sep1 is the higher-entropy side.
    H.sep1 <- H.XT[sep1]
    H.sep2 <- H.XT[sep2]
    if(mean(H.sep1) < mean(H.sep2)){
      a <- H.sep1
      H.sep1 <- H.sep2
      H.sep2 <- a
    }
    #test.result <- try(t.test(H.sep1,H.sep2,equal=F,alternative="g"),silent=TRUE)
    #test.result <- try(wilcox.test(H.sep1,H.sep2,exact=TRUE,alternative="g"),silent=TRUE)
    #if(class(test.result) == "try-error") next
    # If the two clusters' conditional entropies differ meaningfully, insert
    # a new hidden variable for the lower-entropy cluster.
    if(mean(H.sep1)*.99 > mean(H.sep2)){
      # insert new hidden variable to Q.T..Y.
      new.Q.T..Y <- array(0,dim=c(h.n.var+1,dim(Q.T..Y)[-1]))
      new.Q.T..Y[1:h.n.var,,] <- Q.T..Y
      new.Q.T..Y[h.n.var+1,,] <- Q.T..Y[op,,]
      # Add the new hidden variable to the hlc object.
      new.hlc <- hlc
      new.hlc$hidden.structure$n.var <- h.n.var + 1
      new.hlc$hidden.structure$dim <- c(hidden.dim,hidden.dim[op])
      new.h.adj <- matrix(0,nrow=h.n.var+1,ncol=h.n.var+1)
      new.h.adj[1:h.n.var,1:h.n.var] <- hlc$hidden.structure$adj
      new.h.adj[op,h.n.var+1] <- 1
      new.hlc$hidden.structure$adj <- new.h.adj
      new.h.cpt <- array(0,dim=c(h.n.var+1,max(hidden.dim),max(hidden.dim)))
      new.h.cpt[1:h.n.var,,] <- hlc$hidden.structure$cpt
      new.h.cpt[h.n.var+1,,] <- Q.T[op,] %o% rep(1,max(hidden.dim))
      new.hlc$hidden.structure$cpt <- new.h.cpt
      new.cl.num <- which.min(c(mean(H.XT[sep1]),mean(H.XT[sep2])))
      op.cl.num <- ifelse(new.cl.num==1,2,1)
      new.o.var <- which(sep.array[op,new.cl.num,-1]==1)
      new.hlc$observed.structure$parent[new.o.var] <- h.n.var + 1
      if(display) cat("V",sep="")
      # Re-build sep.array.
new.sep.array <- array(0,dim=c(h.n.var+1,3,o.n.var+1)) new.sep.array[1:h.n.var,,] <- sep.array new.cl <- sep.array[op,new.cl.num,1] op.cl <- sep.array[op,op.cl.num,1] if(op.cl > 0){ subtree.op.root1 <- hclust.result$merge[op.cl,1] subtree.op.root2 <- hclust.result$merge[op.cl,2] }else{ subtree.op.root1 <- op.cl subtree.op.root2 <- op.cl } if(new.cl > 0){ subtree.new.root1 <- hclust.result$merge[new.cl,1] subtree.new.root2 <- hclust.result$merge[new.cl,2] }else{ subtree.new.root1 <- new.cl subtree.new.root2 <- new.cl } new.sep.array[op,,] <- 0 new.sep.array[op,1,1] <- subtree.op.root1 new.sep.array[op,2,1] <- subtree.op.root2 new.sep.array[op,3,1] <- op.cl new.sep.array[op,1,get.subtree.vars(hclust.result,subtree.op.root1)+1] <- 1 new.sep.array[op,2,get.subtree.vars(hclust.result,subtree.op.root2)+1] <- 1 new.sep.array[op,3,-1] <- new.sep.array[op,1,-1] + new.sep.array[op,2,-1] new.sep.array[h.n.var+1,1,1] <- subtree.new.root1 new.sep.array[h.n.var+1,2,1] <- subtree.new.root2 new.sep.array[h.n.var+1,3,1] <- new.cl new.sep.array[h.n.var+1,1,get.subtree.vars(hclust.result,subtree.new.root1)+1] <- 1 new.sep.array[h.n.var+1,2,get.subtree.vars(hclust.result,subtree.new.root2)+1] <- 1 new.sep.array[h.n.var+1,3,-1] <- new.sep.array[h.n.var+1,1,-1] + new.sep.array[h.n.var+1,2,-1] # 更新すべきオブジェクトを更新する. hlc <- new.hlc Q.T..Y <- new.Q.T..Y sep.array <- new.sep.array ah[op] <- FALSE #plot.hlc.graph(hlc) #dendrogram <- get.dendrogram(hlc) #plot(dendrogram,type="t",nodePar=list(pch=c(1,16),cex=c(1.3,.75),col=4,lab.cex=.75),yaxt="n",dLeaf=.3) } } result <- list("hlc"=hlc,"Q.T..Y"=Q.T..Y,"sep.array"=sep.array,"ah"=ah) return(result) } ### ベイジアンネットワークのハイパーパラメタ(ディリクレ分布)を表す配列を得る関数. # if prior = "K2", 入力されたalphaを元にK2事前分布のハイパーパラメータ配列を返す. # if prior = "BDeu", 入力されたalpha(equivalent sample size)を元にBDeu事前分布のハイパーパラメタ配列を返す. # if prior = "database", 関数get.dataset内で用いる配列を返す(通常は指定しない). 
# Build Dirichlet hyperparameter arrays shaped like the hlc CPT arrays.
# hlc:   hlc object (its cpt arrays define which entries are active, i.e. != 0).
# prior: "K2", "BDeu", or "database" (anything else returns NULL).
# alpha: scalar hyperparameter / equivalent sample size.
# Returns list(hidden.alpha, observed.alpha) or NULL for an unknown prior.
get.alpha.table <- function(hlc,prior,alpha){
  # Start from the CPT arrays so zero (inactive) entries stay zero.
  hidden.alpha <- hlc$hidden.structure$cpt
  hidden.alpha[hidden.alpha!=0] <- alpha
  observed.alpha <- hlc$observed.structure$cpt
  observed.alpha[observed.alpha!=0] <- alpha
  hidden.dim <- hlc$hidden.structure$dim
  observed.dim <- hlc$observed.structure$dim
  hidden.parent <- apply(hlc$hidden.structure$adj,2,which.max)
  if(identical(prior,"K2")){
    result <- list("hidden.alpha"=hidden.alpha,"observed.alpha"=observed.alpha)
  }else if(identical(prior,"BDeu")){
    # BDeu: alpha / (r_i * q_i), where r_i is the variable's cardinality and
    # q_i the parent's; the root has no parent, so only its own cardinality.
    # (Division by the length-n.var vector recycles along the first array
    # dimension, which indexes the variable.)
    hidden.qr <- hidden.dim * hidden.dim[hidden.parent]
    hidden.qr[hlc$root.node] <- hidden.dim[hlc$root.node]
    observed.dim <- hlc$observed.structure$dim
    observed.parent <- hlc$observed.structure$parent
    observed.qr <- observed.dim * hidden.dim[observed.parent]
    hidden.alpha <- hidden.alpha / hidden.qr
    observed.alpha <- observed.alpha / observed.qr
    result <- list("hidden.alpha"=hidden.alpha,"observed.alpha"=observed.alpha)
  }else if(identical(prior,"database")){
    hidden.alpha <- hidden.alpha / hidden.dim
    observed.alpha <- observed.alpha / observed.dim
    result <- list("hidden.alpha"=hidden.alpha,"observed.alpha"=observed.alpha)
  }else result <- NULL
  return(result)
}

### Main entry point: estimate the hierarchical latent class model proposed in
### this work by Information-Bottleneck EM with gamma annealing.
# gamma.steps:   number of annealing steps from gamma = 0 to gamma = 1.
# method:        hierarchical clustering method (default "ward").
# hclust.result: pass topology information (an hclust/agnes object) directly
#                to skip the internal clustering.
# distance:      "d" for the distance of [Cover & Thomas, 1991], "D" for the
#                distance of [Li et al., 2002].
# prior:         prior for the hyperparameters (default "K2").
# alpha:         hyperparameter constant (default 1); prior="K2", alpha=1
#                gives a uniform prior.
# c.flag:        TRUE to jointly estimate the latent-variable cardinalities.
# h.flag:        TRUE to estimate the latent variables hierarchically; k gives
#                the target number of clusters (default 2).
# display:       TRUE to print progress of the estimation (default).
# Returns an object of S3 class "ibem".
HLC.IBEM <- function(data,structure=NULL,alpha=1,gamma.steps=30,method="ward",EM.eps=1e-4,hclust.result=NULL,
                     distance="D",prior="K2",k=2,h.flag=FALSE,c.flag=FALSE,display=TRUE){
  p1 <- proc.time()
  # If no structure object was supplied, build a default one: a single binary
  # latent variable that is parent of every observed column of `data`.
  if(identical(structure,NULL)){
    o.n.var <- dim(data)[2]
    structure <- list()
    structure$hidden$adj <- matrix(0,ncol=1,nrow=1)
    structure$hidden$dim <- 2
    structure$observed$parent <- rep(1,o.n.var)
    structure$observed$dim <- unlist(lapply(lapply(data,levels),length))
  }
  # Build the hlc object from the structure object.
  hlc <- set.hlc(structure)
  # Build a dataset object holding the data in computation-friendly form.
  dataset <- get.dataset(hlc,data)
  observed.dim <- hlc$observed.structure$dim; hidden.dim <- hlc$hidden.structure$dim
  o.n.var <- hlc$observed.structure$n.var; h.n.var <- hlc$hidden.structure$n.var
  observed.parent <- hlc$observed.structure$parent
  # Empirical distributions Q(Y), Q(Xi|Y) used by IB-EM, and entropies H_Q(Xi).
  Q.Y <- prop.table(dataset$table.Y)
  Q.X..Y <- dataset$data.Y3[,,1,]
  Q.XY <- Q.X..Y * (rep(1,o.n.var)%o%rep(1,max(observed.dim))%o%Q.Y)
  Q.X <- marginalize(Q.XY,3)
  H.X <- -margin.table(ifelse(Q.X==0,0,Q.X*log(Q.X)),1)
  # Random initialization of the variational distributions Q(Ti|Y).
  Q.T..Y <- array(0,dim=c(h.n.var,max(hidden.dim),dataset$dim.Y))
  for(i in 1:h.n.var){
    a <- runif(hidden.dim[i]*dataset$dim.Y,min=0,max=1)
    a <- array(a,dim=c(hidden.dim[i],dataset$dim.Y))
    Q.T..Y[i,1:hidden.dim[i],1:dataset$dim.Y] <- prop.table(a,2)
  }
  # Apply hierarchical clustering to the empirical distribution Q to obtain
  # the topology information used when growing the hierarchy.
  if(h.flag){
    if(identical(hclust.result,NULL)){
      cMd <- calc.MI.distance(Q.X..Y,Q.Y,H.X)
      if(identical(distance,"d")) distance.matrix <- cMd$MI.d
      else if(identical(distance,"D")) distance.matrix <- cMd$MI.D
      hclust.result <- agnes(distance.matrix,diss=TRUE,method=method)
    }
    pltree(hclust.result)
    cutree.result <- cutree(hclust.result,k=k)
    goal.clusters <- list()
    for(i in 1:k) goal.clusters[[i]] <- which(cutree.result==i)
    # list of two clusters which will be separated for each hidden variables.
    hclust.root <- dim(hclust.result$merge)[1]
    subtree.root1 <- hclust.result$merge[hclust.root,1]
    subtree.root2 <- hclust.result$merge[hclust.root,2]
    sep.array <- array(0,dim=c(h.n.var,3,o.n.var+1))
    sep.array[1,1,1] <- subtree.root1
    sep.array[1,2,1] <- subtree.root2
    sep.array[1,3,1] <- o.n.var
    sep.array[1,1,get.subtree.vars(hclust.result,subtree.root1)+1] <- 1
    sep.array[1,2,get.subtree.vars(hclust.result,subtree.root2)+1] <- 1
    sep.array[1,3,-1] <- sep.array[1,1,-1] + sep.array[1,2,-1]
  }
  ### MI.data: per-gamma mutual information and MI distances collected during
  ### annealing.  MI.data[type,gamma,variable index]; type: 1 = MI, 2 = MI.d,
  ### 3 = MI.D.
  MI.P.data <- list(); MI.Q.data <- list()
  add.state.data <- list()    # data of gamma in which adding spare state.
  add.variable.data <- list() # data of gamma in which adding new hidden variable.
  ### Start of the proposed estimation procedure. ###
  gamma <- 0
  gamma.const <- 1/gamma.steps
  EM.count <- 1; g.count <- 0
  cat("progress of gamma annealing.\n")
  while(gamma <= 1){
    g.count <- g.count + 1
    # estimate parameters by EM with gamma.
    EM.result <- calc.EM(Q.T..Y,Q.Y,hlc,gamma,prior,alpha,dataset,EM.eps)
    hlc <- EM.result$hlc
    EM.count <- EM.count + EM.result$EM.count
    Q.T..Y <- EM.result$Q.T..Y
    Lagrangian <- EM.result$result.Lagrangian$Lagrangian
    if(display){
      if(g.count %% 10 == 0) cat(g.count,sep="")
      else cat(".",sep=",")
    }
    # Compute the latent variables' marginals by belief propagation.
    BP.result <- HLC.BP(hlc)
    P.T <- BP.result$hidden$posterior[1,,]
    P.XT <- BP.result$observed$post.joint[1,,,]
    P.TPaT <- BP.result$hidden$post.joint[1,,,]
    # With a single latent variable, re-add the dropped leading dimension.
    if(hlc$hidden.structure$n.var==1){
      P.T <- 1 %o% P.T
      P.TPaT <- 1%o% P.TPaT
    }
    # Record mutual information and MI distances for this gamma.
    MI.P.result <- calc.P..MI.distances(hlc,P.XT,P.TPaT,P.T,H.X)
    MI.TY <- EM.result$result.Lagrangian$MI.TiY; Q.TY = EM.result$Q.TY
    MI.Q.result <- calc.Q..MI.distances(hlc,MI.TY,Q.X..Y,Q.T..Y,Q.TY,Q.Y,H.X)
    MI.P.data <- rbind(MI.P.data,MI.P.result)
    MI.Q.data <- rbind(MI.Q.data,MI.Q.result)
    # Adjust the cardinality (number of classes) of the latent variables.
    # When something changed, redo the current gamma step (`next` without
    # incrementing gamma).
    if(c.flag && g.count > 1){
      ac.result <- adjust.cardinality(hlc,Q.T..Y,EM.result,dataset,display)
      if(sum(!ac.result$ac)!=0){
        hlc <- ac.result$hlc
        Q.T..Y <- ac.result$Q.T..Y
        dataset <- get.dataset(hlc,data,dataset)
        add.state.data[[length(add.state.data)+1]] <- c("gamma"=gamma,"ac"=ac.result$ac)
        next
      }
    }
    # Adjust the hierarchy of latent variables.
    # judge whether IBEM annealing step capture hierarchical structure of model.
    if(h.flag && g.count > 1){
      ah.result <- adjust.hierarchy(hlc,Q.T..Y,Q.Y,MI.P.data,sep.array,hclust.result,goal.clusters,display)
      if(sum(!ah.result$ah)!=0){
        hlc <- ah.result$hlc
        Q.T..Y <- ah.result$Q.T..Y
        sep.array <- ah.result$sep.array
        dataset <- get.dataset(hlc,data,dataset)
        add.variable.data[[length(add.variable.data)+1]] <- c("gamma"=gamma,"ah"=ah.result$ah)
        next
      }
    }
    gamma <- gamma + gamma.const
  }
  # Final model score (Cheeseman-Stutz) and bookkeeping of the run.
  CS.result <- calc.CS(EM.result,dataset)
  p2 <- proc.time()
  calc.time <- p2 - p1
  #cat("\nIBEM was converged: Cheeseman-Stutz score = ",CS.result$CS,"\n",sep="")
  result <- list()
  result$hlc <- hlc; result$Score <- CS.result
  result$hlc$names <- attr(data,"names")
  result$hclust.result <- hclust.result; result$k <- k
  result$distance <- distance
  result$gamma.steps <- gamma.steps; result$hclust.method <- method
  result$prior <- prior; result$alpha <- alpha
  result$h.flag <- h.flag; result$c.flag <- c.flag
  result$MI.P.data <- MI.P.data; result$MI.Q.data <- MI.Q.data
  result$add.state.data <- add.state.data; result$add.variable.data <- add.variable.data
  result$calc.time <- calc.time
  # Set the S3 class.
  class(result) <- "ibem"
  return(result)
}

### Draw samples from the given hlc object by Gibbs sampling.
# n:       number of samples to generate.
# burn.in: fraction of the chain discarded as burn-in (default .3).
# * This function is quite slow; room for improvement.
# Gibbs sampler over an hlc model.  Generates round(n/(1-burn.in)) joint
# configurations of (hidden, observed) variables, then discards the first
# round(n.config*burn.in) as burn-in.
# Returns list(h.samples, o.samples) as factor data frames.
sample.HLC <- function(hlc,n,burn.in=.3){
  hidden.adj <- hlc$hidden.structure$adj
  hidden.cpt <- hlc$hidden.structure$cpt
  hidden.dim <- hlc$hidden.structure$dim
  root <- hlc$hidden.structure$root.node
  # which.max over adjacency columns gives each latent variable's parent
  # (the root's entry is meaningless and is special-cased below).
  hidden.parent <- apply(hlc$hidden.structure$adj,2,which.max)
  observed.parent <- hlc$observed.structure$parent
  observed.cpt <- hlc$observed.structure$cpt
  observed.dim <- hlc$observed.structure$dim
  h.n.var <- hlc$hidden.structure$n.var
  o.n.var <- hlc$observed.structure$n.var
  # Compute a topological order of the latent variables by BFS from the root.
  topological.order <- NULL
  que <- root
  while(length(que)!=0){
    topological.order <- append(topological.order,que[1])
    children <- which(hidden.adj[que[1],]==1)
    que = que[-1]
    if(!identical(children,integer(0))) que <- append(que,children)
  }
  # Total chain length includes the burn-in portion discarded at the end.
  n.config <- round(n/(1-burn.in))
  h.config <- matrix(0,ncol=h.n.var,nrow=n.config)
  o.config <- matrix(0,ncol=o.n.var,nrow=n.config)
  # determine initial configuration by probabilistic logic sampling.
  for(i in topological.order){
    if(i==root) h.parent.config <- 1
    else h.parent.config <- h.config[1,hidden.parent[i]]
    h.config[1,i] <- sample(1:hidden.dim[i],1,prob=hidden.cpt[i,1:hidden.dim[i],h.parent.config])
    o.children <- which(observed.parent==i)
    for(j in o.children){
      o.config[1,j] <- sample(1:observed.dim[j],1,prob=observed.cpt[j,1:observed.dim[j],h.config[1,i]])
    }
  }
  # Run the Gibbs sampler: at step t, resample each Ti given its parent's
  # state at step t and its children's states at step t-1.
  for(t in 2:n.config){
    for(i in topological.order){
      if(i==root) h.parent.config <- 1
      else h.parent.config <- h.config[t,hidden.parent[i]]
      # calculate posterior P(Ti|e).
      # pi: prior message from the parent; lambda: likelihood messages from
      # the children, computed by one-hot encoding the children's previous
      # states (h.previous.config / o.previous.config) against the CPTs.
      pi <- hidden.cpt[i,,h.parent.config]
      h.children <- which(hidden.adj[i,]==1)
      h.pc <- h.config[t-1,h.children]%o%rep(1,max(hidden.dim))%o%rep(1,max(hidden.dim))
      h.pc2 <- array(1:max(hidden.dim),dim=c(max(hidden.dim),length(h.children),max(hidden.dim)))
      h.pc2 <- aperm(h.pc2,c(2,1,3))
      h.previous.config <- ifelse(h.pc==h.pc2,1,0)
      if(identical(h.children,integer(0))) h.lambda <- 1
      else{
        hidden.cpt2 <- hidden.cpt[h.children,,]
        # Re-add the dropped leading dimension for a single child.
        if(length(h.children)==1) hidden.cpt2 <- 1 %o% hidden.cpt2
        h.lambda.ms <- marginalize(hidden.cpt2 * h.previous.config,2)
        # Product over children done as exp(sum(log(.))).
        h.lambda <- exp(marginalize(log(h.lambda.ms),1))
      }
      o.children <- which(observed.parent==i)
      o.pc <- o.config[t-1,o.children]%o%rep(1,max(observed.dim))%o%rep(1,max(hidden.dim))
      o.pc2 <- array(1:max(observed.dim),dim=c(max(observed.dim),length(o.children),max(hidden.dim)))
      o.pc2 <- aperm(o.pc2,c(2,1,3))
      o.previous.config <- ifelse(o.pc==o.pc2,1,0)
      observed.cpt2 <- observed.cpt[o.children,,]
      if(length(o.children)==1) observed.cpt2 <- 1 %o% observed.cpt2
      o.lambda.ms <- marginalize(observed.cpt2 * o.previous.config,2)
      o.lambda <- exp(marginalize(log(o.lambda.ms),1))
      lambda <- h.lambda * o.lambda
      h.posterior <- prop.table(pi*lambda)
      # generate sample Ti from posterior.
      h.config[t,i] <- sample(1:hidden.dim[i],1,prob=h.posterior[1:hidden.dim[i]])
      # generate sample Xj from posterior.
      o.children <- which(observed.parent==i)
      for(j in o.children){
        o.config[t,j] <- sample(1:observed.dim[j],1,prob=observed.cpt[j,1:observed.dim[j],h.config[t,i]])
      }
    }
  }
  # Discard the burn-in portion and return the samples as factor data frames.
  h.samples <- h.config[-(1:round(n.config*burn.in)),]
  h.samples <- convert..to..factor(as.data.frame(h.samples))
  o.samples <- o.config[-(1:round(n.config*burn.in)),]
  o.samples <- convert..to..factor(as.data.frame(o.samples))
  return(list("h.samples"=h.samples,"o.samples"=o.samples))
}

# S3 print method for "ibem" objects: summarizes the run parameters and the
# learned model.
print.ibem <- function(ibem){
  cat("Learning Hierarhical latent class model by Information Bottleneck EM.\n")
  cat("=== parameters of HLC.IBEM ===\n")
  cat("*** gamma.steps:",ibem$gamma.steps,"\n")
  cat("*** learning cardinality:",ifelse(ibem$c.flag,"YES","NO"),"\n")
  cat("*** learning hierarchy:",ifelse(ibem$h.flag,"YES","NO"),"\n")
  cat("*** method of hclust:",ibem$hclust.method,"\n")
  cat("*** k of cutree:",ibem$k,"\n")
  cat("*** distance type:",ibem$distance,"\n")
  cat("*** prior: ",ibem$prior," [alpha = ",ibem$alpha,"]\n",sep="")
  cat("\n=== result of learning ===\n")
  cat("*** Cheeseman-Stutz score:",ibem$Score$CS,"\n")
  cat("*** number of hidden variables:",ibem$hlc$hidden.structure$n.var,"\n")
  cat("*** number of observed variables:",ibem$hlc$observed.structure$n.var,"\n")
  cat("*** cardinality of hidden variables:",ibem$hlc$hidden.structure$dim,"\n")
  cat("*** number of parameters:",get.num.of.param(ibem$hlc),"\n")
  cat("*** calculation time:",ibem$calc.time,"\n")
}

### Derive a structure object from an hlc object (drops the CPTs, keeps the
### network-structure information).
convert..hlc..to..structure <- function(hlc){
  structure <- list()
  structure$hidden <- hlc$hidden.structure
  structure$observed <- hlc$observed.structure
  structure$hidden$cpt <- NULL
  structure$observed$cpt <- NULL
  return(structure)
}

### Convert an integer data matrix/data frame into a factor data frame
### (columns that are already factors are left untouched).
convert..to..factor <- function(data){
  for(i in 1:dim(data)[2]){
    col <- data[,i]
    if(!identical(class(col),"factor")){
      data[,i] <- as.factor(col)
    }
  }
  return(data)
}

### Plot (matplot) MI.P.data from the ibem object returned by HLC.IBEM.
# The latent-variable indices are used for the plot's col/lty.
# Plot one statistic stored in ibem$MI.P.data across the gamma-annealing steps.
# ibem:     "ibem" object returned by HLC.IBEM.
# type:     row of the per-step statistic matrix to plot (e.g. 1 = MI).
# distance: which statistic to plot: "MI.XT", "H.XT" or "MI.TY"; any other
#           value returns NULL without plotting.
# ...:      further arguments forwarded to matplot().
plot.MI.data <- function(ibem,type=1,distance="MI.XT",...){
  MI.data <- ibem$MI.P.data
  x <- MI.data[,distance]
  x2 <- unlist(x)
  gamma.steps <- length(x)
  #par(mar=c(5,4,2,2))
  if(identical(distance,"MI.XT") || identical(distance,"H.XT")){
    # "MI.XT" entries carry 3 statistics per variable, "H.XT" entries carry 2.
    if(identical(distance,"MI.XT")) k <- 3
    else if(identical(distance,"H.XT")) k <- 2
    n.var <- length(x2) / (gamma.steps*k)
    vars <- 1:n.var
    dim(x2) <- c(k,n.var,gamma.steps)
    # matplot wants one column per series; transpose unless single-variable.
    if(length(vars)==1) matplot(1:gamma.steps,x2[type,vars,],type="l",axes=FALSE,...)
    else matplot(1:gamma.steps,t(x2[type,vars,]),type="l",axes=FALSE,...)
    box()
    # Label the x axis with gamma in [0, 1] instead of the raw step index.
    axis(1,at = seq(0,gamma.steps,length=11),labels=round(seq(0,1,length=11),3))
    axis(2)
  }else if(identical(distance,"MI.TY")){
    # The number of hidden variables can grow during annealing, so pad every
    # step's matrix into a common array sized by the final count.
    h.n.var <- dim(x[[gamma.steps]])[2]
    MI.array <- array(0,dim=c(4,h.n.var,gamma.steps))
    for(i in 1:gamma.steps){
      hnv <- dim(x[[i]])[2]
      MI.array[,1:hnv,i] <- x[[i]]
    }
    if(h.n.var==1) matplot(1:gamma.steps,MI.array[type,,],type="l",axes=FALSE,...)
    else matplot(1:gamma.steps,t(MI.array[type,,]),type="l",axes=FALSE,...)
    box()
    axis(1,at = seq(0,gamma.steps,length=11),labels=round(seq(0,1,length=11),3))
    axis(2)
  }else return(NULL)
}

# Write a Graphviz DOT file describing the graph of an hlc object.
# hlc:        hlc object (only the structure fields are used, not the CPTs).
# file:       output path for the DOT source.
# font.ratio: hidden-node font size is font.size * font.ratio.
# font.size:  font size for observed nodes.
# Fix over the previous version: nodes are declared exactly once each.  The
# old code declared the last hidden node twice and re-declared every observed
# node once per hidden variable (the declaration sat inside the hidden-node
# loop); Graphviz tolerated the duplicates, but the DOT output was redundant.
write.hlc.graph <- function(hlc,file,font.ratio = 3,font.size = 8){
  ### Build the adjacency matrix over all variables (hidden first, then observed).
  observed.parent <- hlc$observed.structure$parent
  o.parent.nodes <- unique(observed.parent)
  hidden.adj <- hlc$hidden.structure$adj
  h.n.var <- hlc$hidden.structure$n.var
  o.n.var <- hlc$observed.structure$n.var
  n.var <- h.n.var + o.n.var
  ADJ <- matrix(0,ncol=n.var,nrow=n.var)
  ADJ[1:h.n.var,1:h.n.var] <- hidden.adj
  for(op in o.parent.nodes){
    o.var.op <- which(observed.parent==op)
    ADJ[op,h.n.var+o.var.op] <- 1
  }
  var.names <- c(paste("h",1:h.n.var,sep=""),paste("o",1:o.n.var,sep=""))
  ### Emit the DOT source into a string buffer.
  buffer <- NULL # initialize strings buffer.
  buffer <- paste(buffer,"digraph dotgraph","{\n")
  buffer <- paste(buffer,"graph [ratio = 0.8];\n")
  # Declare each node once: hidden nodes get the enlarged font.
  for(i in 1:h.n.var){
    buffer <- paste(buffer,var.names[i],"[fontsize =",font.size*font.ratio,"];\n")
  }
  for(j in (h.n.var+1):n.var){
    buffer <- paste(buffer,var.names[j],"[fontsize =",font.size,"];\n")
  }
  # Emit one edge per nonzero ADJ entry; only hidden variables are parents.
  for(i in 1:h.n.var){
    for(j in 1:n.var){
      if(ADJ[i,j]==1){
        buffer <- paste(buffer,var.names[i],"->",var.names[j],";\n")
      }
    }
  }
  buffer <- paste(buffer,"}")
  cat(buffer,file=file)
}

### Model for the synthetic data used in this study. ###
# The model from the presentation slides: root h2 with latent children h1 and
# h3; each latent variable has 10 binary observed children.
synthetic.hlc <- list()
synthetic.hlc$hidden.structure$root.node <- 2
h.adj <- matrix(0,ncol=3,nrow=3)
h.adj[2,c(1,3)] <- 1
synthetic.hlc$hidden.structure$adj <- h.adj
synthetic.hlc$hidden.structure$dim <- rep(2,3)
synthetic.hlc$hidden.structure$n.var <- 3
synthetic.hlc$hidden.structure$cpt <- array(c(.2,.5,.2,.8,.5,.8,.8,0,.8,.2,0,.2),dim=c(3,2,2))
synthetic.hlc$observed.structure$dim <- rep(2,30)
synthetic.hlc$observed.structure$parent <- c(rep(1,10),rep(2,10),rep(3,10))
synthetic.hlc$observed.structure$n.var <- 30
synthetic.hlc$observed.structure$cpt <- array(c(rep(.2,30),rep(.8,30),rep(.8,30),rep(.2,30)),dim=c(30,2,2))
class(synthetic.hlc) <- "hlc"