Markov Chain Clustering (MCL) is fast scalable supervised clustering algorithm based on information flow in graphs. The algorithm finds cluster in graphs by random walks. It uses two important operators one is the inflation and other the expansion. "Expansion takes the power of a stochastic matrix using the normal matrix product. Inflation takes the Hadamard power of a matrix (taking powers entrywise), followed by a scaling step, such that the resulting matrix is stochastic again, i.e. the matrix elements (on each column) correspond to probability values." More information can be found here and here . Below the R code describes how to perform it step by step. Also a nice explanation is presented here .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Algorithm to perform MCL Clustering in R | |
# Add the identity matrix to the matrix which indicates self loops | |
add.selfloops <- function (M) { | |
LM<-M+diag(dim(M)[1]) | |
return (LM); | |
} | |
# Inflation step of MCL | |
inflate <- function (M,inf) { | |
M <- M^(inf) | |
return (M) | |
} | |
# Normalize the matrix by column | |
norm <- function (M) { | |
col.sum <- apply(M,2,sum) | |
M <- t(M) / col.sum | |
return (t(M)) | |
} | |
# MCL procedure | |
mcl <- function (M, # Matrix | |
inf, # Inflation value | |
iter, # Number of iterations | |
verbose = T | |
) | |
{ | |
for (i in 1:iter) { | |
old.M <- M | |
M.norm <- norm(M) | |
M <- M.norm%*%M.norm | |
M <- inflate(M, inf) | |
M <- norm(M) | |
# Check to reach steady state vector | |
if (sum(old.M == M) == dim(M)[1]*dim(M)[2]) { | |
break; | |
} | |
if (verbose) { | |
print (paste ("iteration", i)); | |
} | |
} | |
return (M); | |
} | |
# get the grouping using the matrix names after running the iteration | |
group.mcl.clusters <- function (M) { | |
M.names <- row.names(M) | |
clustered.nodes <- vector(mode = "logical", length = dim(M)[1]) | |
for (i in 1:dim(M)[1]) { | |
nodes <- M.names[which(M[i,] != 0)] | |
if (length(nodes) > 0 && !clustered.nodes[which(M[i,] != 0)]) { | |
print (nodes) | |
clustered.nodes[which(M[i,] != 0)] = T | |
} | |
} | |
} | |
## End of functions | |
## Example | |
## Consider a unipartite network example protein protein interaction data or social network data. | |
data <- as.matrix(read.csv("mcl_data.csv", header = T, row.names = 1)) | |
data <- as.matrix(data) | |
data <- add.selfloops(data); | |
inf <- 2 | |
iter<-200 | |
# Launch MCL | |
clusters <- mcl(data,inf,iter, verbose = T); | |
group.mcl.clusters(clusters) | |