Pages

Thursday, September 11, 2014

Some function for VS: AUC , BEDROC and RIE in R

Below are some R functions to compute Area under the curve , Robust Initial Enhancement Metric and Boltzmann-Enhanced Discrimination of ROC which is implemented in

Truchon et al. Evaluating Virtual Screening Methods: Good and Bad Metrics for the "Early Recognition" J. Chem. Inf. Model. (2007) 47, 488-508.

These metrics use for the early recognition problem in virtual screening.

AUC
# x = a vector for scores
# y = a vector of labels
function (x, y, decreasing = TRUE, top = 1)
{
if (length(x) != length(y)) {
stop(paste("Length of scores does not match with labels."))
}
N <- length(y)
n <- sum(y == 1)
x_p <- -Inf
area <- 0
fp = tp = fp_p = tp_p = 0
ord <- order(x, decreasing = decreasing)
for (i in seq_along(ord)) {
j <- ord[i]
if (x[j] != x_p) {
if (fp >= (N - n) * top) {
rat <- ((N - n) * top - fp_p)/(fp - fp_p)
area <- area + rat * (fp - fp_p) * (tp + tp_p)/2
return(area/(n * (N - n) * top))
}
area <- area + (fp - fp_p) * (tp + tp_p)/2
x_p <- x[j]
fp_p <- fp
tp_p <- tp
}
if (y[j] == 1) {
tp <- tp + 1
}
else {
fp <- fp + 1
}
}
area <- area + (fp - fp_p) * (tp + tp_p)/2
return(area/(n * (N - n)))
}
view raw auc.R hosted with ❤ by GitHub


RIE

# x = a vector of scores
# y = a vector of labels
function (x, y, decreasing = TRUE, alpha = 20)
{
if (length(x) != length(y)) {
stop(paste("The length of scores should be equal to number of labels."))
}
N <- length(y)
n <- length(which(y == 1))
ord <- order(x, decreasing = decreasing)
m_rank <- which(y[ord] == 1)
s <- sum(exp(-alpha * m_rank/N))
ra <- n/N
ri <- (N - n)/N
random_sum <- (n/N) * (1 - exp(-alpha))/(exp(alpha/N) - 1)
return(s/random_sum)
}
view raw rie.R hosted with ❤ by GitHub


BEDROC

# x = a vector of scores
# y = a vector of labels
function (x, y, decreasing = TRUE, alpha = 20)
{
if (length(x) != length(y)) {
stop(paste("The length of scores should be equal to number of labels."))
}
N <- length(y)
n <- length(which(y == 1))
ord <- order(x, decreasing = decreasing)
m_rank <- which(y[ord] == 1)
s <- sum(exp(-alpha * m_rank/N))
ra <- n/N
ri <- (N - n)/N
random_sum <- ra * exp(-alpha/N) * (1 - exp(-alpha))/(1 -
exp(-alpha/N))
fac <- ra * sinh(alpha/2)/(cosh(alpha/2) - cosh(alpha/2 -
alpha * ra))
cte = 1/(1 - exp(alpha * ri))
return(s/random_sum * fac + cte)
}
view raw bedroc.R hosted with ❤ by GitHub


No comments: