########################################################################################################
# The Broad Institute
# SOFTWARE COPYRIGHT NOTICE AGREEMENT
# This software and its documentation are copyright 2007 by the
# Broad Institute/Massachusetts Institute of Technology.
# All rights are reserved.
#
# This software is supplied without any warranty or guaranteed support
# whatsoever. Neither the Broad Institute nor MIT can be responsible for
# its use, misuse, or functionality.
########################################################################################################
#
# An erythroid differentiation signature predicts response to lenalidomide in Myelodysplastic Syndrome
# Benjamin L. Ebert, Naomi Galili, Pablo Tamayo, Jocelyn Bosco, Raymond Mak, Jennifer Pretz,
# Christine Ladd-Acosta, Richard Stone.
#
# Author: Pablo Tamayo  -  April 12, 2007 tamayo@broad.mit.edu
#
# The R script below implements the regression model described in the paper to
# predict response to lenalidomide in patients with Myelodysplastic Syndrome.
#
# Additional auxiliary functions are included in file: Rev.msig.library.1.R
#
# The program uses a control gene signature defined on 5 housekeeping genes to normalize the expression
# data. It also performs column-rank normalization and row-standardization to make the model more robust and
# less dependent on platform idiosyncrasies. The script computes a score based on summarizing
# genes in a signature.
# This score is used as input to a probit linear regression model that is trained on the train set and
# applied to the test set.
#
# If you want to run this program make sure you change the file pathnames to the appropriate location
# in your computer. Make also sure you have the input datasets in the right locations and that the
# parameters are set properly according to your purposes.  Cut and paste the script into the
# R GUI to run the program. With the default settings you should be able to reproduce the heatmaps and
# prediction results reported in the paper and shown in figure 4. The program is set up to run the
# Affymetrix files but it can be easily changed to run the Luminex or qPCR datasets by just uncommenting the
# corresponding file pathnames below. The program uses by default a classic fit to a
# probit regression model using R's glm function but it can also use a Bayesian fit using the MCMCpack package.
#
# A more general version of this program will be made available as part of a forthcoming publication.
#
# This program comes in a ZIP file with the following dataset:
#
# Revlimid_Affy_train_5qm.gct  Affymetrix train set 5q- samples
# Revlimid_Affy_train_5qm.cls
#
# Revlimid_Affy_train_non_5qm.gct  Affymetrix train set non-5q- samples
# Revlimid_Affy_train_non_5qm.cls
#
# Revlimid_Affy_test_5qm.gct  Affymetrix test set 5q- samples
# Revlimid_Affy_test_5qm.cls  
#
# Revlimid_Affy_test_non_5qm.gct  Affymetrix test set non-5q- samples
# Revlimid_Affy_test_non_5qm.cls
#
# Revlimid_Affy_all.gct  Affymetrix all samples
# Revlimid_Affy_all.cls
#
# Revlimid_Affy_all_5qm.gct  Affymetrix all 5q- samples
# Revlimid_Affy_all_5qm.cls
#
# Revlimid_Affy_all_non_5qm.gct  Affymetrix all non-5q- samples
# Revlimid_Affy_all_non_5qm.cls
#
# Revlimid_Luminex_train_non_5qm.gct  Luminex train set non-5q- samples
# Revlimid_Luminex_train_non_5qm.cls
#
# Revlimid_Luminex_test_non_5qm.gct  Luminex test set non-5q- samples
# Revlimid_Luminex_test_non_5qm.cls
#
# Revlimid_qPCR_train_non_5qm.gct   qPCR train set non-5q- samples
# Revlimid_qPCR_train_non_5qm.cls
#
# signatures.gmt  file containing the erythroid and control signatures
#
# Upon completion the program produces the following output files:
#
# Revlimid.Affy.paper.plot.1.train.set.jpeg  heatmap for signature in the train set 
# Revlimid.Affy.paper.plot.2.train.set.jpeg  prediction scores in the train set 
# Revlimid.Affy.paper.plot.2.test.set.jpeg  heatmap for signature in the test set 
# Revlimid.Affy.paper.plot.1.test.set.jpeg  prediction scores in the test set 
#
# Other additional files contain the regression model scores and the predictive probabilities
#
# Revlimid.Affy.train.set.zscore.jpeg  
# Revlimid.Affy.train.set.zscore.gct
# Revlimid.Affy.train.set.probs.gct 
# Revlimid.Affy.train.set.post.prob.jpeg
# Revlimid.Affy.test.set.zscore.jpeg
# Revlimid.Affy.test.set.zscore.gct
# Revlimid.Affy.test.set.probs.gct
# Revlimid.Affy.test.set.post.prob.jpeg
#
# Program Files and Input Parameters #########################################

# Path of the auxiliary function library that accompanies this script. It is
# sourced first so that the helper functions it provides can be called below.
program.location    <-  "c:/CGP2007/Revlimid/Software2/Rev.msig.library.1.R"
source(program.location)   # Load function library

##### Affy datasets
# Affy training set
# train.ds is the expression dataset (.gct) and train.cls the matching class
# label file (.cls); both are read in the "Start of Method" section.

train.ds <-  "c:/CGP2007/Revlimid/Software2/Revlimid_Affy_train_non_5qm.gct"
train.cls <- "c:/CGP2007/Revlimid/Software2/Revlimid_Affy_train_non_5qm.cls"

# Affy test set: non 5q- samples 
# This is the active default; the commented-out alternatives that follow
# replace these two assignments when uncommented.

test.ds <-  "c:/CGP2007/Revlimid/Software2/Revlimid_Affy_test_non_5qm.gct"
test.cls <- "c:/CGP2007/Revlimid/Software2/Revlimid_Affy_test_non_5qm.cls"

# Affy test set: all the 5q- samples

# test.ds <- "c:/CGP2007/Revlimid/Software2/Revlimid_Affy_all_5qm.gct"
# test.cls <- "c:/CGP2007/Revlimid/Software2/Revlimid_Affy_all_5qm.cls"

# Affy test set: all the test samples (5q- and non 5q-)

# test.ds <- "c:/CGP2007/Revlimid/Software2/Revlimid_Affy_test_all.gct"
# test.cls <- "c:/CGP2007/Revlimid/Software2/Revlimid_Affy_test_all.cls"

# ###### qPCR datasets

# train.ds <-  "C:/CGP2007/Revlimid/Software2/Revlimid_qPCR_train_non_5qm.gct"
# train.cls <- "C:/CGP2007/Revlimid/Software2/Revlimid_qPCR_train_non_5qm.cls"

# test.ds <-   "C:/CGP2007/Revlimid/Software2/Revlimid_qPCR_train_non_5qm.gct"
# test.cls <-  "C:/CGP2007/Revlimid/Software2/Revlimid_qPCR_train_non_5qm.cls"

##### Luminex datasets

# train.ds <- "C:/CGP2007/Revlimid/Software2/Revlimid_Luminex_train_non_5qm.gct"
# train.cls <- "C:/CGP2007/Revlimid/Software2/Revlimid_Luminex_train_non_5qm.cls"

# test.ds <- "C:/CGP2007/Revlimid/Software2/Revlimid_Luminex_test_non_5qm.gct"
# test.cls <- "C:/CGP2007/Revlimid/Software2/Revlimid_Luminex_test_non_5qm.cls"

# Run parameters -------------------------------------------------------------
# target.class        class label (as written in the cls files) coded as 1
# true.classes.names  legend labels used in the z-score plot
target.class <- "R"  # Responders
true.classes.names <- c("Actual Responder", "Actual Non-Responder")
msig.db <- "c:/CGP2007/Revlimid/Software2/signatures.gmt"  # file with signatures
output.dir <- "c:/CGP2007/Revlimid/Software2/"   # prefix of every output path (keep the trailing slash)
msig.p.name <- "erythroid_non_5q-"  # Erythroid (UP) signature
msig.n.name <- "five_HKG"           # Group of 5 housekeeping genes for Affy data
file.iden <- "Revlimid.Affy"        # stem of every output file name
# file.iden <- "Revlimid.Luminex"
# file.iden <- "Revlimid.qPCR"
link.function <- "probit"  #  "probit" or "logit"
model.type <- "Classic" #  "Bayesian" or "Classic"
burnin.iter <- 5000     # MCMC burn-in iterations (used by the Bayesian fit only)
mcmc.iter <- 25000      # MCMC iterations after burn-in (Bayesian fit only)
# Expression floor and ceiling applied to both datasets before normalization.
# Setting either one to the string "NULL" disables it.
thres.preprocess <- 20
ceiling.preprocess <- 100000
# Spelled out as TRUE: T is an ordinary variable that can be reassigned.
rank.normalize.datasets <- TRUE
seed <- 1234            # passed to set.seed() at the start of the method
c1 <- c("green3", "red3") # color for target class and control class

################## Start of Method ################################################
  
# Seed the random number generator so that stochastic steps are repeatable.
set.seed(seed, kind = NULL)

# Read the training dataset: m holds the expression values with one row per
# gene (Ng rows) and one column per sample (Ns columns).

dataset <- MSIG.Gct2Frame(filename = train.ds)
m <- data.matrix(dataset$ds)
gene.names <- dataset$row.names
gene.descs <- dataset$descs
sample.names <- dataset$names
Ng <- nrow(m)
Ns <- ncol(m)

# Read class labels
#   class.list    label of every sample, as read from the cls file
#   class.labels  position of each sample's label in the phenotype list
#                 returned by the reader
#   class.phen    distinct labels in order of first appearance in class.list
# NOTE(review): class.labels is indexed against CLS$phen while class.phen is
# rebuilt with unique(); this presumably yields the same ordering -- confirm
# against MSIG.ReadClsFile.

CLS <- MSIG.ReadClsFile(file = train.cls)
class.list <- CLS$class.list
class.labels <- match(class.list, CLS$phen)
class.phen <- unique(class.list)

if (!(target.class %in% class.phen)) {
  stop("Target class not represented in training data (cls file)")
}

# Target class first, every other class after it (used for legends).
order.class.phen <- c(target.class, setdiff(class.phen, target.class))

# Threshold, ceiling and rank normalization of the training matrix m.
#
# thres.preprocess / ceiling.preprocess are numeric limits. A limit is skipped
# when it is set to the string "NULL" (the sentinel this script has always
# used) or to an actual NULL, which previously aborted the `if` with an
# "argument is of length zero" error.

if (!is.null(thres.preprocess) && !identical(thres.preprocess, "NULL")) {
  m[m < thres.preprocess] <- thres.preprocess
}
if (!is.null(ceiling.preprocess) && !identical(ceiling.preprocess, "NULL")) {
  m[m > ceiling.preprocess] <- ceiling.preprocess
}

# Rank normalization: every column (sample) is replaced by its ranks, shifted
# to start at 0 and rescaled so that the values span 0-10000.
# as.logical() keeps accepting TRUE, T, 1 or "TRUE" as the switch value.

if (isTRUE(as.logical(rank.normalize.datasets))) {
  cols <- ncol(m)
  for (j in seq_len(cols)) {  # column rank normalization from 0 to N - 1
    m[, j] <- rank(m[, j], ties.method = "average") - 1
  }
  m <- 10000 * m / (nrow(m) - 1)  # data is rescaled in scale 0-10000
}

# Start methodology

# Gauss error function written in terms of the standard normal CDF:
#   erf(x) = 2 * pnorm(sqrt(2) * x) - 1
# x: numeric vector. Returns a numeric vector of the same length, in [-1, 1].
# Going the other way needs the inverse scaling:
#   pnorm(x) = (1 + erf(x / sqrt(2))) / 2
erf <- function(x) {
  2 * pnorm(sqrt(2) * x) - 1
}

# Read signatures file

# Parse the signatures file. Each line is one gene set with tab-separated
# fields: field 1 = set name, field 2 = description, remaining fields = gene
# tags (GMT layout).
#
# Results
#   gs        Ngs x max.size.G character matrix of gene tags, padded with "null"
#   gs.names  set names
#   gs.desc   set descriptions
#   size.G    number of tags in each set
#
# Blank lines are ignored, and a line without any gene tag yields an empty set
# instead of aborting the parse.

temp <- readLines(msig.db)
temp <- temp[nzchar(temp)]
max.Ngs <- length(temp)
if (max.Ngs == 0) {
  stop(paste("No gene sets found in signatures file:", msig.db))
}

gs.fields <- strsplit(temp, "\t")   # split every line once and reuse it below
temp.size.G <- vector(length = max.Ngs, mode = "numeric")
for (i in seq_len(max.Ngs)) {
  temp.size.G[i] <- max(length(gs.fields[[i]]) - 2, 0)
}
max.size.G <- max(temp.size.G)
gs <- matrix("null", nrow = max.Ngs, ncol = max.size.G)
temp.names <- vector(length = max.Ngs, mode = "character")
temp.desc <- vector(length = max.Ngs, mode = "character")
gs.count <- 1
for (i in seq_len(max.Ngs)) {
  gs.line <- gs.fields[[i]]
  gene.set.name <- gs.line[1]
  gene.set.desc <- gs.line[2]        # description is the second field, not the name
  gene.set.tags <- gs.line[-(1:2)]   # character(0) when the line lists no genes
  set.size <- length(gene.set.tags)
  temp.size.G[gs.count] <- set.size
  gs[gs.count, ] <- c(gene.set.tags, rep("null", max.size.G - set.size))
  temp.names[gs.count] <- gene.set.name
  temp.desc[gs.count] <- gene.set.desc
  gs.count <- gs.count + 1
}

Ngs <- gs.count - 1
gs.names <- temp.names[seq_len(Ngs)]
gs.desc <- temp.desc[seq_len(Ngs)]
size.G <- temp.size.G[seq_len(Ngs)]

print(c("Number of Gene Sets:", Ngs))
print(c("Original number of Gene Sets:", max.Ngs))
print(c("Maximum gene set size:", max.size.G))


# Select genes in positive and control signature

# Look up the positive and control signatures by name and extract the matching
# rows of the training matrix.
#   msig.list.p / msig.list.n  gene tags of each signature as listed in the file
#   set.p / set.n              row of m for every tag (NA when the gene is absent)
#   msig.p / msig.n            expression sub-matrices, genes x samples
# A signature name that is missing from the file, or a signature with no gene
# present in the dataset, now stops with an explicit message instead of
# failing later with an obscure indexing error.

set.entry.p <- match(msig.p.name, gs.names)
if (is.na(set.entry.p)) {
  stop(paste("Signature not found in signatures file:", msig.p.name))
}
msig.list.p <- gs[set.entry.p, ][seq_len(size.G[set.entry.p])]

set.entry.n <- match(msig.n.name, gs.names)
if (is.na(set.entry.n)) {
  stop(paste("Signature not found in signatures file:", msig.n.name))
}
msig.list.n <- gs[set.entry.n, ][seq_len(size.G[set.entry.n])]

set.p <- match(msig.list.p, gene.names)
size.set.p <- sum(!is.na(set.p))
if (size.set.p == 0) {
  stop(paste("No gene of signature", msig.p.name, "is present in the training dataset"))
}
# drop = FALSE always returns a genes x samples matrix and keeps the gene row
# name, including the single-gene case that used to go through t(as.matrix()).
msig.p <- m[set.p[!is.na(set.p)], , drop = FALSE]

set.n <- match(msig.list.n, gene.names)
size.set.n <- sum(!is.na(set.n))
if (size.set.n == 0) {
  stop(paste("No gene of signature", msig.n.name, "is present in the training dataset"))
}
msig.n <- m[set.n[!is.na(set.n)], , drop = FALSE]

# plot signatures before normalization

# windows(width=24, height=20)
# nf <- layout(matrix(c(1, 2, 3, 4), 2, 2, byrow=T), widths = c(6, 2), heights = c(1.5, 1), respect = FALSE)

# Class colours in class.phen order: c1 as given when the target class is the
# first phenotype, reversed otherwise, so the target class always gets c1[1].
c.vec <- if (target.class == class.phen[1]) c1 else rev(c1)

# MSIG.HeatMapPlot.3(V = msig.p, row.names = row.names(msig.p), col.labels = class.labels,
#                   col.classes = class.phen, phen.cmap = c.vec, col.names = sample.names,
#                   main = "Signature: training set before cntrl normalization", xlab=" ", ylab=" ",
#                   sub = " ", cmap.type = 4, row.norm=T) 

# leg.txt <- class.phen
# p.vec <- rep(21, 21)
# par(mar = c(0, 0, 0, 0))
# plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
# legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
#       col = "black", cex = 2, pt.cex=3)

# MSIG.HeatMapPlot.3(V = msig.n, row.names = row.names(msig.n), col.labels = class.labels,
#                   col.classes = class.phen, phen.cmap = c1, col.names = sample.names,
#                   main = "Control set: training set before cntrl normalization", xlab=" ", ylab=" ",
#                   sub = " ", cmap.type = 4, row.norm=T) 

# savePlot(filename = paste(output.dir, file.iden, ".train.set.b4.norm", sep=""), type ="jpeg", device = dev.cur())

# Define baseline control as average of control signature

# Baseline control: the per-sample mean of the control signature rows, or the
# control row itself when the control signature lists a single gene.

cntrl <- if (length(msig.list.n) == 1) {
  msig.n
} else {
  apply(msig.n, MARGIN = 2, FUN = mean)
}

# Preprocessing: every signature gene is divided, sample by sample, by the
# control baseline. m2 is the resulting genes x samples ratio matrix.

Ng2 <- nrow(msig.p)
m2 <- matrix(0, nrow = Ng2, ncol = Ns)
for (i in 1:Ng2) {
  m2[i, ] <- msig.p[i, ] / cntrl
}

# plot signature after ratio

#  windows(width=16, height=10)
#  nf <- layout(matrix(c(1, 2), 1, 2, byrow=T), widths = c(6, 2), respect = FALSE)

# Class colours in class.phen order (target class always gets c1[1]).
c.vec <- if (target.class == class.phen[1]) c1 else rev(c1)

#  MSIG.HeatMapPlot.3(V = m2, row.names = row.names(msig.p), col.labels = class.labels, col.classes = class.phen, phen.cmap = c.vec, col.names = sample.names, main = "Signature: training set after cntrl normalization", xlab=" ", ylab=" ", sub = " ", cmap.type = 4, row.norm=T) 

 # leg.txt <- class.phen
 # p.vec <- rep(21, 21)
 # par(mar = c(0, 0, 0, 0))
 # plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
 # legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
 #        col = "black", cex = 2, pt.cex=3)
  
#  savePlot(filename = paste(output.dir, file.iden, ".train.set.after.ratio", sep=""), type ="jpeg",
#           device = dev.cur())

# Row normalization: each signature gene (row of m2) is centred on its mean and
# divided by its standard deviation across the training samples. A gene with
# zero standard deviation is set to 0 for every sample. row.mean and row.sd
# are kept because the test set is standardized with the same values.

row.mean <- apply(m2, MARGIN = 1, FUN = mean)
row.sd <- apply(m2, MARGIN = 1, FUN = sd)
row.n <- nrow(m2)
m3 <- matrix(0, nrow = Ng2, ncol = Ns)
for (i in 1:Ng2) {
  m3[i, ] <- if (row.sd[i] == 0) 0 else (m2[i, ] - row.mean[i]) / row.sd[i]
}

# plot signature after row normalization

#  windows(width=16, height=10)
#  nf <- layout(matrix(c(1, 2), 1, 2, byrow=T), widths = c(6, 2), respect = FALSE)

#  if (target.class == class.phen[1]) {
#    c.vec <- c1
#  } else {
#    c.vec <- rev(c1)
#  }

#  MSIG.HeatMapPlot.3(V = m3, row.names = row.names(msig.p), col.labels = class.labels,
#                     col.classes = class.phen, phen.cmap = c.vec, col.names = sample.names,
#                     main = "P signature: training set after row normalization", xlab=" ", ylab=" ",
#                     sub = " ", cmap.type = 4, row.norm=T) 

#  leg.txt <- class.phen
#  p.vec <- rep(21, 21)
#  par(mar = c(0, 0, 0, 0))
#  plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
#  legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
#         col = "black", cex = 2, pt.cex=3)

#  savePlot(filename = paste(output.dir, file.iden, ".train.set.after.norm", sep=""), type ="jpeg",
#           device = dev.cur())

# Compute average regressor scores
# z.vector: one score per training sample, the mean of the normalized
# signature genes in that sample's column of m3.
# z.vector.error: standard deviation of the same column divided by
# sqrt(length(z.vector)).
# NOTE(review): length(z.vector) is the number of samples (columns of m3); a
# standard error of a mean taken over genes would divide by sqrt(nrow(m3))
# instead -- confirm which was intended before changing the published figure.

  z.vector <- apply(m3, MARGIN=2, FUN=mean)
  z.vector.error <- apply(m3, MARGIN=2, FUN=sd)/sqrt(length(z.vector))
                                                                                
# Plot score
# Opens a new graphics window split into two panels (width ratio 6:2): the
# left panel receives the one-row heat map of the training scores, the right
# panel the class legend. windows() and savePlot() are used here, so this
# section presumably only runs on the Windows R GUI -- verify on other systems.

windows(width=16, height=4)
nf <- layout(matrix(c(1, 2), 1, 2, byrow=T), widths = c(6, 2), respect = FALSE)

# Class colours in class.phen order (target class gets c1[1]).
if (target.class == class.phen[1]) {
  c.vec <- c1
} else {
  c.vec <- rev(c1)
}

MSIG.HeatMapPlot.3(V = t(as.matrix(z.vector)), row.names = "regressor score", col.labels = class.labels,
                   col.classes = class.phen, phen.cmap = c.vec, col.names = sample.names,
                   main = "Training regressor score", xlab=" ", ylab=" ", sub = " ", cmap.type = 4,
                   row.norm=T) 

# Legend panel: an empty plot is used as a canvas for legend().
leg.txt <- class.phen
p.vec <- rep(21, 21)  # plotting symbol 21 (filled circle), repeated for the legend entries

par(mar = c(0, 0, 0, 0))
plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
       col = "black", cex = 2, pt.cex=3)

# Save the current window as <output.dir><file.iden>.train.set.zscore (jpeg).
savePlot(filename = paste(output.dir, file.iden, ".train.set.zscore", sep=""), type ="jpeg",
         device = dev.cur())

# save regressor score in file (for glm ANOVA analysis)
# One-row table: a column per training sample holding its score.
# Note: the "Training plot version 2" section later writes to this same file
# name again, with class labels added and scores rounded to 3 digits.

V <- data.frame(t(z.vector))
names(V) <- sample.names
row.names(V) <- "regressor score"
descs <- "regressor score"
write.gct(gct.data.frame = V, descs = descs, filename =
          paste(output.dir, file.iden, ".train.set.zscore.gct", sep=""))  

# Regression model: y ~ z.vector fitted on the training set.
#   y              1 for samples of target.class, 0 otherwise
#   model.type     "Classic"  -> maximum-likelihood fit with glm()
#                  "Bayesian" -> MCMC fit with MCMCpack (MCMClogit / MCMCprobit)
#   link.function  "logit" or "probit"
# Results
#   reg.model      the fitted model object
#   beta0, beta1   intercept and slope: single numbers for the classic fit,
#                  columns 1 and 2 of the returned draws for the Bayesian fit
#   prob.i         n.obs x 3 matrix, filled in by the next section

n.obs <- Ns

if (!is.element(target.class, class.phen)) {
  stop("Target class not represented in training data (cls file)")
}

y <- ifelse(class.list == target.class, 1, 0)
col.vec <- y

if (model.type == "Bayesian") {
  # MCMCpack is loaded only when it is needed, so the default classic fit also
  # runs on machines where the package is not installed.
  library(MCMCpack)
  if (link.function == "logit") {
    reg.model <- MCMClogit(y ~ z.vector, burnin = burnin.iter,
                           mcmc = mcmc.iter, bayes.resid = TRUE) # Logit
  } else if (link.function == "probit") {
    reg.model <- MCMCprobit(y ~ z.vector, burnin = burnin.iter,
                            mcmc = mcmc.iter, bayes.resid = TRUE) # Probit
  } else {
    stop("Unknown link function")
  }
  beta0 <- reg.model[, 1]
  beta1 <- reg.model[, 2]
} else if (model.type == "Classic") {
  if (link.function == "logit") {
    reg.model <- glm(y ~ z.vector, family = binomial("logit")) # Logit
  } else if (link.function == "probit") {
    reg.model <- glm(y ~ z.vector, family = binomial("probit")) # Probit
  } else {
    stop("Unknown link function")
  }
  # coef() returns the same named coefficients as reg.model[[1]] without
  # relying on the position of the component inside the glm object.
  beta0 <- coef(reg.model)[1]
  beta1 <- coef(reg.model)[2]
} else {
  stop("Unknown model type")
}

print(c("beta0=", beta0, " beta1=", beta1))
prob.i <- matrix(0, nrow = n.obs, ncol = 3)

# Probability summaries on the training set.
# For every training sample (rows of prob.i), and for each of 1000 grid points
# spanning the observed score range (x.m, rows of prob.m), the linear predictor
# beta0 + beta1 * score is pushed through the link transform and summarized by
# its median (column 1), 5% quantile (column 2) and 95% quantile (column 3).
# With single-number coefficients the three columns coincide; with vectors of
# draws they form a band.
#
# NOTE(review): in the probit branch (erf(eta) + 1)/2 equals
# pnorm(sqrt(2) * eta) given erf() as defined in this script, whereas the
# inverse probit link is pnorm(eta). The 0.5 crossing (istar) is not affected,
# but all other probabilities are pushed away from 0.5. Left as is to keep the
# script's numerical output -- confirm whether this is intended.

prob.levels <- c(0.5, 0.05, 0.95)   # median, lower, upper

for (i in 1:n.obs) {
  eta <- beta0 + beta1 * z.vector[i]
  if (link.function == "logit") {
    p.vec <- exp(eta) / (1 + exp(eta))  # Logit
  } else if (link.function == "probit") {
    p.vec <- (erf(eta) + 1) / 2  # Probit
  } else {
    stop("Unknown link function")
  }
  prob.i[i, ] <- quantile(p.vec, probs = prob.levels)
}

# Same summaries on an evenly spaced grid, used to draw the model curve.
xmin <- min(z.vector)
xmax <- max(z.vector)
range.x <- xmax - xmin
x.m <- xmin + (1:1000) * (range.x / 1000)
prob.m <- matrix(0, nrow = 1000, ncol = 3)
for (k in 1:1000) {
  eta <- beta0 + beta1 * x.m[k]
  if (link.function == "logit") {
    p.vec <- exp(eta) / (1 + exp(eta))  # Logit
  } else if (link.function == "probit") {
    p.vec <- (erf(eta) + 1) / 2  # Probit
  } else {
    stop("Unknown link function")
  }
  prob.m[k, ] <- quantile(p.vec, probs = prob.levels)
}

# istar: the grid score whose median probability is closest to 0.5, i.e. the
# decision boundary expressed on the score axis.
istar <- x.m[which.min(abs(0.5 - prob.m[, 1]))]

# Save prob.m in file 
  
# V <- data.frame(rbind(x.m, prob.m[,1], prob.m[,2], prob.m[,3]))
# names(V) <- seq(1, length(x.m))
# row.names(V) <- c("x.m", "prob.m_0.5", "prob.m_0.05", "prob.m_0.95")
# descs <- c("x.m", "prob.m_0.5", "prob.m_0.05", "prob.m_0.95")
# write.gct(gct.data.frame = V, descs = descs, filename =
#          paste(output.dir, file.iden, ".train.set.prob.model.gct", sep=""))  

# Order the training samples by increasing score for plotting, and translate
# the 0/1 outcome into colours (c1[1] for the target class, c1[2] otherwise).
x.index <- order(z.vector, decreasing = FALSE)
x.order <- z.vector[x.index]
prob.i.order <- prob.i[x.index, ]
col.vec <- ifelse(col.vec == 1, c1[1], c1[2])
col.vec.order <- col.vec[x.index]

# Save probabilities in file
# One column per training sample (original order); rows are the class index,
# the score, the decision boundary and the three probability summaries.

prob.row.labels <- c("class", "zscore", "istar", "prob_0.5", "prob_0.05", "prob_0.95")
V <- data.frame(rbind(class.labels, z.vector, rep(istar, length(z.vector)),
                      prob.i[, 1], prob.i[, 2], prob.i[, 3]))
names(V) <- sample.names
row.names(V) <- prob.row.labels
descs <- prob.row.labels
write.gct(gct.data.frame = V, descs = descs,
          filename = paste0(output.dir, file.iden, ".train.set.probs.gct"))

# Plot posterior probabilities

# Training-set probability plot: probability of the target class against the
# regressor score. Left panel = plot, right panel = legend (widths 3.75:1).
windows(height = 7, width = 9.5)
nf <- layout(matrix(c(1, 2), 1, 2, byrow=T), widths = c(3.75, 1), heights = 1, respect = FALSE)

# Set up the axes only (col = 0 draws the points invisibly); the visible
# points are added last so they sit on top of the curves.
plot(x.order, prob.i.order[,1], main = paste("Probability for Target Class:", target.class, sep=" "),
    sub=" ", pch=20,ylim = c(-0.2, 1.07), col = 0, cex=2, xlab="Regressor Score", ylab="Probability")
# Model curve on the score grid: median (thick black) and 5% / 95% quantiles (blue).
points(x.m, prob.m[,1], type="l", lwd = 2, col=1, lty=1, cex=1)
points(x.m, prob.m[,2], type="l", col=4, lty=1, cex=1)
points(x.m, prob.m[,3], type="l", col=4, lty=1, cex=1)
# Per-sample segment from the 5% to the 95% quantile.
arrows(x.order, prob.i.order[,2], x.order, prob.i.order[,3], col = 4, angle=90,
       code=3, length=0.0)

# From here on range.x holds c(min, max) of the sorted scores, no longer the
# width that was used to build the x.m grid.
range.x <- range(x.order)
points(range.x, c(0.5, 0.5), type="l", lty=3, col = 1, lwd=2)     # probability 0.5 reference line
points(range.x, c(-.15, -0.15), type="l", lty=1, col = 1, lwd=2)  # baseline for the class strip
points(c(istar, istar), c(-0.07, 1.07), type="l", lty=3, col = 1, lwd=2)  # decision boundary
# Samples coloured by actual class: on the curve, and repeated on the strip at y = -0.15.
points(x.order, prob.i.order[,1], pch=21, bg = col.vec.order, col = 1, cex=2)
points(x.order, rep(-0.15, length(x.order)), pch=21, bg = col.vec.order, col = 1, cex=2)

# Legend panel: target class first (order.class.phen), so c1 is used as is.
leg.txt <- order.class.phen
p.vec <- rep(21, 21)
c.vec <- c1
par(mar = c(0, 0, 0, 0))
plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
       col = "black", cex = 1.2, pt.cex=2)

# Save the current window as <output.dir><file.iden>.train.set.post.prob (jpeg).
savePlot(filename = paste(output.dir, file.iden, ".train.set.post.prob", sep=""),
         type ="jpeg", device = dev.cur())

# Plot Bayesian residuals

# Residual plot for the training set, drawn only for the Bayesian fit:
# outcome minus probability, against the median probability.
if (model.type == "Bayesian") {

   windows(height = 7, width = 9.5)
   nf <- layout(matrix(c(1, 2), 1, 2, byrow=T), widths = c(3.75, 1), heights = 1, respect = FALSE)

   # Residuals with respect to the median, 5% and 95% probability summaries.
   # NOTE(review): the name `residuals` masks the stats::residuals() generic
   # for the rest of the session when looked up as a variable.
   residuals <- y - prob.i[,1]
   residuals.2 <- y - prob.i[,2]
   residuals.3 <- y - prob.i[,3]
   ylimits <- range(c(residuals.2, residuals.3))
   plot(prob.i[,1], residuals, main = train.ds, sub = "Training Set", pch=21, xlim = c(0, 1),
     ylim = ylimits, col=1, bg=col.vec, cex=2, xlab="Probability", ylab="Bayesian Residual")
   # Error bars spanning the residuals at the two outer quantiles; the points
   # are redrawn afterwards so they sit on top of the bars.
   arrows(prob.i[,1], residuals.2, prob.i[,1], residuals.3, col = 4, angle=90, code=3, length=0.1)
   points(prob.i[,1], residuals, pch=21, ylim = c(-0.2, 1.07), col=1, bg=col.vec, cex=2)

   # Legend panel: target class first, coloured with c1.
   leg.txt <- order.class.phen
   p.vec <- rep(21, 21)
   c.vec <- c1
   par(mar = c(0, 0, 0, 0))
   plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
   legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
       col = "black", cex = 1.2, pt.cex=2)

   # Save the current window as <output.dir><file.iden>.train.set.post.resid (jpeg).
   savePlot(filename = paste(output.dir, file.iden, ".train.set.post.resid", sep=""), type ="jpeg",
            device = dev.cur())
 }

### Apply model to test set 

# Read test dataset

# Read the test dataset: m.test holds the expression values with one row per
# gene (Ng.test rows) and one column per sample (Ns.test columns).
dataset.test <- MSIG.Gct2Frame(filename = test.ds)
m.test <- data.matrix(dataset.test$ds)
gene.names.test <- dataset.test$row.names
gene.descs.test <- dataset.test$descs
sample.names.test <- dataset.test$names
Ns.test <- ncol(m.test)
Ng.test <- nrow(m.test)

# Threshold and ceiling, with the same limits as for the training set. A limit
# is skipped when it is set to the string "NULL" (the sentinel this script has
# always used) or to an actual NULL, which previously aborted the `if` with an
# "argument is of length zero" error.
if (!is.null(thres.preprocess) && !identical(thres.preprocess, "NULL")) {
  m.test[m.test < thres.preprocess] <- thres.preprocess
}
if (!is.null(ceiling.preprocess) && !identical(ceiling.preprocess, "NULL")) {
  m.test[m.test > ceiling.preprocess] <- ceiling.preprocess
}

# Rank normalization: every column (sample) is replaced by its ranks, shifted
# to start at 0 and rescaled so that the values span 0-10000.
# as.logical() keeps accepting TRUE, T, 1 or "TRUE" as the switch value.

if (isTRUE(as.logical(rank.normalize.datasets))) {
  cols <- ncol(m.test)
  for (j in seq_len(cols)) {  # column rank normalization from 0 to N - 1
    m.test[, j] <- rank(m.test[, j], ties.method = "average") - 1
  }
  m.test <- 10000 * m.test / (nrow(m.test) - 1)  # data is rescaled in scale 0-10000
}

# Select genes in positive and control signature

# Extract the rows of the positive and control signatures from the test matrix.
#   set.p.test / set.n.test    row of m.test for every signature tag (NA when absent)
#   msig.p.test / msig.n.test  expression sub-matrices, genes x samples
# A signature with no gene present in the test dataset now stops with an
# explicit message instead of failing later with an obscure indexing error.

set.p.test <- match(msig.list.p, gene.names.test)
size.set.p.test <- sum(!is.na(set.p.test))
if (size.set.p.test == 0) {
  stop(paste("No gene of signature", msig.p.name, "is present in the test dataset"))
}
# drop = FALSE always returns a genes x samples matrix and keeps the gene row
# name, including the single-gene case that used to go through t(as.matrix()).
msig.p.test <- m.test[set.p.test[!is.na(set.p.test)], , drop = FALSE]

set.n.test <- match(msig.list.n, gene.names.test)
size.set.n.test <- sum(!is.na(set.n.test))
if (size.set.n.test == 0) {
  stop(paste("No gene of signature", msig.n.name, "is present in the test dataset"))
}
msig.n.test <- m.test[set.n.test[!is.na(set.n.test)], , drop = FALSE]

# Read test set class labels

# Test-set class labels, expressed in terms of the TRAINING phenotypes so that
# class indices and colours mean the same thing in both sets.
CLS.test <- MSIG.ReadClsFile(file=test.cls)
class.list.test <- CLS.test$class.list 
# Copied from the training set rather than derived from the test cls file.
class.phen.test <- class.phen
order.class.phen.test <- order.class.phen
# Index of every test label within the training phenotypes; a label that does
# not occur in the training set yields NA here.
class.labels.test <- match(class.list.test, class.phen)

# plot signatures before normalization

# windows(width=24, height=20)
# nf <- layout(matrix(c(1, 2, 3, 4), 2, 2, byrow=T), widths = c(6, 2), heights = c(1.5, 1), respect = FALSE)

# MSIG.HeatMapPlot.3(V = msig.p.test, row.names = row.names(msig.p.test),
#                   col.labels = class.labels.test, col.classes = class.phen.test,
#                   phen.cmap = c.vec, col.names = sample.names.test,
#                   main = "Signature: test set before normalization", xlab=" ",
#                   ylab=" ", sub = " ", cmap.type = 4, row.norm=T) 

# leg.txt <- class.phen.test
# p.vec <- rep(21, 21)
# par(mar = c(0, 0, 0, 0))
# plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
# legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
#       col = "black", cex = 2, pt.cex=3)

# MSIG.HeatMapPlot.3(V = msig.n.test, row.names = row.names(msig.n.test),
#                   col.labels = class.labels.test, col.classes = class.phen.test,
#                   phen.cmap = c1, col.names = sample.names.test,
#                   main = "Control set: test set before cntrl normalization", xlab=" ",
#                   ylab=" ", sub = " ", cmap.type = 4, row.norm=T) 

# savePlot(filename = paste(output.dir, file.iden, ".test.set.b4.norm", sep=""), type ="jpeg",
#         device = dev.cur())

# Define baseline control as average of control signature

# Baseline control for the test set: the per-sample mean of the control
# signature rows, or the control row itself when the control signature lists a
# single gene.
cntrl.test <- if (length(msig.list.n) == 1) {
  msig.n.test
} else {
  apply(msig.n.test, MARGIN = 2, FUN = mean)
}

# Preprocessing: every signature gene is divided, sample by sample, by the
# control baseline. m2.test is the resulting genes x samples ratio matrix.

Ng2.test <- nrow(msig.p.test)
m2.test <- matrix(0, nrow = Ng2.test, ncol = Ns.test)
for (i in 1:Ng2.test) {
  m2.test[i, ] <- msig.p.test[i, ] / cntrl.test
}
  
# Plot signature after ratio

# windows(width=16, height=10)
# nf <- layout(matrix(c(1, 2), 1, 2, byrow=T), widths = c(6, 2), respect = FALSE)

# if (target.class == class.phen[1]) {
#   c.vec <- c1
# } else {
#    c.vec <- rev(c1)
# }

# MSIG.HeatMapPlot.3(V = m2.test, row.names = row.names(msig.p.test),
#                   col.labels = class.labels.test, col.classes = class.phen.test,
#                   phen.cmap = c.vec, col.names = sample.names.test,
#                   main = "Signature: test set after cntrl normalization", xlab=" ",
#                   ylab=" ", sub = " ", cmap.type = 4, row.norm=T) 

# leg.txt <- class.phen.test
# p.vec <- rep(21, 21)
# par(mar = c(0, 0, 0, 0))
# plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
# legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
#       col = "black", cex = 2, pt.cex=3)
  
# savePlot(filename = paste(output.dir, file.iden, ".test.set.after.ratio", sep=""),
#           type ="jpeg", device = dev.cur())


# Row normalization

# Row normalization of the test set with the TRAINING row.mean / row.sd.
# The statistics are applied by row position, so row i of m2.test has to be
# the same gene as row i of the training ratio matrix. That only holds when
# the same signature genes were found in both datasets; otherwise the scores
# would be silently computed from mismatched genes, so stop instead.
train.genes.p <- msig.list.p[!is.na(set.p)]
test.genes.p <- msig.list.p[!is.na(set.p.test)]
if (!identical(train.genes.p, test.genes.p)) {
  stop("Signature genes found in the test set differ from those found in the training set")
}

m3.test <- matrix(0, nrow = Ng2.test, ncol = Ns.test)
for (i in seq_len(Ng2.test)) {
  if (row.sd[i] == 0) {
    m3.test[i, ] <- 0
  } else { # using row.mean and row.sd from training set
    m3.test[i, ] <- (m2.test[i, ] - row.mean[i]) / row.sd[i]
  }
}

# Plot signature after normalization

# windows(width=16, height=10)
# nf <- layout(matrix(c(1, 2), 1, 2, byrow=T), widths = c(6, 2), respect = FALSE)

# if (target.class == class.phen[1]) {
#  c.vec <- c1
# } else {
#  c.vec <- rev(c1)
# }

# MSIG.HeatMapPlot.3(V = m3.test, row.names = row.names(msig.p.test),
#                     col.labels = class.labels.test, col.classes = class.phen.test,
#                     phen.cmap = c.vec, col.names = sample.names.test,
#                     main = "Signature: test set after row normalization", xlab=" ",
#                     ylab=" ", sub = " ", cmap.type = 4, row.norm=T) 

# leg.txt <- class.phen.test
# p.vec <- rep(21, 21)
# par(mar = c(0, 0, 0, 0))
# plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
# legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
#         col = "black", cex = 2, pt.cex=3)
  
# savePlot(filename = paste(output.dir, file.iden, ".test.set.after.norm", sep=""), type ="jpeg",
#         device = dev.cur())

# Compute average z-scores

# z.vector.test: one score per test sample, the mean of the normalized
# signature genes in that sample's column of m3.test.
# z.vector.test.error: standard deviation of the same column divided by
# sqrt(length(z.vector.test)).
# NOTE(review): length(z.vector.test) is the number of test samples; a standard
# error of a mean taken over genes would divide by sqrt(nrow(m3.test)) instead
# -- confirm which was intended.
z.vector.test <- apply(m3.test, MARGIN=2, FUN=mean)
z.vector.test.error <- apply(m3.test, MARGIN=2, FUN=sd)/sqrt(length(z.vector.test))

print("norm1 -- z.vector.test:")
print(z.vector.test)

# Plot z-score
# New graphics window split into two panels (width ratio 6:2): one-row heat
# map of the test scores on the left, class legend on the right.

windows(width=16, height=4)
nf <- layout(matrix(c(1, 2), 1, 2, byrow=T), widths = c(6, 2), respect = FALSE)

# Class colours in class.phen order (target class gets c1[1]).
if (target.class == class.phen[1]) {
  c.vec <- c1
} else {
  c.vec <- rev(c1)
}

MSIG.HeatMapPlot.3(V = t(as.matrix(z.vector.test)), row.names = "regressor score",
                   col.labels = class.labels.test, col.classes = class.phen.test,
                   phen.cmap = c.vec, col.names = sample.names.test,
                   main = "Test regressor score", xlab=" ", ylab=" ", sub = " ", cmap.type = 4, row.norm=T) 

# Legend panel: an empty plot is used as a canvas for legend().
leg.txt <- class.phen.test
p.vec <- rep(21, 21)
par(mar = c(0, 0, 0, 0))
plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec,
       pt.bg = c.vec, col = "black", cex = 2, pt.cex=3)

# Save the current window as <output.dir><file.iden>.test.set.zscore (jpeg).
savePlot(filename = paste(output.dir, file.iden, ".test.set.zscore", sep=""),
         type ="jpeg", device = dev.cur())

# Apply model   

# Score the test samples with the coefficients fitted on the training set.
# prob.i.test has one row per test sample: median (column 1), 5% quantile
# (column 2) and 95% quantile (column 3) of the transformed linear predictor.
#
# NOTE(review): class.phen.test is assigned from the training class.phen
# earlier in this script, so the check below cannot detect a test set that
# lacks the target class -- confirm whether that is intended.
# NOTE(review): in the probit branch (erf(eta) + 1)/2 equals
# pnorm(sqrt(2) * eta) given erf() as defined in this script, whereas the
# inverse probit link is pnorm(eta). Left as is to keep the script's numerical
# output -- confirm whether this is intended.

n.obs.test <- Ns.test

if (!(target.class %in% class.phen.test)) {
  stop("Target class not represented in test data (cls file)")
}

y.test <- ifelse(class.list.test == target.class, 1, 0)
col.vec.test <- y.test

prob.levels.test <- c(0.5, 0.05, 0.95)   # median, lower, upper
prob.i.test <- matrix(0, nrow = n.obs.test, ncol = 3)

for (i in 1:n.obs.test) {
  eta.test <- beta0 + beta1 * z.vector.test[i]
  if (link.function == "logit") {
    p.vec.test <- exp(eta.test) / (1 + exp(eta.test))
  } else if (link.function == "probit") {
    p.vec.test <- (erf(eta.test) + 1) / 2  # Probit
  } else {
    stop("Unknown link function")
  }
  prob.i.test[i, ] <- quantile(p.vec.test, probs = prob.levels.test)
}

# save probabilities in file 
  
# One column per test sample (original order); rows are the class index, the
# score and the three probability summaries.
test.row.labels <- c("class", "zscore", "prob_0.5", "prob_0.05", "prob_0.95")
V <- data.frame(rbind(class.labels.test, z.vector.test, prob.i.test[, 1],
                      prob.i.test[, 2], prob.i.test[, 3]))
names(V) <- sample.names.test
row.names(V) <- test.row.labels
descs <- test.row.labels
write.gct(gct.data.frame = V, descs = descs,
          filename = paste0(output.dir, file.iden, ".test.set.probs.gct"))

# Order the test samples by increasing score for plotting, and translate the
# 0/1 outcome into colours (c1[1] for the target class, c1[2] otherwise).
x.index <- order(z.vector.test, decreasing = FALSE)
x.order.test <- z.vector.test[x.index]
prob.i.order.test <- prob.i.test[x.index, ]
col.vec.test <- ifelse(col.vec.test == 1, c1[1], c1[2])
col.vec.order.test <- col.vec.test[x.index]

# Test-set probability plot: probability of the target class against the
# regressor score, overlaid on the curve fitted on the training set.
# Left panel = plot, right panel = legend (widths 3.75:1).
windows(height = 7, width = 9.5)
nf <- layout(matrix(c(1, 2), 1, 2, byrow=T), widths = c(3.75, 1), heights = 1, respect = FALSE)

# Set up the axes only (col = 0 draws the points invisibly); the visible
# points are added last so they sit on top of the curves.
plot(x.order.test, prob.i.order.test[,1], main = paste("Probability for Target Class:", target.class, sep=" "),
    pch=20, ylim = c(-0.05, 1.07), col = 0, cex.axis=1.35, cex=3, cex.lab = 1.35, xlab="regressor score",
    ylab="Probability")

# Model curve from the training score grid (x.m / prob.m): median (thick
# black) and 5% / 95% quantiles (blue). The grid spans the training score
# range, so the curve may not cover every test score.
points(x.m, prob.m[,1], type="l", lwd = 2, col=1, lty=1, cex=1)
points(x.m, prob.m[,2], type="l", col=4, lty=1, cex=1)
points(x.m, prob.m[,3], type="l", col=4, lty=1, cex=1)

# Per-sample segment from the 5% to the 95% quantile.
arrows(x.order.test, prob.i.order.test[,2], x.order.test, prob.i.order.test[,3],
col = 4, angle=90, code=3, length=0.0)

# range.x now holds c(min, max) of the sorted test scores.
range.x <- range(x.order.test)
points(range.x, c(0.5, 0.5), type="l", lty=3, col = 1, lwd=2)  # probability 0.5 reference line

points(c(istar, istar), c(-0.07, 1.07), type="l", lty=3, col = 1, lwd=2)  # decision boundary from training
points(x.order.test, prob.i.order.test[,1], pch=21, bg = col.vec.order.test, col = 1, cex=2)  # samples by actual class

# Legend panel: target class first, coloured with c1.
leg.txt <- order.class.phen.test
p.vec <- rep(21, 21)
c.vec <- c1
par(mar = c(0, 0, 0, 0))
plot(c(0,0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes=F, type="n", xlab = "", ylab="")
legend(x=0, y=0.8, legend=leg.txt, bty="n", xjust=0, yjust= 1, pch = p.vec, pt.bg = c.vec,
       col = "black", cex = 1.2, pt.cex=2)

# Save the current window as <output.dir><file.iden>.test.set.post.prob (jpeg).
savePlot(filename = paste(output.dir, file.iden, ".test.set.post.prob", sep=""), type ="jpeg",
device = dev.cur())

                                        # Plot Bayesian residuals

# Residual plot for the test set, drawn only for the Bayesian fit: outcome
# minus probability, against the median probability.
if (model.type == "Bayesian") {

   windows(height = 7, width = 9.5)
   nf <- layout(matrix(c(1, 2), 1, 2, byrow = TRUE), widths = c(3.75, 1), heights = 1, respect = FALSE)

   # Residuals with respect to the median, 5% and 95% probability summaries.
   residuals.test <- y.test - prob.i.test[, 1]
   residuals.2.test <- y.test - prob.i.test[, 2]
   residuals.3.test <- y.test - prob.i.test[, 3]

   ylimits <- range(c(residuals.2.test, residuals.3.test))
   # Points are filled with the TEST colours (col.vec.test). The training
   # vector col.vec was used here before; it has one entry per training sample
   # and therefore coloured the test points by the wrong class.
   # The title is the training dataset path, as in the training plot.
   plot(prob.i.test[, 1], residuals.test, xlim = c(0, 1), ylim = ylimits, main = train.ds,
        sub = "Test Set", pch = 21, col = 1, bg = col.vec.test, cex = 2,
        xlab = "Probability", ylab = "Bayesian Residual")
   # Error bars spanning the residuals at the two outer quantiles; the points
   # are redrawn afterwards so they sit on top of the bars.
   arrows(prob.i.test[, 1], residuals.2.test, prob.i.test[, 1], residuals.3.test, col = 4,
          angle = 90, code = 3, length = 0.1)
   points(prob.i.test[, 1], residuals.test, pch = 21, ylim = c(-0.2, 1.07), col = 1,
          bg = col.vec.test, cex = 2)

   # Legend panel: target class first, coloured with c1.
   leg.txt <- order.class.phen.test
   p.vec <- rep(21, 21)
   c.vec <- c1
   par(mar = c(0, 0, 0, 0))
   plot(c(0, 0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, type = "n", xlab = "", ylab = "")
   legend(x = 0, y = 0.8, legend = leg.txt, bty = "n", xjust = 0, yjust = 1, pch = p.vec, pt.bg = c.vec,
          col = "black", cex = 1.2, pt.cex = 2)

   # Save the current window as <output.dir><file.iden>.test.set.post.resid (jpeg).
   savePlot(filename = paste(output.dir, file.iden, ".test.set.post.resid", sep = ""), type = "jpeg",
            device = dev.cur())
}
# Training plot version 2 -----------------------------------------------------------
#
# Prepares the data for the paper figures: samples are reordered by increasing
# regressor score and every per-sample vector is reordered the same way.

windows(width = 6, height = 6)

V.ind <- order(z.vector, decreasing = FALSE)
V2 <- m2[, V.ind, drop = FALSE]   # control-scaled signature, columns in score order
z.vector.sorted <- z.vector[V.ind]
# NOTE(review): the divisor is the number of samples, not the number of genes
# averaged into each score -- confirm which was intended.
z.vector.error <- apply(m3, MARGIN = 2, FUN = sd) / sqrt(length(z.vector))
z.vector.error.sorted <- z.vector.error[V.ind]
class.labels2 <- class.labels[V.ind]
class.list2 <- class.list[V.ind]
sample.names2 <- sample.names[V.ind]

# save z-score and phenotype in file
# The table is written in the ORIGINAL sample order (class.labels, z.vector and
# sample.names, not their sorted counterparts), with scores rounded to 3
# significant digits. It goes to the same file name as the regressor-score
# export earlier in the script and therefore replaces that file.

zscore.row.labels <- c("class", "z-score")
V <- data.frame(rbind(class.labels, signif(z.vector, digits = 3)))
names(V) <- sample.names
row.names(V) <- zscore.row.labels
descs <- zscore.row.labels
write.gct(gct.data.frame = V, descs = descs,
          filename = paste0(output.dir, file.iden, ".train.set.zscore.gct"))

  
  # Class colours in class.phen order (target class gets c1[1]); c2 receives
  # the same value.
  c.vec <- if (target.class == class.phen[1]) c1 else rev(c1)
  c2 <- c.vec

  # Find the brackets around the decision boundary as seen in the training
  # set: the closest training score at or above istar (pos.limit2) and the
  # closest one below it (neg.limit2).

  z.vector2 <- z.vector - istar
  at.or.above <- z.vector2 >= 0
  z.vector.pos2 <- z.vector2[at.or.above]
  z.vector.neg2 <- z.vector2[z.vector2 < 0]
  pos.limit <- min(z.vector.pos2)
  neg.limit <- max(z.vector.neg2)
  pos.limit2 <- istar + pos.limit
  neg.limit2 <- istar + neg.limit

  boundary <- istar
  # 2 where the median probability reaches 0.5, 1 otherwise (samples in score order).
  pred.class <- ifelse(prob.i.order[, 1] >= 0.5, 2, 1)

  # Paper figure 1 (training set): heat map of the control-scaled signature
  # genes (V2) with samples in increasing score order. Row labels come from the
  # row names of msig.p; column labels, class indices and colours are the
  # sorted per-sample vectors.
  MSIG.HeatMapPlot.3(V = V2, row.names = row.names(msig.p), col.labels = class.labels2,
col.classes = class.phen, phen.cmap = c.vec, col.names = sample.names2, main = "Train Set",
xlab=" ", ylab=" ", sub = " ", cmap.type = 6, row.norm=T, char.rescale=1) 

  # Save the current window as <output.dir><file.iden>.paper.plot.1.train.set (jpeg).
  savePlot(filename = paste(output.dir, file.iden, ".paper.plot.1.train.set", sep=""),
type ="jpeg", device = dev.cur())

# char.rescale=1.2 for luminex all samples together

# Plot bar graph of z-scores

  # Train-set z-score figure: sorted scores drawn as filled squares with error
  # bars, the decision boundary (solid) and the train-set brackets (dashed), and
  # a class legend in a second panel underneath. Reads z.vector.sorted,
  # z.vector.error.sorted, class.labels2, c.vec, boundary, pos.limit2,
  # neg.limit2 and true.classes.names from the workspace and saves a JPEG.
  windows(width = 6, height = 6)

  # Two stacked panels: score plot on top (3/4 of the height), legend below.
  nf <- layout(matrix(c(1, 2), 2, 1, byrow = TRUE), heights = c(3, 1), respect = FALSE)

  #  z.vector.sorted <- z.vector.sorted - boundary

  # The y-range has to contain both ends of every error bar.
  z.range <- range(c(z.vector.sorted + z.vector.error.sorted,
                     z.vector.sorted - z.vector.error.sorted))

  par(mar = c(4, 14, 4, 1))

  # following suggestion of referee not to use bar plots for z scores
  # The barplot()-only arguments cex.names and width that used to be passed
  # here were removed: plot() does not know them and only answered with
  # "is not a graphical parameter" warnings. font.axis is an integer font code,
  # so the former 1.3 is written as 1.
  plot(seq_along(z.vector.sorted), z.vector.sorted, ylab = "z-score",
       ylim = c(floor(z.range[1]), ceiling(z.range[2])), pch = 22, cex = 2,
       font.axis = 1, main = "Train Set", cex.lab = 1.25, cex.axis = 1.3,
       col = c.vec[class.labels2], xlab = "sorted sample index",
       lab = c(10, 10, 10))

  # Error bars: vertical segments from score + error to score - error.
  arrows(seq_along(z.vector.sorted), z.vector.sorted + z.vector.error.sorted,
         seq_along(z.vector.sorted), z.vector.sorted - z.vector.error.sorted,
         col = 1, angle = 90, code = 3, length = 0.0)

  # Redraw the markers on top of the bars, filled with the class colour.
  points(seq_along(z.vector.sorted), z.vector.sorted, pch = 22, cex = 1.95, col = 1,
         bg = c.vec[class.labels2])

  # Decision boundary (solid) and nearest train-set scores around it (dashed).
  lines(c(0, length(z.vector.sorted)), c(boundary, boundary), lty = 1, col = 1)

  lines(c(0, length(z.vector.sorted)), c(pos.limit2, pos.limit2), lty = 2, col = 1)
  lines(c(0, length(z.vector.sorted)), c(neg.limit2, neg.limit2), lty = 2, col = 1)

  # Legend panel: an empty plot that only hosts the legend.
  leg.txt <- true.classes.names
  # One filled-square symbol per legend entry (previously a fixed 22 copies).
  p.vec <- rep(22, length(leg.txt))
  par(mar = c(2, 2, 2, 2))
  plot(c(0, 0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, type = "n",
       xlab = "", ylab = "")
  # NOTE(review): the swatches use rev(c.vec) here, while the points above are
  # filled with c.vec[class.labels2] and the test-set legend passes c.vec
  # unreversed -- confirm the order of true.classes.names makes this correct.
  legend(x = 0, y = 0.8, legend = leg.txt, bty = "n", xjust = 0, yjust = 1,
         pch = p.vec, pt.bg = rev(c.vec), col = "black", cex = 1, pt.cex = 2)

  savePlot(filename = paste(output.dir, file.iden, ".paper.plot.2.train.set", sep = ""),
           type = "jpeg", device = dev.cur())

  # Test plot version 2

# Open the device for the test-set heat map and reorder every per-sample
# quantity by increasing test-set z-score.
windows(width = 6, height = 6)

# Permutation that sorts the test samples from lowest to highest z-score.
V.ind <- order(z.vector.test, decreasing = FALSE)

# Heat map input: the columns of m3.test in sorted order, copied into a plain
# zero-initialised matrix of the same shape (so no dimnames are carried over).
V2 <- matrix(0, nrow = nrow(m3.test), ncol = ncol(m3.test))
V2[] <- m3.test[, V.ind]

# Per-column standard deviation of m3.test, scaled down to an error-bar
# half-width.
# NOTE(review): the divisor is sqrt(length(z.vector.test)), i.e. the number of
# scores, not the number of rows each sd is computed over -- confirm that this
# is the intended scaling.
z.vector.test.error <- apply(m3.test, 2, sd) / sqrt(length(z.vector.test))

# Sorted copies used by the plots that follow.
z.vector.sorted.test.error <- z.vector.test.error[V.ind]
z.vector.sorted.test <- z.vector.test[V.ind]
class.labels.test2 <- class.labels.test[V.ind]
class.list.test2 <- class.list.test[V.ind]
sample.names.test2 <- sample.names.test[V.ind]

# Plain copy of the sorted scores; the z-score plot reads this name.
z.vector.sorted.test2 <- z.vector.sorted.test

# Save the test-set class labels and z-scores (rounded to 3 significant digits)
# as a two-row table in GCT format, one column per sample.
# NOTE(review): the previous header said "sorted", but the values written here
# are class.labels.test / z.vector.test / sample.names.test in their original
# order, not the z-score-sorted copies.
descs <- c("class", "z-score")
V <- data.frame(rbind(class.labels.test, signif(z.vector.test, digits = 3)))
colnames(V) <- sample.names.test
rownames(V) <- descs
write.gct(gct.data.frame = V, descs = descs,
          filename = paste(output.dir, file.iden, ".test.set.zscore.gct", sep = ""))

# Phenotype colours: c1 is used as given when the target class is the first
# phenotype and reversed otherwise. c2 always holds the same vector as c.vec.
c.vec <- if (target.class == class.phen[1]) c1 else rev(c1)
c2 <- c.vec

# The decision boundary drawn on the test-set plot is istar.
boundary <- istar

# Predicted class: 2 where the first column of prob.i.order.test is at least
# 0.5, otherwise 1.
pred.class.test <- ifelse(prob.i.order.test[, 1] >= 0.5, 2, 1)

# Test-set heat map of the z-score-sorted matrix V2, with rows named after
# msig.p.test and columns labelled by the sorted sample names and class labels.
MSIG.HeatMapPlot.3(
  V = V2,
  row.names = row.names(msig.p.test),
  col.labels = class.labels.test2,
  col.classes = class.phen,
  phen.cmap = c.vec,
  col.names = sample.names.test2,
  main = "Test Set",
  xlab = " ",
  ylab = " ",
  sub = " ",
  cmap.type = 6,
  row.norm = TRUE,
  char.rescale = 1
)

# Write the current device to <output.dir><file.iden>.paper.plot.1.test.set
# as a JPEG.
savePlot(
  filename = paste(output.dir, file.iden, ".paper.plot.1.test.set", sep = ""),
  type = "jpeg",
  device = dev.cur()
)

# Plot bar graph of z-scores

# Test-set z-score figure: sorted scores drawn as filled squares with error
# bars, the decision boundary (solid) and the brackets pos.limit2 / neg.limit2
# (dashed), and a class legend in a second panel underneath. Reads
# z.vector.sorted.test2, z.vector.sorted.test.error, class.labels.test2, c.vec,
# boundary, pos.limit2, neg.limit2 and true.classes.names from the workspace
# and saves a JPEG.
windows(width = 6, height = 6)

# Two stacked panels: score plot on top (3/4 of the height), legend below.
nf <- layout(matrix(c(1, 2), 2, 1, byrow = TRUE), heights = c(3, 1), respect = FALSE)

# The y-range has to contain both ends of every error bar.
z.range <- range(c(z.vector.sorted.test2 + z.vector.sorted.test.error,
                   z.vector.sorted.test2 - z.vector.sorted.test.error))

par(mar = c(4, 14, 4, 1))

# The barplot()-only arguments cex.names and width that used to be passed here
# were removed: plot() does not know them and only answered with
# "is not a graphical parameter" warnings. font.axis is an integer font code,
# so the former 1.3 is written as 1.
plot(seq_along(z.vector.sorted.test2), z.vector.sorted.test2, ylab = "z-score",
     ylim = c(floor(z.range[1]), ceiling(z.range[2])), pch = 22, cex = 2,
     main = "Test Set", font.axis = 1, cex.lab = 1.25, cex.axis = 1.0,
     col = c.vec[class.labels.test2], xlab = "sorted sample index",
     lab = c(10, 10, 10))

# Error bars: vertical segments from score + error to score - error.
arrows(seq_along(z.vector.sorted.test2), z.vector.sorted.test2 + z.vector.sorted.test.error,
       seq_along(z.vector.sorted.test2), z.vector.sorted.test2 - z.vector.sorted.test.error,
       col = 1, angle = 90, code = 3, length = 0.0)

# Redraw the markers on top of the bars, filled with the class colour.
points(seq_along(z.vector.sorted.test2), z.vector.sorted.test2, pch = 22, cex = 1.95,
       col = 1, bg = c.vec[class.labels.test2])

# Decision boundary (solid) and the bracket values around it (dashed).
lines(c(0, length(z.vector.sorted.test2)), c(boundary, boundary), lty = 1, col = 1)

lines(c(0, length(z.vector.sorted.test2)), c(pos.limit2, pos.limit2), lty = 2, col = 1)
lines(c(0, length(z.vector.sorted.test2)), c(neg.limit2, neg.limit2), lty = 2, col = 1)

# Legend panel: an empty plot that only hosts the legend.
leg.txt <- true.classes.names
# One filled-square symbol per legend entry (previously a fixed 22 copies).
p.vec <- rep(22, length(leg.txt))
par(mar = c(2, 2, 2, 2))
plot(c(0, 0), c(1, 1), xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, type = "n",
     xlab = "", ylab = "")
legend(x = 0, y = 0.8, legend = leg.txt, bty = "n", xjust = 0, yjust = 1,
       pch = p.vec, pt.bg = c.vec, col = "black", cex = 1, pt.cex = 2)

savePlot(filename = paste(output.dir, file.iden, ".paper.plot.2.test.set", sep = ""),
         type = "jpeg", device = dev.cur())


# create heat map legend

# c.vec  <- c("green3", "red3") # color for target class and control class

# x <- matrix(c(seq(-6,6,1),seq(-6,6,1)),nrow=13,ncol=2) 

# MSIG.HeatMapPlot.3(V = x, row.names = (x), col.labels = c(0,1),
#    col.classes = "NA", phen.cmap = c.vec, col.names = "NA", main = "Test Set",
#    xlab=" ", ylab=" ", sub = " ", cmap.type = 6, row.norm=F, char.rescale=1) 

# MSIG.HeatMapPlot.3(V = t(x), row.names = c(" ", " "), col.labels = c(rep(1,6),rep(2,7)),
#    col.classes = c(1, 2), phen.cmap = c.vec, col.names = rep(" ", 13), main = "Test Set",
#    xlab=" ", ylab=" ", sub = " ", cmap.type = 6, row.norm=F, char.rescale=1)
# axis(side=1, at = seq(1, 13), labels = seq(-6,6,1), las=1, tick=FALSE, line=-1)
