git.donarmstrong.com Git - rsem.git/blobdiff - EBSeq/rsem-for-ebseq-generate-ngvector-from-clustering-info
changed output format to contain FPKM etc. ; fixed a bug for paired-end reads
[rsem.git] / EBSeq / rsem-for-ebseq-generate-ngvector-from-clustering-info
diff --git a/EBSeq/rsem-for-ebseq-generate-ngvector-from-clustering-info b/EBSeq/rsem-for-ebseq-generate-ngvector-from-clustering-info
new file mode 100755 (executable)
index 0000000..312dbfa
--- /dev/null
@@ -0,0 +1,18 @@
+#!/usr/bin/env Rscript
+# Label each row of input_file 1-3 from a 3-means clustering of its 2nd column.
+argv <- commandArgs(TRUE)
+if (length(argv) != 2) {
+  cat("Usage: rsem-for-ebseq-generate-ngvector-from-clustering-info input_file output_file\n")
+  q(status = 1)
+}
+# Rows with a negative 2nd column are kept out of the clustering (see below).
+data <- read.table(file = argv[1], stringsAsFactors = FALSE)
+idx <- data[, 2] >= 0
+kmr <- kmeans(data[idx, 2], 3)
+# rank() maps cluster id -> position of its center (1 = smallest center).
+# order() is the inverse permutation: wrong for 2 of the 6 center orderings.
+center_rank <- rank(kmr$centers, ties.method = "first")
+ngvec <- rep(0, length(idx))
+ngvec[idx] <- center_rank[kmr$cluster]
+ngvec[!idx] <- 3  # rows excluded from the clustering always get label 3
+write.table(ngvec, file = argv[2], row.names = FALSE, col.names = FALSE)