#!/usr/bin/env Rscript

# Generate a per-row group vector (1, 2 or 3) from a clustering-info table.
#
# Usage:
#   rsem-for-ebseq-generate-ngvector-from-clustering-info input_file output_file
#
# input_file  : table readable by read.table(); only column 2 is used.
# output_file : receives one group label per input row, without row or
#               column names.
#
# Rows whose column-2 value is >= 0 are clustered with k-means (3 clusters)
# and labelled by the ascending rank of their cluster centre (1 = smallest
# centre, 3 = largest). Rows with a negative column-2 value are assigned to
# group 3.
# NOTE(review): presumably the output is the EBSeq "NgVector" (per the script
# name) -- confirm against the caller.

argv <- commandArgs(TRUE)
if (length(argv) != 2) {
  cat("Usage: rsem-for-ebseq-generate-ngvector-from-clustering-info input_file output_file\n")
  q(status = 1)
}

data <- read.table(file = argv[1], stringsAsFactors = FALSE)

# Only non-negative values take part in the clustering.
idx <- data[, 2] >= 0
kmr <- kmeans(data[idx, 2], 3)

# kmeans() numbers its clusters arbitrarily. order(centers)[k] is the id of
# the cluster with the k-th smallest centre, so indexing it by cluster id
# (as the previous code did) applies the permutation in the wrong direction
# and mislabels rows whenever the permutation is not its own inverse.
# match() gives the inverse mapping: the position of each row's cluster id
# in the centre-sorted list, i.e. the rank of that cluster's centre.
center_order <- order(kmr$centers)

ngvec <- rep(0, length(idx))
ngvec[idx] <- match(kmr$cluster, center_order)
ngvec[!idx] <- 3

write.table(ngvec, file = argv[2], row.names = FALSE, col.names = FALSE)