git.donarmstrong.com Git - mothur.git/blobdiff - consensusseqscommand.cpp
changed random forest output filename
[mothur.git] / consensusseqscommand.cpp
index 4c7aefb340602efffa969b2612568299cbf1865b..3d671f98420e0fa7cf844aac647bd43feea959e8 100644 (file)
 //**********************************************************************************************************************
 vector<string> ConsensusSeqsCommand::setParameters(){  
        try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
-        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
-               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plist);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "",false,false); parameters.push_back(pcutoff);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","fasta-name",false,false,true); parameters.push_back(plist);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pcutoff);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -55,29 +55,23 @@ string ConsensusSeqsCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string ConsensusSeqsCommand::getOutputFileNameTag(string type, string inputName=""){   
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string ConsensusSeqsCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "fasta") {  outputFileName =  "cons.fasta"; }
-            else if (type == "name") {  outputFileName =  "cons.names"; }
-            else if (type == "count") {  outputFileName =  "cons.count.table"; }
-            else if (type == "summary") {  outputFileName =  "cons.summary"; }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ConsensusSeqsCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "fasta") {  pattern = "[filename],cons.fasta-[filename],[tag],cons.fasta"; } 
+        else if (type == "name") {  pattern = "[filename],cons.names-[filename],[tag],cons.names"; } 
+        else if (type == "count") {  pattern = "[filename],cons.count_table-[filename],[tag],cons.count_table"; }
+        else if (type == "summary") {  pattern = "[filename],cons.summary-[filename],[tag],cons.summary"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ConsensusSeqsCommand", "getOutputPattern");
+        exit(1);
+    }
 }
-
 //**********************************************************************************************************************
 ConsensusSeqsCommand::ConsensusSeqsCommand(){  
        try {
@@ -225,12 +219,14 @@ int ConsensusSeqsCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
+        int start = time(NULL);
+        
                readFasta();
                
                if (m->control_pressed) { return 0; }
                
                if (namefile != "") { readNames(); }
-        if (countfile != "") { ct.readTable(countfile);  }
+        if (countfile != "") { ct.readTable(countfile, true);  }
                
                if (m->control_pressed) { return 0; }
                
@@ -238,7 +234,9 @@ int ConsensusSeqsCommand::execute(){
                if (listfile == "") {
                        
                        ofstream outSummary;
-                       string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("summary");
+            map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
+                       string outputSummaryFile = getOutputFileName("summary", variables);
                        m->openOutputFile(outputSummaryFile, outSummary);
                        outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
                        outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
@@ -246,7 +244,7 @@ int ConsensusSeqsCommand::execute(){
                        outSummary << "PositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
                        
                        ofstream outFasta;
-                       string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
+                       string outputFastaFile = getOutputFileName("fasta", variables);
                        m->openOutputFile(outputFastaFile, outFasta);
                        outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
         
@@ -314,7 +312,7 @@ int ConsensusSeqsCommand::execute(){
                
                }else {
                        
-                                               
+            
                        InputData* input = new InputData(listfile, "list");
                        ListVector* list = input->getListVector();
                        
@@ -395,8 +393,10 @@ int ConsensusSeqsCommand::execute(){
                        delete input;
                }
                
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to find the consensus sequences.");
+        
                m->mothurOutEndLine();
-               m->mothurOut("Output File Name: "); m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
                m->mothurOutEndLine();
                
@@ -415,32 +415,44 @@ int ConsensusSeqsCommand::processList(ListVector*& list){
        try{
                
                ofstream outSummary;
-               string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + getOutputFileNameTag("summary");
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
+        variables["[tag]"] = list->getLabel();
+               string outputSummaryFile = getOutputFileName("summary", variables);
                m->openOutputFile(outputSummaryFile, outSummary);
                outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
                outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
                
                ofstream outName;
-               string outputNameFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + getOutputFileNameTag("name");
+               string outputNameFile = getOutputFileName("name",variables);
                m->openOutputFile(outputNameFile, outName);
                outputNames.push_back(outputNameFile); outputTypes["name"].push_back(outputNameFile);
                
                ofstream outFasta;
-               string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + getOutputFileNameTag("fasta");
+               string outputFastaFile = getOutputFileName("fasta",variables);
                m->openOutputFile(outputFastaFile, outFasta);
                outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
                
                outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
                
+        string snumBins = toString(list->getNumBins());
                for (int i = 0; i < list->getNumBins(); i++) {
                        
                        if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
                        
                        string bin = list->get(i);
                        string consSeq = getConsSeq(bin, outSummary, i);
+            
+            string seqName = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) {
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { seqName += "0"; }
+            }
+            seqName += sbinNumber;
                        
-                       outFasta << ">seq" << (i+1) << endl << consSeq << endl;
-                       outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << bin << endl;
+                       outFasta << ">" << seqName << endl << consSeq << endl;
+                       outName << seqName << '\t' << seqName << "," << bin << endl;
                }
                
                outSummary.close(); outName.close(); outFasta.close();
@@ -610,10 +622,10 @@ char ConsensusSeqsCommand::getBase(vector<int> counts, int size){  //A,T,G,C,Gap
                
                //zero out counts that don't make the cutoff
                float percentage = (100.0 - cutoff) / 100.0;
-               int zeroCutoff = percentage * size;
-               
+        
                for (int i = 0; i < counts.size(); i++) {
-                       if (counts[i] < zeroCutoff) { counts[i] = 0; }
+            float countPercentage = counts[i] / (float) size;
+                       if (countPercentage < percentage) { counts[i] = 0; }
                }
                
                //any