]> git.donarmstrong.com Git - mothur.git/blobdiff - chimerauchimecommand.cpp
Added flow=I to trim.flows
[mothur.git] / chimerauchimecommand.cpp
index 478156814cb4e429bebc7f7f1c4f4d55db148d4a..9a25582ddad078665bbc3f7e098dc4cbcf047097 100644 (file)
 //**********************************************************************************************************************
 vector<string> ChimeraUchimeCommand::setParameters(){  
        try {
-               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
-        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
-               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
-               CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
-               CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
-               CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
-               CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
-               CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
-               CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
-               CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
-               CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
-               CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
-        CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
+               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
+               CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
+               CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
+               CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
+               CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
+               CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
+               CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
+               CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
+               CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
+               CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
+        CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
 
                //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
-               CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
-               CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
-               CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
-               CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
-               CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
-               CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
-               CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
+               CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
+               CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
+               CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
+               CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
+               CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
+               CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
+               CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
 
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -61,13 +62,13 @@ string ChimeraUchimeCommand::getHelpString(){
                string helpString = "";
                helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
                helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
-               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dups, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
+               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
         helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
-        helpString += "If the dups parameter is true, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=t.\n";
+        helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
                helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
                helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
@@ -103,26 +104,21 @@ string ChimeraUchimeCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){   
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string ChimeraUchimeCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "chimera") {  outputFileName =  "uchime.chimeras"; }
-            else if (type == "accnos") {  outputFileName =  "uchime.accnos"; }
-            else if (type == "alns") {  outputFileName =  "uchime.alns"; }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
+        else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
+        else if (type == "alns") {  pattern = "[filename],uchime.alns"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
+        exit(1);
+    }
 }
 //**********************************************************************************************************************
 ChimeraUchimeCommand::ChimeraUchimeCommand(){  
@@ -548,6 +544,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
                        minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
                        maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
+            
+            strand = validParameter.validFile(parameters, "strand", false);    if (strand == "not found")      {  strand = ""; }
                        
                        temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
                        ucl = m->isTrue(temp);
@@ -561,11 +559,11 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
                        skipgaps2 = m->isTrue(temp); 
             
-            string usedDups = "true";
-                       temp = validParameter.validFile(parameters, "dups", false);     
+            
+                       temp = validParameter.validFile(parameters, "dereplicate", false);      
                        if (temp == "not found") { 
-                               if (groupfile != "")    {  temp = "true";                                       }
-                               else                    {  temp = "false"; usedDups = "";       }
+                               if (groupfile != "")    {  temp = "false";                                      }
+                               else                    {  temp = "true";       }
                        }
                        dups = m->isTrue(temp);
 
@@ -642,9 +640,11 @@ int ChimeraUchimeCommand::execute(){
                        int start = time(NULL); 
                        string nameFile = "";
                        if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
-                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
-                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
-                       string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + getOutputFileNameTag("alns");
+                       map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+                       string outputFileName = getOutputFileName("chimera", variables);
+                       string accnosFileName = getOutputFileName("accnos", variables);
+                       string alnsFileName = getOutputFileName("alns", variables);
                        string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
                                
                        //you provided a groupfile
@@ -726,7 +726,7 @@ int ChimeraUchimeCommand::execute(){
                 if (hasCount) { delete cparser; }
                 else { delete sparser; }
                 
-                if (dups) { 
+                if (!dups) { 
                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
                                
                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");     m->mothurOutEndLine();
@@ -1235,6 +1235,15 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                        *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
                        cPara.push_back(tempa);
                }
+        
+        if (strand != "") {
+                       char* tempA = new char[9]; 
+                       *tempA = '\0'; strncat(tempA, "--strand", 8);
+                       cPara.push_back(tempA);
+                       char* tempa = new char[strand.length()+1];
+                       *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
+                       cPara.push_back(tempa);
+               }
                
                if (useAbskew) {
                        char* tempskew = new char[9];
@@ -1452,15 +1461,21 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                        
                        string name = "";
                        string chimeraFlag = "";
-                       in >> chimeraFlag >> name;
-                       
-                       //fix name if needed
-                       if (templatefile == "self") { 
-                               name = name.substr(0, name.length()-1); //rip off last /
-                               name = name.substr(0, name.find_last_of('/'));
+                       //in >> chimeraFlag >> name;
+                       
+            string line = m->getline(in);
+            vector<string> pieces = m->splitWhiteSpace(line);
+            if (pieces.size() > 2) { 
+                name = pieces[1];
+                //fix name if needed
+                if (templatefile == "self") { 
+                    name = name.substr(0, name.length()-1); //rip off last /
+                    name = name.substr(0, name.find_last_of('/'));
+                }
+                
+                chimeraFlag = pieces[pieces.size()-1];
                        }
-                       
-                       for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
+                       //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
                        m->gobble(in);
                        
                        if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
@@ -1628,7 +1643,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                        
                        uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
                        tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
-                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                        
                        pDataArray.push_back(tempUchime);
                        processIDS.push_back(i);
@@ -1760,7 +1775,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                        
                        uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
                        tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
-                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                        
                        pDataArray.push_back(tempUchime);
                        processIDS.push_back(i);