]> git.donarmstrong.com Git - mothur.git/blobdiff - chimerauchimecommand.cpp
added ignore option to oligos files for make.contigs, trim.seqs, trim.flows
[mothur.git] / chimerauchimecommand.cpp
index 478156814cb4e429bebc7f7f1c4f4d55db148d4a..04e1f1ddf244057e462d51c39c2913900d6424c3 100644 (file)
 //**********************************************************************************************************************
 vector<string> ChimeraUchimeCommand::setParameters(){  
        try {
-               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
-        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
-               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
-               CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
-               CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
-               CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
-               CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
-               CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
-               CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
-               CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
-               CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
-               CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
-        CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
+               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
+               CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
+               CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
+               CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
+               CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
+               CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
+               CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
+               CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
+               CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
+               CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
+        CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
 
                //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
-               CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
-               CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
-               CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
-               CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
-               CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
-               CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
-               CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
+               CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
+               CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
+               CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
+               CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
+               CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
+               CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
+               CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
 
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -61,13 +61,13 @@ string ChimeraUchimeCommand::getHelpString(){
                string helpString = "";
                helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
                helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
-               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dups, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
+               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
         helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
-        helpString += "If the dups parameter is true, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=t.\n";
+        helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
                helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
                helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
@@ -103,26 +103,21 @@ string ChimeraUchimeCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){   
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string ChimeraUchimeCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "chimera") {  outputFileName =  "uchime.chimeras"; }
-            else if (type == "accnos") {  outputFileName =  "uchime.accnos"; }
-            else if (type == "alns") {  outputFileName =  "uchime.alns"; }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
+        else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
+        else if (type == "alns") {  pattern = "[filename],uchime.alns"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
+        exit(1);
+    }
 }
 //**********************************************************************************************************************
 ChimeraUchimeCommand::ChimeraUchimeCommand(){  
@@ -561,11 +556,11 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
                        skipgaps2 = m->isTrue(temp); 
             
-            string usedDups = "true";
-                       temp = validParameter.validFile(parameters, "dups", false);     
+            string usedDups = "false";
+                       temp = validParameter.validFile(parameters, "dereplicate", false);      
                        if (temp == "not found") { 
-                               if (groupfile != "")    {  temp = "true";                                       }
-                               else                    {  temp = "false"; usedDups = "";       }
+                               if (groupfile != "")    {  temp = "false";                                      }
+                               else                    {  temp = "true"; usedDups = "";        }
                        }
                        dups = m->isTrue(temp);
 
@@ -642,9 +637,11 @@ int ChimeraUchimeCommand::execute(){
                        int start = time(NULL); 
                        string nameFile = "";
                        if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
-                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
-                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
-                       string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + getOutputFileNameTag("alns");
+                       map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+                       string outputFileName = getOutputFileName("chimera", variables);
+                       string accnosFileName = getOutputFileName("accnos", variables);
+                       string alnsFileName = getOutputFileName("alns", variables);
                        string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
                                
                        //you provided a groupfile
@@ -726,7 +723,7 @@ int ChimeraUchimeCommand::execute(){
                 if (hasCount) { delete cparser; }
                 else { delete sparser; }
                 
-                if (dups) { 
+                if (!dups) { 
                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
                                
                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");     m->mothurOutEndLine();
@@ -1452,15 +1449,21 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                        
                        string name = "";
                        string chimeraFlag = "";
-                       in >> chimeraFlag >> name;
-                       
-                       //fix name if needed
-                       if (templatefile == "self") { 
-                               name = name.substr(0, name.length()-1); //rip off last /
-                               name = name.substr(0, name.find_last_of('/'));
+                       //in >> chimeraFlag >> name;
+                       
+            string line = m->getline(in);
+            vector<string> pieces = m->splitWhiteSpace(line);
+            if (pieces.size() > 2) { 
+                name = pieces[1];
+                //fix name if needed
+                if (templatefile == "self") { 
+                    name = name.substr(0, name.length()-1); //rip off last /
+                    name = name.substr(0, name.find_last_of('/'));
+                }
+                
+                chimeraFlag = pieces[pieces.size()-1];
                        }
-                       
-                       for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
+                       //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
                        m->gobble(in);
                        
                        if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }