git.donarmstrong.com Git - mothur.git/blobdiff - chimerauchimecommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / chimerauchimecommand.cpp
index ae011906ded5c95b3aec8bfb57047e27ae42392b..9a25582ddad078665bbc3f7e098dc4cbcf047097 100644 (file)
 //**********************************************************************************************************************
 vector<string> ChimeraUchimeCommand::setParameters(){  
        try {
-               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
-        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
-               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
-               CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
-               CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
-               CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
-               CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
-               CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
-               CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
-               CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
-               CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
-               CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
-        CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdups);
+               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
+               CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
+               CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
+               CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
+               CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
+               CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
+               CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
+               CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
+               CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
+               CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
+        CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
 
                //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
-               CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
-               CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
-               CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
-               CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
-               CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
-               CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
-               CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
+               CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
+               CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
+               CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
+               CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
+               CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
+               CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
+               CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
 
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -61,7 +62,7 @@ string ChimeraUchimeCommand::getHelpString(){
                string helpString = "";
                helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
                helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
-               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
+               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
         helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
@@ -103,26 +104,21 @@ string ChimeraUchimeCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){   
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string ChimeraUchimeCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "chimera") {  outputFileName =  "uchime.chimeras"; }
-            else if (type == "accnos") {  outputFileName =  "uchime.accnos"; }
-            else if (type == "alns") {  outputFileName =  "uchime.alns"; }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
+        else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
+        else if (type == "alns") {  pattern = "[filename],uchime.alns"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
+        exit(1);
+    }
 }
 //**********************************************************************************************************************
 ChimeraUchimeCommand::ChimeraUchimeCommand(){  
@@ -548,6 +544,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
                        minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
                        maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
+            
+            strand = validParameter.validFile(parameters, "strand", false);    if (strand == "not found")      {  strand = ""; }
                        
                        temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
                        ucl = m->isTrue(temp);
@@ -561,11 +559,11 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
                        skipgaps2 = m->isTrue(temp); 
             
-            string usedDups = "false";
+            
                        temp = validParameter.validFile(parameters, "dereplicate", false);      
                        if (temp == "not found") { 
                                if (groupfile != "")    {  temp = "false";                                      }
-                               else                    {  temp = "true"; usedDups = "";        }
+                               else                    {  temp = "true";       }
                        }
                        dups = m->isTrue(temp);
 
@@ -642,9 +640,11 @@ int ChimeraUchimeCommand::execute(){
                        int start = time(NULL); 
                        string nameFile = "";
                        if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
-                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
-                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
-                       string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + getOutputFileNameTag("alns");
+                       map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+                       string outputFileName = getOutputFileName("chimera", variables);
+                       string accnosFileName = getOutputFileName("accnos", variables);
+                       string alnsFileName = getOutputFileName("alns", variables);
                        string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
                                
                        //you provided a groupfile
@@ -1235,6 +1235,15 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                        *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
                        cPara.push_back(tempa);
                }
+        
+        if (strand != "") {
+                       char* tempA = new char[9]; 
+                       *tempA = '\0'; strncat(tempA, "--strand", 8);
+                       cPara.push_back(tempA);
+                       char* tempa = new char[strand.length()+1];
+                       *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
+                       cPara.push_back(tempa);
+               }
                
                if (useAbskew) {
                        char* tempskew = new char[9];
@@ -1452,15 +1461,21 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                        
                        string name = "";
                        string chimeraFlag = "";
-                       in >> chimeraFlag >> name;
-                       
-                       //fix name if needed
-                       if (templatefile == "self") { 
-                               name = name.substr(0, name.length()-1); //rip off last /
-                               name = name.substr(0, name.find_last_of('/'));
+                       //in >> chimeraFlag >> name;
+                       
+            string line = m->getline(in);
+            vector<string> pieces = m->splitWhiteSpace(line);
+            if (pieces.size() > 2) { 
+                name = pieces[1];
+                //fix name if needed
+                if (templatefile == "self") { 
+                    name = name.substr(0, name.length()-1); //rip off last /
+                    name = name.substr(0, name.find_last_of('/'));
+                }
+                
+                chimeraFlag = pieces[pieces.size()-1];
                        }
-                       
-                       for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
+                       //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
                        m->gobble(in);
                        
                        if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
@@ -1628,7 +1643,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                        
                        uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
                        tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
-                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                        
                        pDataArray.push_back(tempUchime);
                        processIDS.push_back(i);
@@ -1760,7 +1775,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                        
                        uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
                        tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
-                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                        
                        pDataArray.push_back(tempUchime);
                        processIDS.push_back(i);