]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / trimseqscommand.cpp
index 0c21c8993552929a5299841a5089783f7b8c2aef..eb31e78ec151c7c0505bad45c16191d04520e6f1 100644 (file)
 //**********************************************************************************************************************
 vector<string> TrimSeqsCommand::setParameters(){       
        try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
-               CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
-               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
-        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
-               CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
-               CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
-               CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
-               CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
-               CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
-               CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
-               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
-        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
-               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
-        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
-               CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
-               CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
-               CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
-               CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
-               CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
-               CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
-               CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
-               CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
-               CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
-               CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
+               CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
+               CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
+               CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
+               CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
+               CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
+               CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
+               CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
+               CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
+               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+               CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
+               CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
+               CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
+               CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
+               CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
+               CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
+               CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
+               CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
+               CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
+               CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
+               CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                        
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -101,31 +101,24 @@ string TrimSeqsCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){        
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string TrimSeqsCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "qfile")            {   outputFileName =  "qual";   }
-            else if (type == "fasta")            {   outputFileName =  "fasta";   }
-            else if (type == "group")            {   outputFileName =  "groups";   }
-            else if (type == "name")            {   outputFileName =  "names";   }
-            else if (type == "count")            {   outputFileName =  "count_table";   }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "qfile") {  pattern = "[filename],[tag],qual"; } 
+        else if (type == "fasta") {  pattern = "[filename],[tag],fasta"; } 
+        else if (type == "group") {  pattern = "[filename],groups"; }
+        else if (type == "name") {  pattern = "[filename],[tag],names"; }
+        else if (type == "count") {  pattern = "[filename],[tag],count_table"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
+        exit(1);
+    }
 }
-
-
 //**********************************************************************************************************************
 
 TrimSeqsCommand::TrimSeqsCommand(){    
@@ -382,15 +375,18 @@ int TrimSeqsCommand::execute(){
                vector<vector<string> > qualFileNames;
                vector<vector<string> > nameFileNames;
                
-               string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
+        map<string, string> variables; 
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+        variables["[tag]"] = "trim";
+               string trimSeqFile = getOutputFileName("fasta",variables);
+        string trimQualFile = getOutputFileName("qfile",variables);
                outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
-               
-               string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
+        
+        variables["[tag]"] = "scrap";
+               string scrapSeqFile = getOutputFileName("fasta",variables);
+        string scrapQualFile = getOutputFileName("qfile",variables);
                outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
                
-               string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
-               string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
-               
                if (qFileName != "") {
                        outputNames.push_back(trimQualFile);
                        outputNames.push_back(scrapQualFile);
@@ -398,8 +394,11 @@ int TrimSeqsCommand::execute(){
                        outputTypes["qfile"].push_back(scrapQualFile); 
                }
                
-               string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
-               string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+        variables["[tag]"] = "trim";
+               string trimNameFile = getOutputFileName("name",variables);
+        variables["[tag]"] = "scrap";
+               string scrapNameFile = getOutputFileName("name",variables);
                
                if (nameFile != "") {
                        m->readNames(nameFile, nameMap);
@@ -409,8 +408,11 @@ int TrimSeqsCommand::execute(){
                        outputTypes["name"].push_back(scrapNameFile); 
                }
         
-        string trimCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "trim." + getOutputFileNameTag("count");
-               string scrapCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "scrap." + getOutputFileNameTag("count");
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
+        variables["[tag]"] = "trim";
+        string trimCountFile = getOutputFileName("count",variables);
+        variables["[tag]"] = "scrap";
+               string scrapCountFile = getOutputFileName("count",variables);
                
                if (countfile != "") {
             CountTable ct;
@@ -429,7 +431,9 @@ int TrimSeqsCommand::execute(){
                if(oligoFile != ""){
                        createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
                        if ((createGroup) && (countfile == "")){
-                               outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
+                map<string, string> myvariables; 
+                myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+                               outputGroupFileName = getOutputFileName("group",myvariables);
                                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
                        }
                }
@@ -488,9 +492,11 @@ int TrimSeqsCommand::execute(){
                 m->openInputFile(it->first, in);
                 
                 ofstream out;
-                string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
-                if (countfile == "") { thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
-                else {  thisGroupName += getOutputFileNameTag("count"); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
+                map<string, string> myvariables; 
+                myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
+                string thisGroupName = "";
+                if (countfile == "") { thisGroupName = getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
+                else {  thisGroupName = getOutputFileName("count",variables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
                 m->openOutputFile(thisGroupName, out);
                 
                 if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
@@ -1036,10 +1042,10 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                HANDLE  hThreadArray[processors-1]; 
                
                //Create processor worker threads.
-               for( int i=0; i<processors-1; i++){
+               for( int h=0; h<processors-1; h++){
                        
             string extension = "";
-                       if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
+                       if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
             vector<vector<string> > tempNameFileNames = nameFileNames;
@@ -1081,14 +1087,14 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                               tempFASTAFileNames,
                                               tempPrimerQualFileNames,
                                               tempNameFileNames,
-                                              lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
+                                              lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
                        pDataArray.push_back(tempTrim);
             
-                       hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+                       hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
                }
         
         //parent do my part
@@ -1103,8 +1109,32 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        m->openOutputFile(trimNameFileName, temp);              temp.close();
                        m->openOutputFile(scrapNameFileName, temp);             temp.close();
                }
+        vector<vector<string> > tempFASTAFileNames = fastaFileNames;
+        vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
+        vector<vector<string> > tempNameFileNames = nameFileNames;
+        if(allFiles){
+            ofstream temp;
+            string extension = toString(processors-1) + ".temp";
+            for(int i=0;i<tempFASTAFileNames.size();i++){
+                for(int j=0;j<tempFASTAFileNames[i].size();j++){
+                    if (tempFASTAFileNames[i][j] != "") {
+                        tempFASTAFileNames[i][j] += extension;
+                        m->openOutputFile(tempFASTAFileNames[i][j], temp);                     temp.close();
+                        
+                        if(qFileName != ""){
+                            tempPrimerQualFileNames[i][j] += extension;
+                            m->openOutputFile(tempPrimerQualFileNames[i][j], temp);            temp.close();
+                        }
+                        if(nameFile != ""){
+                            tempNameFileNames[i][j] += extension;
+                            m->openOutputFile(tempNameFileNames[i][j], temp);          temp.close();
+                        }
+                    }
+                }
+            }
+        }
         
-               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
+               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
         processIDS.push_back(processors-1);
 
         
@@ -1495,7 +1525,10 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        
                                        
                                        ofstream temp;
-                                       fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
+                    map<string, string> variables; 
+                    variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+                    variables["[tag]"] = comboGroupName;
+                                       fastaFileName = getOutputFileName("fasta", variables);
                                        if (uniqueNames.count(fastaFileName) == 0) {
                                                outputNames.push_back(fastaFileName);
                                                outputTypes["fasta"].push_back(fastaFileName);
@@ -1506,7 +1539,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        m->openOutputFile(fastaFileName, temp);         temp.close();
                                        
                                        if(qFileName != ""){
-                                               qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
+                        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
+                                               qualFileName = getOutputFileName("qfile", variables);
                                                if (uniqueNames.count(qualFileName) == 0) {
                                                        outputNames.push_back(qualFileName);
                                                        outputTypes["qfile"].push_back(qualFileName);
@@ -1517,7 +1551,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        }
                                        
                                        if(nameFile != ""){
-                                               nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
+                        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+                                               nameFileName = getOutputFileName("name", variables);
                                                if (uniqueNames.count(nameFileName) == 0) {
                                                        outputNames.push_back(nameFileName);
                                                        outputTypes["name"].push_back(nameFileName);