]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
fixes while testing 1.33.0
[mothur.git] / trimseqscommand.cpp
index 709202f7d690c17e8a6edb7f5a7812047aec9177..951ce65d400bb36afc60df92e56e148c14ee271c 100644 (file)
@@ -24,7 +24,7 @@ vector<string> TrimSeqsCommand::setParameters(){
         CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
                CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
                CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
-               CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
+               CommandParameter pminlength("minlength", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pminlength);
                CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
                CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
@@ -34,6 +34,7 @@ vector<string> TrimSeqsCommand::setParameters(){
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
                CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
+        CommandParameter plogtransform("logtransform", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plogtransform);
                CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
                CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
                CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
@@ -61,7 +62,7 @@ string TrimSeqsCommand::getHelpString(){
                string helpString = "";
                helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
                helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
-               helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
+               helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
         helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
@@ -84,6 +85,7 @@ string TrimSeqsCommand::getHelpString(){
                helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
                helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
                helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
+        helpString += "The logtransform parameter allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform. Default=F.\n";
                helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
                helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
                helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
@@ -256,7 +258,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
                        m->mothurConvert(temp, maxHomoP);  
 
-                       temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
+                       temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "1"; }
                        m->mothurConvert(temp, minLength); 
                        
                        temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
@@ -329,6 +331,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
             temp = validParameter.validFile(parameters, "keepforward", false);         if (temp == "not found") { temp = "F"; }
                        keepforward = m->isTrue(temp);
             
+            temp = validParameter.validFile(parameters, "logtransform", false);                if (temp == "not found") { temp = "F"; }
+                       logtransform = m->isTrue(temp);
+            
             temp = validParameter.validFile(parameters, "checkorient", false);         if (temp == "not found") { temp = "F"; }
                        reorient = m->isTrue(temp);
                        
@@ -422,7 +427,7 @@ int TrimSeqsCommand::execute(){
                
                if (countfile != "") {
             CountTable ct;
-            ct.readTable(countfile);
+            ct.readTable(countfile, true, false);
             nameCount = ct.getNameMap();
                        outputNames.push_back(trimCountFile);
                        outputNames.push_back(scrapCountFile);
@@ -443,7 +448,7 @@ int TrimSeqsCommand::execute(){
                                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
                        }
                }
-       
+        
         if (m->control_pressed) { return 0; }
             
         //fills lines and qlines
@@ -538,7 +543,7 @@ int TrimSeqsCommand::execute(){
             
             if (countfile != "") { //create countfile with group info included
                 CountTable* ct = new CountTable();
-                ct->readTable(trimCountFile);
+                ct->readTable(trimCountFile, true, false);
                 map<string, int> justTrimmedNames = ct->getNameMap();
                 delete ct;
                 
@@ -684,18 +689,30 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
             //create reoriented primer and barcode pairs
             map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
             for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
-                cout << "primer " << (it->second).forward << '\t' << (it->second).reverse << '\t' << primerNameVector[it->first] << endl;
-                cout << "rprimer " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
-                 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
+                  oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
                 rpairedPrimers[it->first] = tempPair;
+                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
             }
             for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
-                cout << "barcode " << (it->second).forward << '\t' << (it->second).reverse << '\t' << barcodeNameVector[it->first] << endl;
-                cout << "rbarcode " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
-                oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
+                 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
                 rpairedBarcodes[it->first] = tempPair;
+                 //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
+            }
+            int index = rpairedBarcodes.size();
+            for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
+                oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedBarcodes[index] = tempPair; index++;
+                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
             }
-            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+            
+            index = rpairedPrimers.size();
+            for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
+                oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedPrimers[index] = tempPair; index++;
+                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
+            }
+
+            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); 
         }
         
                while (moreSeqs) {
@@ -715,14 +732,16 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        int currentSeqsDiffs = 0;
 
                        Sequence currSeq(inFASTA); m->gobble(inFASTA);
-                       //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
+                       //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
+            Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
             
-                       QualityScores currQual;
+                       QualityScores currQual; QualityScores savedQual;
                        if(qFileName != ""){
                                currQual = QualityScores(qFile);  m->gobble(qFile);
+                savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
                 //cout << currQual.getName() << endl;
                        }
-                       
+                         
                        string origSeq = currSeq.getUnaligned();
                        if (origSeq != "") {
                                
@@ -738,10 +757,12 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                 
                                if(numBarcodes != 0){
                                        success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
-                                       if(success > bdiffs)            {       trashCode += 'b';       }
+                                       if(success > bdiffs)            {
+                        trashCode += 'b';
+                    }
                                        else{ currentSeqsDiffs += success;  }
                                }
-                               
+                               //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
                 if(numSpacers != 0){
                                        success = trimOligos->stripSpacer(currSeq, currQual);
                                        if(success > sdiffs)            {       trashCode += 's';       }
@@ -751,7 +772,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                 
                                if(numFPrimers != 0){
                                        success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
-                                       if(success > pdiffs)            {       trashCode += 'f';       }
+                                       if(success > pdiffs)            {
+                          trashCode += 'f';  
+                    }
                                        else{ currentSeqsDiffs += success;  }
                                }
                                
@@ -769,19 +792,19 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                     
                     int thisBarcodeIndex = 0;
                     int thisPrimerIndex = 0;
-                    
+                    //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
                     if(numBarcodes != 0){
-                        thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
-                        if(thisSuccess > bdiffs)               {       thisTrashCode += 'b';   }
+                        thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
+                        if(thisSuccess > bdiffs)               { thisTrashCode += "b"; }
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
                     }
-                    
+                    //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
                     if(numFPrimers != 0){
-                        thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
-                        if(thisSuccess > pdiffs)               {       thisTrashCode += 'f';   }
+                        thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
+                        if(thisSuccess > pdiffs)               { thisTrashCode += "f"; }
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
                     }
-                    
+                   
                     if (thisCurrentSeqsDiffs > tdiffs) {       thisTrashCode += 't';   }
                     
                     if (thisTrashCode == "") { 
@@ -790,11 +813,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                         currentSeqsDiffs = thisCurrentSeqsDiffs;
                         barcodeIndex = thisBarcodeIndex;
                         primerIndex = thisPrimerIndex;
-                        currSeq.reverseComplement();
+                        savedSeq.reverseComplement();
+                        currSeq.setAligned(savedSeq.getAligned());
                         if(qFileName != ""){
-                            currQual.flipQScores();
+                            savedQual.flipQScores();
+                            currQual.setScores(savedQual.getScores());
                         }
-                    }
+                    }else { trashCode += "(" + thisTrashCode + ")";  }
                 }
                 
                                if(keepFirst != 0){
@@ -811,9 +836,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                        int origLength = currSeq.getNumBases();
                                        
                                        if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
-                                       else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
-                                       else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
-                                       else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
+                                       else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage, logtransform);                            }
+                                       else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage, logtransform);        }
+                                       else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage, logtransform);     }
                                        else                                            {       success = 1;                            }
                                        
                                        //you don't want to trim, if it fails above then scrap it
@@ -1166,7 +1191,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
-                                              qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
+                                              qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform, 
                                              minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
                        pDataArray.push_back(tempTrim);
             
@@ -1383,9 +1408,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
                         string sname = "";  nameStream >> sname;
                         sname = sname.substr(1);
                         
-                        for (int i = 0; i < sname.length(); i++) {
-                            if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
-                        }
+                        m->checkName(sname);
                         
                         map<string, int>::iterator it = firstSeqNames.find(sname);
                         
@@ -1525,7 +1548,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        // get rest of line in case there is a primer name
                                        while (!inOligos.eof()) {       
                                                char c = inOligos.get(); 
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                else if (c == 32 || c == 9){;} //space or tab
                                                else {  group += c;  }
                                        } 
@@ -1555,12 +1578,14 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        // get rest of line in case there is a primer name
                                        while (!inOligos.eof()) {
                                                char c = inOligos.get();
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                else if (c == 32 || c == 9){;} //space or tab
                                                else {  group += c;  }
                                        }
                     
                     oligosPair newPrimer(oligo, roligo);
+                    
+                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
                                        
                                        //check for repeat barcodes
                     string tempPair = oligo+roligo;
@@ -1588,7 +1613,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                     string temp = "";
                     while (!inOligos.eof())    {
                                                char c = inOligos.get();
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                else if (c == 32 || c == 9){;} //space or tab
                                                else {  temp += c;  }
                                        }
@@ -1604,7 +1629,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                             if(reverseBarcode[i] == 'U')       {       reverseBarcode[i] = 'T';        }
                         }
                         
-                        oligosPair newPair(oligo, reverseOligo(reverseBarcode));
+                        reverseBarcode = reverseOligo(reverseBarcode);
+                        oligosPair newPair(oligo, reverseBarcode);
                         
                         if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
                         
@@ -1640,7 +1666,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
         }
         
                if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
-               
+        
                //add in potential combos
                if(barcodeNameVector.size() == 0){
                        barcodes[""] = 0;
@@ -1841,7 +1867,13 @@ bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
                if(qscores.getName() != ""){
                        qscores.trimQScores(-1, keepFirst);
                }
+
+//        sequence.printSequence(cout);cout << endl;
+        
                sequence.trim(keepFirst);
+        
+//        sequence.printSequence(cout);cout << endl << endl;;
+
                return success;
        }
        catch(exception& e) {