]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
made a small mod to the alignment parameters in trim.seqs
[mothur.git] / trimseqscommand.cpp
index 7835b7598ae6ddcc76001788d44daf1faf17cfdd..580fc22c978e0105b464e214593599fe15970f1d 100644 (file)
@@ -163,7 +163,7 @@ void TrimSeqsCommand::help(){
                m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
                m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
                m->mothurOut("The fasta parameter is required.\n");
-               m->mothurOut("The flip parameter .... The default is 0.\n");
+               m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
                m->mothurOut("The oligos parameter .... The default is "".\n");
                m->mothurOut("The maxambig parameter .... The default is -1.\n");
                m->mothurOut("The maxhomop parameter .... The default is 0.\n");
@@ -224,8 +224,9 @@ int TrimSeqsCommand::execute(){
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
                                        ifstream inFASTA;
+                                       int numSeqs;
                                        openInputFile(fastaFile, inFASTA);
-                                       int numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                                       getNumSeqs(inFASTA, numSeqs);
                                        inFASTA.close();
                                        
                                        lines.push_back(new linePair(0, numSeqs));
@@ -266,8 +267,9 @@ int TrimSeqsCommand::execute(){
                                if (m->control_pressed) {  return 0; }
                #else
                                ifstream inFASTA;
+                               int numSeqs;
                                openInputFile(fastaFile, inFASTA);
-                               int numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               getNumSeqs(inFASTA, numSeqs);
                                inFASTA.close();
                                
                                lines.push_back(new linePair(0, numSeqs));
@@ -377,9 +379,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                if(qFileName != ""){
                                        if(qThreshold != 0)             {       success = stripQualThreshold(currSeq, qFile);   }
                                        else if(qAverage != 0)  {       success = cullQualAverage(currSeq, qFile);              }
-                                       if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { 
+                                       
+                                       if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
                                                success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
                                        }
+
                                        if(!success)                    {       trashCode += 'q';                                                               }
                                }
                        
@@ -648,7 +652,7 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){
                                                maxLength = it->first.length();
                                        }
                                }
-                               alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (maxLength+bdiffs+1));  
+                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
 
                        }else{ alignment = NULL; } 
                        
@@ -682,6 +686,9 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){
                                
                                int newStart=0;
                                int numDiff = countDiffs(oligo, temp);
+                               
+//                             cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
+                               
                                if(numDiff < minDiff){
                                        minDiff = numDiff;
                                        minCount = 1;
@@ -762,7 +769,7 @@ int TrimSeqsCommand::stripForward(Sequence& seq){
                                                maxLength = forPrimer[i].length();
                                        }
                                }
-                               alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (maxLength+pdiffs+1));  
+                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
 
                        }else{ alignment = NULL; } 
                        
@@ -995,32 +1002,42 @@ int TrimSeqsCommand::countDiffs(string oligo, string seq){
 
 bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
        try {
+//             string rawSequence = seq.getUnaligned();
+//             int seqLength;  // = rawSequence.length();
+//             string name, temp, temp2;
+//             
+//             qFile >> name;
+//             
+//             //get rest of line
+//             temp = "";
+//             while (!qFile.eof())    {       
+//                     char c = qFile.get(); 
+//                     if (c == 10 || c == 13){        break;  }       
+//                     else { temp += c; }
+//             } 
+//     
+//             int pos = temp.find("length");
+//             if (pos == temp.npos) { m->mothurOut("Cannot find length in qfile for " + seq.getName()); m->mothurOutEndLine();  seqLength = 0;  }
+//             else {
+//                     string tempLength = temp.substr(pos);
+//                     istringstream iss (tempLength,istringstream::in);
+//                     iss >> temp;
+//             }
+//             
+//             splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242
+//             convert(temp, seqLength); //converts string to int
+//     
+//             if (name.length() != 0) {  if(name.substr(1) != seq.getName())  {       m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); }  } 
+               
                string rawSequence = seq.getUnaligned();
-               int seqLength;  // = rawSequence.length();
-               string name, temp, temp2;
+               int seqLength = seq.getNumBases();
+               bool success = 0;       //guilty until proven innocent
+               string name;
                
                qFile >> name;
+               if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
                
-               //get rest of line
-               temp = "";
-               while (!qFile.eof())    {       
-                       char c = qFile.get(); 
-                       if (c == 10 || c == 13){        break;  }       
-                       else { temp += c; }
-               } 
-       
-               int pos = temp.find("length");
-               if (pos == temp.npos) { m->mothurOut("Cannot find length in qfile for " + seq.getName()); m->mothurOutEndLine();  seqLength = 0;  }
-               else {
-                       string tempLength = temp.substr(pos);
-                       istringstream iss (tempLength,istringstream::in);
-                       iss >> temp;
-               }
-               
-               splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242
-               convert(temp, seqLength); //converts string to int
-       
-               if (name.length() != 0) {  if(name.substr(1) != seq.getName())  {       m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); }  } 
+               while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
                
                int score;
                int end = seqLength;
@@ -1028,7 +1045,7 @@ bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
                for(int i=0;i<seqLength;i++){
                        qFile >> score;
                        
-                       if(score <= qThreshold){
+                       if(score < qThreshold){
                                end = i;
                                break;
                        }