]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
fixed problem with trim.seqs writing over files if barcodes belonged to the same...
[mothur.git] / trimseqscommand.cpp
index 7835b7598ae6ddcc76001788d44daf1faf17cfdd..4947b38ec8acb49ac0925f8b277133cc18196519 100644 (file)
@@ -163,7 +163,7 @@ void TrimSeqsCommand::help(){
                m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
                m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
                m->mothurOut("The fasta parameter is required.\n");
-               m->mothurOut("The flip parameter .... The default is 0.\n");
+               m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
                m->mothurOut("The oligos parameter .... The default is "".\n");
                m->mothurOut("The maxambig parameter .... The default is -1.\n");
                m->mothurOut("The maxhomop parameter .... The default is 0.\n");
@@ -224,8 +224,9 @@ int TrimSeqsCommand::execute(){
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
                                        ifstream inFASTA;
+                                       int numSeqs;
                                        openInputFile(fastaFile, inFASTA);
-                                       int numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                                       getNumSeqs(inFASTA, numSeqs);
                                        inFASTA.close();
                                        
                                        lines.push_back(new linePair(0, numSeqs));
@@ -266,8 +267,9 @@ int TrimSeqsCommand::execute(){
                                if (m->control_pressed) {  return 0; }
                #else
                                ifstream inFASTA;
+                               int numSeqs;
                                openInputFile(fastaFile, inFASTA);
-                               int numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               getNumSeqs(inFASTA, numSeqs);
                                inFASTA.close();
                                
                                lines.push_back(new linePair(0, numSeqs));
@@ -281,10 +283,13 @@ int TrimSeqsCommand::execute(){
                for(int i=0;i<fastaFileNames.size();i++){
                        ifstream inFASTA;
                        string seqName;
-                       openInputFile(getRootName(fastaFile) + groupVector[i] + ".fasta", inFASTA);
+                       //openInputFile(getRootName(fastaFile) +  groupVector[i] + ".fasta", inFASTA);
+                       openInputFile(fastaFileNames[i], inFASTA);
                        ofstream outGroups;
-                       openOutputFile(outputDir + getRootName(getSimpleName(fastaFile)) + groupVector[i] + ".groups", outGroups);
-                       outputNames.push_back(outputDir + getRootName(getSimpleName(fastaFile)) + groupVector[i] + ".groups");
+                       string outGroupFilename = outputDir + getRootName(getSimpleName(fastaFileNames[i])) + "groups";
+                       openOutputFile(outGroupFilename, outGroups);
+                       //openOutputFile(outputDir + getRootName(getSimpleName(fastaFile)) + groupVector[i] + ".groups", outGroups);
+                       outputNames.push_back(outGroupFilename);
                        
                        while(!inFASTA.eof()){
                                if(inFASTA.get() == '>'){
@@ -351,15 +356,8 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                for(int i=0;i<line->num;i++){
                                
                        if (m->control_pressed) { 
-                               inFASTA.close(); 
-                               outFASTA.close();
-                               scrapFASTA.close();
-                               if (oligoFile != "") {   outGroups.close();   }
-                               if(qFileName != "")     {       qFile.close();  }
-                               for(int i=0;i<fastaFileNames.size();i++){
-                                       fastaFileNames[i]->close();
-                                       delete fastaFileNames[i];
-                               }       
+                               inFASTA.close(); outFASTA.close(); scrapFASTA.close(); if (oligoFile != "") {    outGroups.close();   } if(qFileName != "")     {       qFile.close();  }
+                               for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }     
                                for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
                                return 0;
                        }
@@ -377,9 +375,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                if(qFileName != ""){
                                        if(qThreshold != 0)             {       success = stripQualThreshold(currSeq, qFile);   }
                                        else if(qAverage != 0)  {       success = cullQualAverage(currSeq, qFile);              }
-                                       if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { 
+                                       
+                                       if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
                                                success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
                                        }
+
                                        if(!success)                    {       trashCode += 'q';                                                               }
                                }
                        
@@ -583,13 +583,13 @@ void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec){ //vector<ofstream*
                                }
                                else if(type == "barcode"){
                                        inOligos >> group;
-                                       barcodes[oligo]=index++;
+                                       barcodes[oligo]=index; index++;
                                        groupVector.push_back(group);
                                        
                                        if(allFiles){
                                                //outFASTAVec.push_back(new ofstream((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta").c_str(), ios::ate));
-                                               outputNames.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta"));
-                                               outFASTAVec.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta"));
+                                               outputNames.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + toString(index) + "." + group + ".fasta"));
+                                               outFASTAVec.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + toString(index) + "." + group + ".fasta"));
                                        }
                                }
                        }
@@ -648,7 +648,7 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){
                                                maxLength = it->first.length();
                                        }
                                }
-                               alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (maxLength+bdiffs+1));  
+                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
 
                        }else{ alignment = NULL; } 
                        
@@ -682,6 +682,9 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){
                                
                                int newStart=0;
                                int numDiff = countDiffs(oligo, temp);
+                               
+//                             cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
+                               
                                if(numDiff < minDiff){
                                        minDiff = numDiff;
                                        minCount = 1;
@@ -762,7 +765,7 @@ int TrimSeqsCommand::stripForward(Sequence& seq){
                                                maxLength = forPrimer[i].length();
                                        }
                                }
-                               alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (maxLength+pdiffs+1));  
+                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
 
                        }else{ alignment = NULL; } 
                        
@@ -995,32 +998,42 @@ int TrimSeqsCommand::countDiffs(string oligo, string seq){
 
 bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
        try {
+//             string rawSequence = seq.getUnaligned();
+//             int seqLength;  // = rawSequence.length();
+//             string name, temp, temp2;
+//             
+//             qFile >> name;
+//             
+//             //get rest of line
+//             temp = "";
+//             while (!qFile.eof())    {       
+//                     char c = qFile.get(); 
+//                     if (c == 10 || c == 13){        break;  }       
+//                     else { temp += c; }
+//             } 
+//     
+//             int pos = temp.find("length");
+//             if (pos == temp.npos) { m->mothurOut("Cannot find length in qfile for " + seq.getName()); m->mothurOutEndLine();  seqLength = 0;  }
+//             else {
+//                     string tempLength = temp.substr(pos);
+//                     istringstream iss (tempLength,istringstream::in);
+//                     iss >> temp;
+//             }
+//             
+//             splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242
+//             convert(temp, seqLength); //converts string to int
+//     
+//             if (name.length() != 0) {  if(name.substr(1) != seq.getName())  {       m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); }  } 
+               
                string rawSequence = seq.getUnaligned();
-               int seqLength;  // = rawSequence.length();
-               string name, temp, temp2;
+               int seqLength = seq.getNumBases();
+               bool success = 0;       //guilty until proven innocent
+               string name;
                
                qFile >> name;
+               if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
                
-               //get rest of line
-               temp = "";
-               while (!qFile.eof())    {       
-                       char c = qFile.get(); 
-                       if (c == 10 || c == 13){        break;  }       
-                       else { temp += c; }
-               } 
-       
-               int pos = temp.find("length");
-               if (pos == temp.npos) { m->mothurOut("Cannot find length in qfile for " + seq.getName()); m->mothurOutEndLine();  seqLength = 0;  }
-               else {
-                       string tempLength = temp.substr(pos);
-                       istringstream iss (tempLength,istringstream::in);
-                       iss >> temp;
-               }
-               
-               splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242
-               convert(temp, seqLength); //converts string to int
-       
-               if (name.length() != 0) {  if(name.substr(1) != seq.getName())  {       m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); }  } 
+               while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
                
                int score;
                int end = seqLength;
@@ -1028,7 +1041,7 @@ bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
                for(int i=0;i<seqLength;i++){
                        qFile >> score;
                        
-                       if(score <= qThreshold){
+                       if(score < qThreshold){
                                end = i;
                                break;
                        }