]> git.donarmstrong.com Git - mothur.git/commitdiff
pat's changes to seq.error command
authorpschloss <pschloss>
Mon, 20 Dec 2010 12:29:36 +0000 (12:29 +0000)
committerpschloss <pschloss>
Mon, 20 Dec 2010 12:29:36 +0000 (12:29 +0000)
13 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
mothur
qualityscores.cpp
qualityscores.h
reportfile.cpp [new file with mode: 0644]
reportfile.h [new file with mode: 0644]
seqerrorcommand.cpp
seqerrorcommand.h
sequence.cpp
sequence.hpp
trimseqscommand.cpp
trimseqscommand.h

index 560250413db85ada5d25ef760a16144c8ab77542..9cd6afa251389d08ae8459403207076b39be8917 100644 (file)
@@ -7,6 +7,8 @@
        objects = {
 
 /* Begin PBXFileReference section */
+               7E13BDD112BE3FEE004B8A53 /* reportfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reportfile.h; sourceTree = "<group>"; };
+               7E13BDD212BE3FEF004B8A53 /* reportfile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reportfile.cpp; sourceTree = "<group>"; };
                7E4EBD43122018FB00D85E7B /* simpsoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simpsoneven.h; sourceTree = "<group>"; };
                7E4EBD44122018FB00D85E7B /* simpsoneven.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = simpsoneven.cpp; sourceTree = "<group>"; };
                7E5B28DC121FEFCC0005339C /* shannoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shannoneven.h; sourceTree = "<group>"; };
                                A7DA2174113FECD400BF472F /* validparameter.h */,
                                A7DA2175113FECD400BF472F /* venn.cpp */,
                                A7DA2176113FECD400BF472F /* venn.h */,
+                               7E13BDD112BE3FEE004B8A53 /* reportfile.h */,
+                               7E13BDD212BE3FEF004B8A53 /* reportfile.cpp */,
                        );
                        name = mothur;
                        sourceTree = "<group>";
index 87ca6cbf64f873797aebb733ebb727b2e6f840f0..39408427d87d439d6cdf8e637dd656df5049becd 100644 (file)
@@ -555,7 +555,8 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                        if (m->control_pressed) {  return 0; }
                        
                        Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
-                       
+                       report.setCandidate(candidateSeq);
+
                        int origNumBases = candidateSeq->getNumBases();
                        string originalUnaligned = candidateSeq->getUnaligned();
                        int numBasesNeeded = origNumBases * threshold;
@@ -618,7 +619,6 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                        accnosFile << candidateSeq->getName() << wasBetter << endl;
                                }
                                
-                               report.setCandidate(candidateSeq);
                                report.setTemplate(templateSeq);
                                report.setSearchParameters(search, searchScore);
                                report.setAlignmentParameters(align, alignment);
@@ -706,7 +706,8 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                        istringstream iss (tempBuf,istringstream::in);
 
                        Sequence* candidateSeq = new Sequence(iss);  
-       
+                       report.setCandidate(candidateSeq);
+
                        int origNumBases = candidateSeq->getNumBases();
                        string originalUnaligned = candidateSeq->getUnaligned();
                        int numBasesNeeded = origNumBases * threshold;
@@ -777,7 +778,6 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                                        MPIWroteAccnos = true;
                                }
                                
-                               report.setCandidate(candidateSeq);
                                report.setTemplate(templateSeq);
                                report.setSearchParameters(search, searchScore);
                                report.setAlignmentParameters(align, alignment);
diff --git a/mothur b/mothur
index f7fdc1055fed0e8482a2986df295875fb1d08a1b..1389411c6e76765861114a1993ea56145bcebdf1 100755 (executable)
Binary files a/mothur and b/mothur differ
index f76d9826ee0224cb63d3520a30a0f7b100a5cecf..2ee71e066f22e421b7f312be03da1279b2aa3a7a 100644 (file)
@@ -34,11 +34,8 @@ QualityScores::QualityScores(ifstream& qFile, int l){
                seqLength = l;
                int score;
                
-               //string line;
-               //m->getline(qFile, line); 
-               //istringstream nameStream(line);
-       
                qFile >> seqName; 
+
                while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13 || c == -1){       break;  }       } // get rest of line 
                m->gobble(qFile);
                if (seqName == "") { m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg())); m->mothurOutEndLine(); }
@@ -106,8 +103,6 @@ QualityScores::QualityScores(ifstream& qFile, int l){
 }
 /**************************************************************************************************/
 
-/**************************************************************************************************/
-
 string QualityScores::getName(){
        
        try {
@@ -151,8 +146,10 @@ void QualityScores::trimQScores(int start, int end){
                        qScores = hold;         
                }
                if(start == -1){
-                       hold = vector<int>(qScores.begin(), qScores.begin()+end);       //not sure if indexing is correct
-                       qScores = hold;         
+                       if(qScores.size() > end){
+                               hold = vector<int>(qScores.begin(), qScores.begin()+end);
+                               qScores = hold;         
+                       }
                }
 
                seqLength = qScores.size();
@@ -267,7 +264,8 @@ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int
                int end = windowSize;
                int start = 0;
                
-
+               if(seqLength < windowSize) {    return 0;       }
+               
                while(start < seqLength){
                        double windowSum = 0.0000;
 
@@ -340,3 +338,24 @@ bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
 }
 
 /**************************************************************************************************/
+
+void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
+       try {
+       
+               for(int i=start-1;i<stop;i++){
+                       
+                       if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[i]] += weight;        }
+                       else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[i]] += weight;        }
+                       else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[i]] += weight;        }
+                       else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[i]] += weight;        }
+                       else if(errorSeq[i] == 'd')     {       /*              nothing         */                                              }
+
+               }       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimSeqsCommand", "updateQScoreErrorMap");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
index 5db43113f1b4aedea7761cd4ad7bcc3575542919..dee5917208ed19992ffa9d2c7c6cb7ddaba6f85a 100644 (file)
@@ -29,6 +29,7 @@ public:
        bool stripQualRollingAverage(Sequence&, double);
        bool stripQualWindowAverage(Sequence&, int, int, double);
        bool cullQualAverage(Sequence&, double);
+       void updateQScoreErrorMap(map<char, vector<int> >&, string, int, int, int);
 private:
        
        double calculateAverage();
diff --git a/reportfile.cpp b/reportfile.cpp
new file mode 100644 (file)
index 0000000..f392ed9
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ *  reportfile.cpp
+ *  Mothur
+ *
+ *  Created by Pat Schloss on 12/19/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "reportfile.h"
+
+/**************************************************************************************************/
+
+ReportFile::ReportFile(){
+       try {
+               m = MothurOut::getInstance();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReportFile", "ReportFile");
+               exit(1);
+       }                                                       
+}
+
+/**************************************************************************************************/
+
+ReportFile::ReportFile(ifstream& repFile, string repFileName){
+       try {           
+               m = MothurOut::getInstance();
+               
+               m->openInputFile(repFileName, repFile);
+               m->getline(repFile);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReportFile", "ReportFile");
+               exit(1);
+       }                                                       
+}
+
+
+/**************************************************************************************************/
+
+ReportFile::ReportFile(ifstream& repFile){
+       try {
+               
+               m = MothurOut::getInstance();
+               
+               repFile >> queryName;
+               repFile >> queryLength;
+               repFile >> templateName;
+               repFile >> templateLength;
+               repFile >> searchMethod;
+               repFile >> searchScore;
+               repFile >> alignmentMethod;
+               repFile >> queryStart;
+               repFile >> queryEnd;
+               repFile >> templateStart;
+               repFile >> templateEnd;
+               repFile >> pairwiseAlignmentLength;
+               repFile >> gapsInQuery;
+               repFile >> gapsInTemplate;
+               repFile >> longestInsert;
+               repFile >> simBtwnQueryAndTemplate;
+
+               m->gobble(repFile);             
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReportFile", "ReportFile");
+               exit(1);
+       }                                                       
+       
+}
+
+/**************************************************************************************************/
diff --git a/reportfile.h b/reportfile.h
new file mode 100644 (file)
index 0000000..1bd2c94
--- /dev/null
@@ -0,0 +1,57 @@
+#ifndef REPORTFILE
+#define REPORTFILE
+
+/*
+ *  reportfile.h
+ *  Mothur
+ *
+ *  Created by Pat Schloss on 7/12/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "mothur.h"
+#include "mothurout.h"
+
+/**************************************************************************************************/
+
+class ReportFile {
+public:
+       ReportFile();
+       ReportFile(ifstream&);
+       ReportFile(ifstream&, string);
+       
+       string getQueryName()                           {       return queryName;                               }
+       string getTemplateName()                        {       return templateName;                    }
+       string getSearchMethod()                        {       return searchMethod;                    }
+       string getAlignmentMethod()                     {       return alignmentMethod;                 }
+       
+       int getQueryLength()                            {       return queryLength;                             }
+       int getTemplateLength()                         {       return templateLength;                  }
+       int getQueryStart()                                     {       return queryStart;                              }
+       int getQueryEnd()                                       {       return queryEnd;                                }
+       int getTemplateStart()                          {       return templateStart;                   }
+       int getTemplateEnd()                            {       return templateEnd;                             }
+       int getPairwiseAlignmentLength()        {       return pairwiseAlignmentLength; }
+       int getGapsInQuery()                            {       return gapsInQuery;                             }
+       int getGapsInTemplate()                         {       return gapsInTemplate;                  }
+       int getLongestInsert()                          {       return longestInsert;                   }
+       
+       float getSearchScore()                          {       return searchScore;                             }
+       float getSimBtwnQueryAndTemplate()      {       return simBtwnQueryAndTemplate; }
+       
+
+private:
+       
+       MothurOut* m;
+               
+       string queryName, templateName, searchMethod, alignmentMethod;
+       int queryLength, templateLength, queryStart, queryEnd, templateStart, templateEnd, pairwiseAlignmentLength, gapsInQuery, gapsInTemplate, longestInsert;
+       float searchScore, simBtwnQueryAndTemplate;
+       
+};
+
+/**************************************************************************************************/
+
+#endif
index ea049544c818f217f84931577b604af9e24e4d6e..77b12ed84a0adfbc58b2dc5a202691034b69eb7d 100644 (file)
@@ -8,11 +8,13 @@
  */
 
 #include "seqerrorcommand.h"
+#include "reportfile.h"
+#include "qualityscores.h"
 
 //**********************************************************************************************************************
 vector<string> SeqErrorCommand::getValidParameters(){  
        try {
-               string Array[] =  {"query", "reference", "name", "threshold", "inputdir", "outputdir"};
+               string Array[] =  {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -72,7 +74,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        string temp;
                        
                        //valid paramters for this command
-                       string AlignArray[] =  {"query", "reference", "name", "threshold", "inputdir", "outputdir"};
+                       string AlignArray[] =  {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"};
                        
 //need to implement name file option
                        
@@ -116,12 +118,28 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                                }
                                
                                it = parameters.find("name");
-                               //user has given a template file
+                               //user has given a names file
                                if(it != parameters.end()){ 
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+
+                               it = parameters.find("qfile");
+                               //user has given a quality score file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("report");
+                               //user has given a alignment report file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["report"] = inputDir + it->second;           }
+                               }
                                
                        }
                        //check for required parameters
@@ -133,10 +151,24 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        if (referenceFileName == "not found") { m->mothurOut("reference is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; }
                        else if (referenceFileName == "not open") { abort = true; }     
                        
-                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+
+                       //check for optional parameters
                        namesFileName = validParameter.validFile(parameters, "name", true);
                        if(namesFileName == "not found"){       namesFileName = "";     }
-                       cout << namesFileName << endl;
+                       
+                       qualFileName = validParameter.validFile(parameters, "qfile", true);
+                       if(qualFileName == "not found"){        qualFileName = "";      }
+
+                       reportFileName = validParameter.validFile(parameters, "report", true);
+                       if(reportFileName == "not found"){      reportFileName = "";    }
+                       
+                       if((reportFileName != "" && qualFileName == "") || (reportFileName == "" && qualFileName != "")){
+                               m->mothurOut("if you use either a qual file or a report file, you have to have both.");
+                               m->mothurOutEndLine();
+                               abort = true; 
+                       }
+                       
+                       
                        
                        outputDir = validParameter.validFile(parameters, "outputdir", false);
                        if (outputDir == "not found"){  
@@ -205,7 +237,18 @@ int SeqErrorCommand::execute(){
                
                ifstream queryFile;
                m->openInputFile(queryFileName, queryFile);
-                               
+               
+               ifstream reportFile;
+               ifstream qualFile;
+
+               ReportFile report;
+               QualityScores quality;
+
+               if(qualFileName != "" && reportFileName != ""){
+                       m->openInputFile(qualFileName, qualFile);
+                       report = ReportFile(reportFile, reportFileName);
+               }
+               
                int totalBases = 0;
                int totalMatches = 0;
                
@@ -214,10 +257,14 @@ int SeqErrorCommand::execute(){
                int numSeqs = 0;
                
                map<string, int>::iterator it;
+               map<char, vector<int> > qScoreErrorMap;
+               qScoreErrorMap['m'].assign(41, 0);
+               qScoreErrorMap['s'].assign(41, 0);
+               qScoreErrorMap['i'].assign(41, 0);
+               qScoreErrorMap['a'].assign(41, 0);
                
                while(queryFile){
                        Compare minCompare;
-                       
                        Sequence query(queryFile);
                        
                        for(int i=0;i<numRefs;i++){
@@ -226,19 +273,27 @@ int SeqErrorCommand::execute(){
                                if(currCompare.errorRate < minCompare.errorRate){
                                        minCompare = currCompare;
                                }
-                               
                        }
 
                        if(namesFileName != ""){
                                it = weights.find(query.getName());
                                minCompare.weight = it->second;
                        }
-                       else {
-                               minCompare.weight = 1;
-                       }
+                       else    {       minCompare.weight = 1;  }
 
                        printErrorData(minCompare);
 
+                       if(qualFileName != "" && reportFileName != ""){
+                               report = ReportFile(reportFile);
+                               
+                               int origLength = report.getQueryLength();
+                               int startBase = report.getQueryStart();
+                               int endBase = report.getQueryEnd();
+
+                               quality = QualityScores(qualFile, origLength);
+                               quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight);
+                       }                       
+                       
                        if(minCompare.errorRate < threshold){
                                totalBases += (minCompare.total * minCompare.weight);
                                totalMatches += minCompare.matches * minCompare.weight;
@@ -250,11 +305,23 @@ int SeqErrorCommand::execute(){
                                numSeqs++;
                        }
                        
+                       
                }
                queryFile.close();
                
                int total = 0;
                
+               if(qualFileName != "" && reportFileName != ""){
+                       string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality";
+                       ofstream errorQualityFile;
+                       m->openOutputFile(errorQualityFileName, errorQualityFile);
+                       outputNames.push_back(errorQualityFileName);  outputTypes["error.quality"].push_back(errorQualityFileName);
+                       
+                       errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl;
+                       for(int i=0;i<41;i++){
+                               errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl;
+                       }
+               }
                
                string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count";
                ofstream errorCountFile;
index c8dd4efb232b4e5c2ce0727c8d5badaf2433d6a9..a012d68e614e82c18a59fe79c115e213a614d35b 100644 (file)
@@ -60,7 +60,7 @@ private:
        void printErrorHeader();
        void printErrorData(Compare);
        
-       string queryFileName, referenceFileName, namesFileName, errorSummaryFileName, errorSeqFileName, outputDir;
+       string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, errorSummaryFileName, errorSeqFileName, outputDir;
        double threshold;
        int numRefs;
        ofstream errorSummaryFile, errorSeqFile;
index 78fa266105c6be21ce5442250ef8c00f389ab078..42c26d59ab4df8cd9e0d75937a476617135adbd7 100644 (file)
@@ -565,4 +565,16 @@ void Sequence::reverseComplement(){
        aligned = temp;
        
 }
-/**************************************************************************************************/
+
+//********************************************************************************************************************
+
+void Sequence::trim(int length){
+       
+       if(numBases > length){
+               unaligned = unaligned.substr(0,length);
+               numBases = length;
+       }
+       
+}
+
+///**************************************************************************************************/
index c634f78b6522e43742800b0a4541d910eb7d6be3..48287af367092715ab999b9ffabf02229c68cacc 100644 (file)
@@ -37,6 +37,7 @@ public:
        void setAligned(string);
        void setLength();
        void reverseComplement();
+       void trim(int);
        
        string convert2ints();
        string getName();
index acbead2ed3a77171980ab49e4d8e098dce3c52d8..992626113bfffd8c103b7ee91cc20a81d229f7eb 100644 (file)
@@ -14,7 +14,9 @@
 vector<string> TrimSeqsCommand::getValidParameters(){  
        try {
                string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
-                                                                       "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
+                                                                       "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
+                                                                       "keepfirst", "removelast",
+                                                                       "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -74,8 +76,10 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string AlignArray[] =  {"fasta", "flip", "group","oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
-                                                                       "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
+                       string AlignArray[] =  {        "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
+                                                               "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
+                                                               "keepfirst", "removelast",
+                                                               "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
                        
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                        
@@ -204,14 +208,20 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
                        convert(temp, qWindowAverage);
 
-                       temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "100"; }
+                       temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
                        convert(temp, qWindowSize);
 
-                       temp = validParameter.validFile(parameters, "qstepsize", false);                if (temp == "not found") { temp = "10"; }
+                       temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
                        convert(temp, qWindowStep);
 
                        temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
                        convert(temp, qAverage);
+
+                       temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
+                       convert(temp, keepFirst);
+
+                       temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
+                       convert(temp, removeLast);
                        
                        temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
                        allFiles = m->isTrue(temp);
@@ -337,7 +347,7 @@ int TrimSeqsCommand::execute(){
                        outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
                        getOligos(fastaFileNames, qualFileNames);
                }
-               
+
                vector<unsigned long int> fastaFilePos;
                vector<unsigned long int> qFilePos;
                
@@ -507,25 +517,12 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                string trashCode = "";
                                int currentSeqsDiffs = 0;
 
-                               if(qFileName != ""){
-                                       if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
-                                       else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
-                                       else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
-                                       else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
-
-                                       if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
-                                               success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
-                                       }
-
-                                       if(!success)                            {       trashCode += 'q';       }
-                               }
-                       
                                if(barcodes.size() != 0){
                                        success = stripBarcode(currSeq, currQual, groupBar);
                                        if(success > bdiffs)            {       trashCode += 'b';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
-
+                               
                                if(numFPrimers != 0){
                                        success = stripForward(currSeq, currQual, groupPrime);
                                        if(success > pdiffs)            {       trashCode += 'f';       }
@@ -533,11 +530,36 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                }
                                
                                if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
-
+                               
                                if(numRPrimers != 0){
                                        success = stripReverse(currSeq, currQual);
                                        if(!success)                            {       trashCode += 'r';       }
                                }
+
+                               if(keepFirst != 0){
+                                       success = keepFirstTrim(currSeq, currQual);
+                               }
+                               
+                               if(removeLast != 0){
+                                       success = removeLastTrim(currSeq, currQual);
+                                       if(!success)                            {       trashCode += 'l';       }
+                               }
+
+                               
+                               if(qFileName != ""){
+                                       
+                                       if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
+                                       else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
+                                       else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
+                                       else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
+                                       else                                            {       success = 1;                            }
+
+//                                     if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
+//                                             success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+//                                     }
+                                       
+                                       if(!success)                            {       trashCode += 'q';       }
+                               }                               
                
                                if(minLength > 0 || maxLength > 0){
                                        success = cullLength(currSeq);
@@ -552,7 +574,10 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                        if(!success)                            {       trashCode += 'n';       }
                                }
                                
-                               if(flip){       currSeq.reverseComplement();            currQual.flipQScores(); }               // should go last                       
+                               if(flip){               // should go last                       
+                                       currSeq.reverseComplement();
+                                       currQual.flipQScores(); 
+                               }
                                
                                if(trashCode.length() == 0){
                                        currSeq.setAligned(currSeq.getUnaligned());
@@ -1128,7 +1153,6 @@ int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group
                                seq.setUnaligned(rawSequence.substr(oligo.length()));
                                if(qual.getName() != ""){
                                        qual.trimQScores(oligo.length(), -1);
-                                       
                                }
                                success = 0;
                                break;
@@ -1266,6 +1290,52 @@ bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
 
 //***************************************************************************************************************
 
+bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
+       try {
+               bool success = 1;
+               if(qscores.getName() != ""){
+                       qscores.trimQScores(-1, keepFirst);
+               }
+               sequence.trim(keepFirst);
+               return success;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "keepFirstTrim", "countDiffs");
+               exit(1);
+       }
+       
+}      
+
+//***************************************************************************************************************
+
+bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
+       try {
+               bool success = 0;
+               
+               int length = sequence.getNumBases() - removeLast;
+               
+               if(length > 0){
+                       if(qscores.getName() != ""){
+                               qscores.trimQScores(-1, length);
+                       }
+                       sequence.trim(length);
+                       success = 1;
+               }
+               else{
+                       success = 0;
+               }
+
+               return success;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "removeLastTrim", "countDiffs");
+               exit(1);
+       }
+       
+}      
+
+//***************************************************************************************************************
+
 bool TrimSeqsCommand::cullLength(Sequence& seq){
        try {
        
@@ -1397,78 +1467,5 @@ int TrimSeqsCommand::countDiffs(string oligo, string seq){
        }
 
 }
-//***************************************************************************************************************
-
-//bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
-//     try {
-//             
-//             string rawSequence = seq.getUnaligned();
-//             int seqLength = seq.getNumBases();
-//             bool success = 0;       //guilty until proven innocent
-//             string name;
-//             
-//             qFile >> name;
-//             if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
-//             
-//             while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
-//             
-//             int score;
-//             int end = seqLength;
-//             
-//             for(int i=0;i<seqLength;i++){
-//                     qFile >> score;
-//                     
-//                     if(score < qThreshold){
-//                             end = i;
-//                             break;
-//                     }
-//             }
-//             for(int i=end+1;i<seqLength;i++){
-//                     qFile >> score;
-//             }
-//             
-//             seq.setUnaligned(rawSequence.substr(0,end));
-//             
-//             return 1;
-//     }
-//     catch(exception& e) {
-//             m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold");
-//             exit(1);
-//     }
-//}
-
-//***************************************************************************************************************
-
-//bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){
-//     try {
-//             string rawSequence = seq.getUnaligned();
-//             int seqLength = seq.getNumBases();
-//             bool success = 0;       //guilty until proven innocent
-//             string name;
-//             
-//             qFile >> name;
-//             if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
-//             
-//             while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
-//             
-//             float score;    
-//             float average = 0;
-//             
-//             for(int i=0;i<seqLength;i++){
-//                     qFile >> score;
-//                     average += score;
-//             }
-//             average /= seqLength;
-//
-//             if(average >= qAverage) {       success = 1;    }
-//             else                                    {       success = 0;    }
-//             
-//             return success;
-//     }
-//     catch(exception& e) {
-//             m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
-//             exit(1);
-//     }
-//}
 
 //***************************************************************************************************************
index 2f8fbd7c499559bf65f9f5393982bd00dddfbcc3..21a5d41e759c761a3302340319c857544ffc313e 100644 (file)
@@ -42,6 +42,9 @@ private:
        int stripBarcode(Sequence&, QualityScores&, int&);
        int stripForward(Sequence&, QualityScores&, int&);
        bool stripReverse(Sequence&, QualityScores&);
+       bool keepFirstTrim(Sequence&, QualityScores&);
+       bool removeLastTrim(Sequence&, QualityScores&);
+
        bool cullLength(Sequence&);
        bool cullHomoP(Sequence&);
        bool cullAmbigs(Sequence&);
@@ -54,7 +57,7 @@ private:
        
        bool flip, allFiles, qtrim;
        int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, comboStarts;
-       int qWindowSize, qWindowStep;
+       int qWindowSize, qWindowStep, keepFirst, removeLast;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
        vector<string> revPrimer, outputNames;
        set<string> filesToRemove;