git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.h
linux screen.seqs segfault
[mothur.git] / trimseqscommand.h
index 8d9a57a54ffd0db053956200602f3aa06fb4de5b..80e1ebe1a00e2f56ec19a6f23c3ba890af1c4f10 100644 (file)
@@ -27,8 +27,9 @@ public:
        vector<string> setParameters();
        string getCommandName()                 { return "trim.seqs";   }
        string getCommandCategory()             { return "Sequence Processing";         }
-       string getOutputFileNameTag(string, string);
+       
        string getHelpString(); 
+    string getOutputPattern(string);   
        string getCitation() { return "http://www.mothur.org/wiki/Trim.seqs"; }
        string getDescription()         { return "provides the preprocessing features needed to screen and sort pyrosequences"; }
 
@@ -61,7 +62,6 @@ private:
        vector<string> revPrimer, outputNames;
        set<string> filesToRemove;
        map<string, int> barcodes;
-    map<string, int> rbarcodes;
        vector<string> groupVector;
        map<string, int> primers;
     vector<string>  linker;
@@ -102,7 +102,6 @@ struct trimData {
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
     vector<string> revPrimer;
        map<string, int> barcodes;
-    map<string, int> rbarcodes;
        map<string, int> primers;
     map<string, int> nameCount;
     vector<string>  linker;
@@ -116,7 +115,7 @@ struct trimData {
     
        trimData(){}
        trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
-                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, map<string, int> rbar, vector<string> revP, vector<string> li, vector<string> spa, 
+                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, 
                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
                       int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm, map<string, int> ncount) {
@@ -149,7 +148,6 @@ struct trimData {
         sdiffs = sd;
         tdiffs = td;
         barcodes = bar;
-        rbarcodes = rbar;
         primers = pri;      numFPrimers = primers.size();
         revPrimer = revP;   numRPrimers = revPrimer.size();
         linker = li;        numLinkers = linker.size();
@@ -254,9 +252,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                }
                
                
-               TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->rbarcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
+               TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
         
-               pDataArray->count = pDataArray->lineEnd;
+               pDataArray->count = 0;
                for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
                                   
                        if (pDataArray->m->control_pressed) { 
@@ -283,6 +281,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                        
                        string origSeq = currSeq.getUnaligned();
                        if (origSeq != "") {
+                pDataArray->count++;
                                
                                int barcodeIndex = 0;
                                int primerIndex = 0;
@@ -299,12 +298,6 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                        else{ currentSeqsDiffs += success;  }
                                }
                 
-                               if(pDataArray->rbarcodes.size() != 0){
-                                       success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
-                                       if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
-                                       else{ currentSeqsDiffs += success;  }
-                               }
-                
                 if(pDataArray->numSpacers != 0){
                                        success = trimOligos.stripSpacer(currSeq, currQual);
                                        if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
@@ -403,31 +396,10 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                }
                                
                                if(trashCode.length() == 0){
-                                       currSeq.setAligned(currSeq.getUnaligned());
-                                       currSeq.printSequence(trimFASTAFile);
-                                       
-                                       if(pDataArray->qFileName != ""){
-                                               currQual.printQScores(trimQualFile);
-                                       }
-                                       
-                                       if(pDataArray->nameFile != ""){
-                                               map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
-                                               if (itName != pDataArray->nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
-                                               else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
-                                       }
-                                       
-                    int numRedundants = 0;
-                    if (pDataArray->countfile != "") {
-                        map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
-                        if (itCount != pDataArray->nameCount.end()) { 
-                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
-                            numRedundants = itCount->second-1;
-                        }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
-                    }
-                                       
-                                       if (pDataArray->createGroup) {
+                    string thisGroup = "";
+                    if (pDataArray->createGroup) {
                                                if(pDataArray->barcodes.size() != 0){
-                                                       string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
+                                                       thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
                                                        if (pDataArray->primers.size() != 0) { 
                                                                if (pDataArray->primerNameVector[primerIndex] != "") { 
                                                                        if(thisGroup != "") {
@@ -437,50 +409,81 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                                        }
                                                                } 
                                                        }
-                                                       
-                                                       if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
-                            else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
-                                                       
-                                                       if (pDataArray->nameFile != "") {
-                                                               map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
-                                                               if (itName != pDataArray->nameMap.end()) { 
-                                                                       vector<string> thisSeqsNames; 
-                                                                       pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
-                                    numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
-                                                                       for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
-                                                                               outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
-                                                                       }
-                                                               }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }                                                   
-                                                       }
-                                                       
-                                                       map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
-                                                       if (it == pDataArray->groupCounts.end()) {      pDataArray->groupCounts[thisGroup] = 1 + numRedundants; }
-                                                       else { pDataArray->groupCounts[it->first] += (1 + numRedundants); }
+                        }
+                    }
+                    
+                    int pos = thisGroup.find("ignore");
+                    if (pos == string::npos) {
+                        
+                        currSeq.setAligned(currSeq.getUnaligned());
+                        currSeq.printSequence(trimFASTAFile);
+                        
+                        if(pDataArray->qFileName != ""){
+                            currQual.printQScores(trimQualFile);
+                        }
+                        
+                        if(pDataArray->nameFile != ""){
+                            map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+                            if (itName != pDataArray->nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
+                            else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+                        }
+                        
+                        int numRedundants = 0;
+                        if (pDataArray->countfile != "") {
+                            map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
+                            if (itCount != pDataArray->nameCount.end()) { 
+                                trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                                numRedundants = itCount->second-1;
+                            }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
+                        }
+                        
+                        if (pDataArray->createGroup) {
+                            if(pDataArray->barcodes.size() != 0){
+                                
+                                if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
+                                else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
+                                
+                                if (pDataArray->nameFile != "") {
+                                    map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+                                    if (itName != pDataArray->nameMap.end()) { 
+                                        vector<string> thisSeqsNames; 
+                                        pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                        numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
+                                        for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                            outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
+                                        }
+                                    }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }                                                      
+                                }
+                                
+                                map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
+                                if (it == pDataArray->groupCounts.end()) {     pDataArray->groupCounts[thisGroup] = 1 + numRedundants; }
+                                else { pDataArray->groupCounts[it->first] += (1 + numRedundants); }
+                                
+                            }
+                        }
+                        
+                        if(pDataArray->allFiles){
+                            ofstream output;
+                            pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
+                            currSeq.printSequence(output);
+                            output.close();
                             
-                                               }
-                                       }
-                                       
-                                       if(pDataArray->allFiles){
-                                               ofstream output;
-                                               pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
-                                               currSeq.printSequence(output);
-                                               output.close();
-                                               
-                                               if(pDataArray->qFileName != ""){
-                                                       pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
-                                                       currQual.printQScores(output);
-                                                       output.close();                                                 
-                                               }
-                                               
-                                               if(pDataArray->nameFile != ""){
-                                                       map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
-                                                       if (itName != pDataArray->nameMap.end()) { 
-                                                               pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output);
-                                                               output << itName->first << '\t' << itName->second << endl; 
-                                                               output.close();
-                                                       }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
-                                               }
-                                       }
+                            if(pDataArray->qFileName != ""){
+                                pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
+                                currQual.printQScores(output);
+                                output.close();                                                        
+                            }
+                            
+                            if(pDataArray->nameFile != ""){
+                                map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+                                if (itName != pDataArray->nameMap.end()) { 
+                                    pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output);
+                                    output << itName->first << '\t' << itName->second << endl; 
+                                    output.close();
+                                }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+                            }
+                        }
+                    }
                                }
                                else{
                                        if(pDataArray->nameFile != ""){ //needs to be before the currSeq name is changed