]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.h
working on pam
[mothur.git] / trimseqscommand.h
1 #ifndef TRIMSEQSCOMMAND_H
2 #define TRIMSEQSCOMMAND_H
3
4 /*
5  *  trimseqscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 6/6/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "sequence.hpp"
16 #include "qualityscores.h"
17 #include "trimoligos.h"
18 #include "counttable.h"
19
20
21 class TrimSeqsCommand : public Command {
22 public:
23         TrimSeqsCommand(string);
24         TrimSeqsCommand();
25         ~TrimSeqsCommand(){}
26         
27         vector<string> setParameters();
28         string getCommandName()                 { return "trim.seqs";   }
29         string getCommandCategory()             { return "Sequence Processing";         }
30         
31         string getHelpString(); 
32     string getOutputPattern(string);    
33         string getCitation() { return "http://www.mothur.org/wiki/Trim.seqs"; }
34         string getDescription()         { return "provides the preprocessing features needed to screen and sort pyrosequences"; }
35
36         int execute(); 
37         void help() { m->mothurOut(getHelpString()); }  
38         
39 private:
40     struct linePair {
41         unsigned long long start;
42         unsigned long long end;
43         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
44         linePair() {}
45     };
46     
47         bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
48         bool keepFirstTrim(Sequence&, QualityScores&);
49         bool removeLastTrim(Sequence&, QualityScores&);
50         bool cullLength(Sequence&);
51         bool cullHomoP(Sequence&);
52         bool cullAmbigs(Sequence&);
53     string reverseOligo(string);
54
55         bool abort, createGroup;
56         string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
57         
58         bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform;
59         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
60         int qWindowSize, qWindowStep, keepFirst, removeLast;
61         double qRollAverage, qThreshold, qWindowAverage, qAverage;
62         vector<string> revPrimer, outputNames;
63         set<string> filesToRemove;
64     map<int, oligosPair> pairedBarcodes;
65     map<int, oligosPair> pairedPrimers;
66         map<string, int> barcodes;
67         vector<string> groupVector;
68         map<string, int> primers;
69     vector<string>  linker;
70     vector<string>  spacer;
71         map<string, int> combos;
72         map<string, int> groupToIndex;
73         vector<string> primerNameVector;        //needed here?
74         vector<string> barcodeNameVector;       //needed here?
75         map<string, int> groupCounts;  
76         map<string, string> nameMap;
77     map<string, int> nameCount; //for countfile name -> repCount
78     map<string, string> groupMap; //for countfile name -> group
79
80         vector<int> processIDS;   //processid
81         vector<linePair> lines;
82         vector<linePair> qLines;
83         
84         int driverCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
85         int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
86         int setLines(string, string);
87 };
88
89 /**************************************************************************************************/
90 //custom data structure for threads to use.
91 // This is passed by void pointer so it can be any data type
92 // that can be passed using a single void pointer (LPVOID).
93 struct trimData {
94     unsigned long long start, end;
95     MothurOut* m;
96     string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
97         vector<vector<string> > fastaFileNames;
98     vector<vector<string> > qualFileNames;
99     vector<vector<string> > nameFileNames;
100     unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
101     bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient, logtransform;
102         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
103         int qWindowSize, qWindowStep, keepFirst, removeLast, count;
104         double qRollAverage, qThreshold, qWindowAverage, qAverage;
105     vector<string> revPrimer;
106         map<string, int> barcodes;
107         map<string, int> primers;
108     map<string, int> nameCount;
109     vector<string>  linker;
110     vector<string>  spacer;
111         map<string, int> combos;
112         vector<string> primerNameVector;        
113         vector<string> barcodeNameVector;       
114         map<string, int> groupCounts;  
115         map<string, string> nameMap;
116     map<string, string> groupMap;
117     map<int, oligosPair> pairedBarcodes;
118     map<int, oligosPair> pairedPrimers;
119     
120         trimData(){}
121         trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
122                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
123                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
124                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt,
125                       int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
126         filename = fn;
127         qFileName = qn;
128         nameFile = nf;
129         countfile = cf;
130         trimFileName = tn;
131         scrapFileName = sn;
132         trimQFileName = tqn;
133         scrapQFileName = sqn;
134         trimNFileName = tnn;
135         scrapNFileName = snn;
136         trimCFileName = tcn;
137         scrapCFileName = scn;
138         groupFileName = gn;
139         fastaFileNames = ffn;
140         qualFileNames = qfn;
141         nameFileNames = nfn;
142         lineStart = lstart;
143         lineEnd = lend;
144         qlineStart = qstart;
145         qlineEnd = qend;
146                 m = mout;
147         nameCount = ncount;
148         
149         pdiffs = pd;
150         bdiffs = bd;
151         ldiffs = ld;
152         sdiffs = sd;
153         tdiffs = td;
154         barcodes = bar;
155         pairedPrimers = ppr;
156         pairedBarcodes = pbr;
157         pairedOligos = po;
158         primers = pri;      numFPrimers = primers.size();
159         revPrimer = revP;   numRPrimers = revPrimer.size();
160         linker = li;        numLinkers = linker.size();
161         spacer = spa;       numSpacers = spacer.size();
162         primerNameVector = priNameVector;
163         barcodeNameVector = barNameVector;
164         
165         createGroup = cGroup;
166         allFiles = aFiles;
167         keepforward = keepF;
168         keepFirst = keepfi;
169         removeLast = removeL;
170         qWindowStep = WindowStep;
171         qWindowSize = WindowSize;
172         qWindowAverage = WindowAverage;
173         qtrim = trim;
174         qThreshold = Threshold;
175         qAverage = Average;
176         qRollAverage = RollAverage;
177         logtransform = lt;
178         minLength = minL;
179         maxAmbig = maxA;
180         maxHomoP = maxH;
181         maxLength = maxL;
182         flip = fli;
183         reorient = reo;
184         nameMap = nm;
185         count = 0;
186         }
187 };
188 /**************************************************************************************************/
189 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
190 #else
191 static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ 
192         trimData* pDataArray;
193         pDataArray = (trimData*)lpParam;
194         
195         try {
196         ofstream trimFASTAFile;
197                 pDataArray->m->openOutputFile(pDataArray->trimFileName, trimFASTAFile);
198                 
199                 ofstream scrapFASTAFile;
200                 pDataArray->m->openOutputFile(pDataArray->scrapFileName, scrapFASTAFile);
201                 
202                 ofstream trimQualFile;
203                 ofstream scrapQualFile;
204                 if(pDataArray->qFileName != ""){
205                         pDataArray->m->openOutputFile(pDataArray->trimQFileName, trimQualFile);
206                         pDataArray->m->openOutputFile(pDataArray->scrapQFileName, scrapQualFile);
207                 }
208                 
209                 ofstream trimNameFile;
210                 ofstream scrapNameFile;
211                 if(pDataArray->nameFile != ""){
212                         pDataArray->m->openOutputFile(pDataArray->trimNFileName, trimNameFile);
213                         pDataArray->m->openOutputFile(pDataArray->scrapNFileName, scrapNameFile);
214                 }
215                 
216                 
217                 ofstream outGroupsFile;
218                 if ((pDataArray->createGroup) && (pDataArray->countfile == "")){        pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
219                 if(pDataArray->allFiles){
220                         for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
221                                 for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
222                                         if (pDataArray->fastaFileNames[i][j] != "") {
223                                                 ofstream temp;
224                                                 pDataArray->m->openOutputFile(pDataArray->fastaFileNames[i][j], temp);                  temp.close();
225                                                 if(pDataArray->qFileName != ""){
226                                                         pDataArray->m->openOutputFile(pDataArray->qualFileNames[i][j], temp);                   temp.close();
227                                                 }
228                                                 
229                                                 if(pDataArray->nameFile != ""){
230                                                         pDataArray->m->openOutputFile(pDataArray->nameFileNames[i][j], temp);                   temp.close();
231                                                 }
232                                         }
233                                 }
234                         }
235                 }
236                 
237         ofstream trimCountFile;
238                 ofstream scrapCountFile;
239                 if(pDataArray->countfile != ""){
240                         pDataArray->m->openOutputFile(pDataArray->trimCFileName, trimCountFile);
241                         pDataArray->m->openOutputFile(pDataArray->scrapCFileName, scrapCountFile);
242             if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
243                 }
244         
245                 ifstream inFASTA;
246                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
247                 if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
248                         inFASTA.seekg(0);
249                 }else { //this accounts for the difference in line endings. 
250                         inFASTA.seekg(pDataArray->lineStart-1); pDataArray->m->gobble(inFASTA); 
251                 }
252                 
253                 ifstream qFile;
254                 if(pDataArray->qFileName != "") {
255                         pDataArray->m->openInputFile(pDataArray->qFileName, qFile);
256                         if ((pDataArray->qlineStart == 0) || (pDataArray->qlineStart == 1)) {
257                 qFile.seekg(0);
258             }else { //this accounts for the difference in line endings. 
259                 qFile.seekg(pDataArray->qlineStart-1); pDataArray->m->gobble(qFile); 
260             } 
261                 }
262                 
263         TrimOligos* trimOligos = NULL;
264         int numBarcodes = pDataArray->barcodes.size();
265         if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); }
266         else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);  }
267         
268         TrimOligos* rtrimOligos = NULL;
269         if (pDataArray->reorient) {
270             //create reoriented primer and barcode pairs
271             map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
272             for (map<int, oligosPair>::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) {
273                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
274                 rpairedPrimers[it->first] = tempPair;
275             }
276             for (map<int, oligosPair>::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) {
277                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
278                 rpairedBarcodes[it->first] = tempPair;
279             }
280             
281             int index = rpairedBarcodes.size();
282             for (map<string, int>::iterator it = pDataArray->barcodes.begin(); it != pDataArray->barcodes.end(); it++) {
283                 oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
284                 rpairedBarcodes[index] = tempPair; index++;
285             }
286             
287             index = rpairedPrimers.size();
288             for (map<string, int>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
289                 oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
290                 rpairedPrimers[index] = tempPair; index++;
291             }
292
293             rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
294         }
295         
296                 pDataArray->count = 0;
297                 for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
298                                    
299                         if (pDataArray->m->control_pressed) {
300                 delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; }
301                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
302                                 if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
303                 if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
304                 if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
305                 if(pDataArray->countfile != "") {       scrapCountFile.close(); trimCountFile.close();  }
306
307                                 if(pDataArray->qFileName != ""){ qFile.close(); }
308                                 return 0;
309                         }
310                         
311                         int success = 1;
312                         string trashCode = "";
313                         int currentSeqsDiffs = 0;
314             
315                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
316             Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
317                         
318                         QualityScores currQual; QualityScores savedQual;
319                         if(pDataArray->qFileName != ""){
320                                 currQual = QualityScores(qFile);  pDataArray->m->gobble(qFile);
321                 savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
322                         }
323               
324                         
325                         string origSeq = currSeq.getUnaligned();
326                         if (origSeq != "") {
327                 pDataArray->count++;
328                                 
329                                 int barcodeIndex = 0;
330                                 int primerIndex = 0;
331                                 
332                 if(pDataArray->numLinkers != 0){
333                                         success = trimOligos->stripLinker(currSeq, currQual);
334                                         if(success > pDataArray->ldiffs)                {       trashCode += 'k';       }
335                                         else{ currentSeqsDiffs += success;  }
336                                 }
337                 
338                                 if(numBarcodes != 0){
339                                         success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
340                                         if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
341                                         else{ currentSeqsDiffs += success;  }
342                                 }
343                 
344                 if(pDataArray->numSpacers != 0){
345                                         success = trimOligos->stripSpacer(currSeq, currQual);
346                                         if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
347                                         else{ currentSeqsDiffs += success;  }
348
349                                 }
350                 
351                                 if(pDataArray->numFPrimers != 0){
352                                         success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
353                                         if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
354                                         else{ currentSeqsDiffs += success;  }
355                                 }
356                                 
357                                 if (currentSeqsDiffs > pDataArray->tdiffs)      {       trashCode += 't';   }
358                                 
359                                 if(pDataArray->numRPrimers != 0){
360                                         success = trimOligos->stripReverse(currSeq, currQual);
361                                         if(!success)                            {       trashCode += 'r';       }
362                                 }
363                 
364                 if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse
365                     int thisSuccess = 0;
366                     string thisTrashCode = "";
367                     int thisCurrentSeqsDiffs = 0;
368                     
369                     int thisBarcodeIndex = 0;
370                     int thisPrimerIndex = 0;
371                     
372                     if(numBarcodes != 0){
373                         thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
374                         if(thisSuccess > pDataArray->bdiffs)            {       thisTrashCode += 'b';   }
375                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
376                     }
377                     
378                     if(pDataArray->numFPrimers != 0){
379                         thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward);
380                         if(thisSuccess > pDataArray->pdiffs)            {       thisTrashCode += 'f';   }
381                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
382                     }
383                     
384                     if (thisCurrentSeqsDiffs > pDataArray->tdiffs)      {       thisTrashCode += 't';   }
385                     
386                     if (thisTrashCode == "") {
387                         trashCode = thisTrashCode;
388                         success = thisSuccess;
389                         currentSeqsDiffs = thisCurrentSeqsDiffs;
390                         barcodeIndex = thisBarcodeIndex;
391                         primerIndex = thisPrimerIndex;
392                         savedSeq.reverseComplement();
393                         currSeq.setAligned(savedSeq.getAligned());
394                         if(pDataArray->qFileName != ""){
395                             savedQual.flipQScores();
396                             currQual.setScores(savedQual.getScores());
397                         }
398                     }else { trashCode += "(" + thisTrashCode + ")";  }
399                 }
400
401                 
402                                 if(pDataArray->keepFirst != 0){
403                                         //success = keepFirstTrim(currSeq, currQual);
404                     success = 1;
405                     if(currQual.getName() != ""){
406                         currQual.trimQScores(-1, pDataArray->keepFirst);
407                     }
408                     currSeq.trim(pDataArray->keepFirst);
409                                 }
410                                 
411                                 if(pDataArray->removeLast != 0){
412                                         //success = removeLastTrim(currSeq, currQual);
413                     success = 0;
414                     int length = currSeq.getNumBases() - pDataArray->removeLast;
415                     
416                     if(length > 0){
417                         if(currQual.getName() != ""){
418                             currQual.trimQScores(-1, length);
419                         }
420                         currSeq.trim(length);
421                         success = 1;
422                     }
423                     else{ success = 0; }
424                     
425                                         if(!success)                            {       trashCode += 'l';       }
426                                 }
427                 
428                                 
429                                 if(pDataArray->qFileName != ""){
430                                         int origLength = currSeq.getNumBases();
431                                         
432                                         if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
433                                         else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage, pDataArray->logtransform);                            }
434                                         else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage, pDataArray->logtransform);        }
435                                         else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage, pDataArray->logtransform);     }
436                                         else                                            {       success = 1;                            }
437                                         
438                                         //you don't want to trim, if it fails above then scrap it
439                                         if ((!pDataArray->qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
440                                         
441                                         if(!success)                            {       trashCode += 'q';       }
442                                 }                               
443                 
444                                 if(pDataArray->minLength > 0 || pDataArray->maxLength > 0){
445                                         //success = cullLength(currSeq);
446                     int length = currSeq.getNumBases();
447                     success = 0;        //guilty until proven innocent
448                     if(length >= pDataArray->minLength && pDataArray->maxLength == 0)                   {       success = 1;    }
449                     else if(length >= pDataArray->minLength && length <= pDataArray->maxLength) {       success = 1;    }
450                     else                                                                                                {       success = 0;    }
451                     
452                                         if(!success)                            {       trashCode += 'l';       }
453                                 }
454                                 if(pDataArray->maxHomoP > 0){
455                                         //success = cullHomoP(currSeq);
456                     int longHomoP = currSeq.getLongHomoPolymer();
457                     success = 0;        //guilty until proven innocent
458                     if(longHomoP <= pDataArray->maxHomoP){      success = 1;    }
459                     else                                        {       success = 0;    }
460                     
461                                         if(!success)                            {       trashCode += 'h';       }
462                                 }
463                                 if(pDataArray->maxAmbig != -1){
464                                         //success = cullAmbigs(currSeq);
465                     int numNs = currSeq.getAmbigBases();
466                     success = 0;        //guilty until proven innocent
467                     if(numNs <= pDataArray->maxAmbig)   {       success = 1;    }
468                     else                                        {       success = 0;    }
469                                         if(!success)                            {       trashCode += 'n';       }
470                                 }
471                                 
472                                 if(pDataArray->flip){           // should go last                       
473                                         currSeq.reverseComplement();
474                                         if(pDataArray->qFileName != ""){
475                                                 currQual.flipQScores(); 
476                                         }
477                                 }
478                                 
479                                 if(trashCode.length() == 0){
480                     string thisGroup = "";
481                     if (pDataArray->createGroup) {
482                                                 if(numBarcodes != 0){
483                                                         thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
484                                                         if (pDataArray->numFPrimers != 0) {
485                                                                 if (pDataArray->primerNameVector[primerIndex] != "") { 
486                                                                         if(thisGroup != "") {
487                                                                                 thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
488                                                                         }else {
489                                                                                 thisGroup = pDataArray->primerNameVector[primerIndex]; 
490                                                                         }
491                                                                 } 
492                                                         }
493                         }
494                     }
495                     
496                     int pos = thisGroup.find("ignore");
497                     if (pos == string::npos) {
498                         
499                         currSeq.setAligned(currSeq.getUnaligned());
500                         currSeq.printSequence(trimFASTAFile);
501                         
502                         if(pDataArray->qFileName != ""){
503                             currQual.printQScores(trimQualFile);
504                         }
505                         
506                         if(pDataArray->nameFile != ""){
507                             map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
508                             if (itName != pDataArray->nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
509                             else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
510                         }
511                         
512                         int numRedundants = 0;
513                         if (pDataArray->countfile != "") {
514                             map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
515                             if (itCount != pDataArray->nameCount.end()) { 
516                                 trimCountFile << itCount->first << '\t' << itCount->second << endl;
517                                 numRedundants = itCount->second-1;
518                             }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
519                         }
520                         
521                         if (pDataArray->createGroup) {
522                             if(numBarcodes != 0){
523                                 
524                                 if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
525                                 else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
526                                 
527                                 if (pDataArray->nameFile != "") {
528                                     map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
529                                     if (itName != pDataArray->nameMap.end()) { 
530                                         vector<string> thisSeqsNames; 
531                                         pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
532                                         numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
533                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
534                                             outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
535                                         }
536                                     }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }                                                       
537                                 }
538                                 
539                                 map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
540                                 if (it == pDataArray->groupCounts.end()) {      pDataArray->groupCounts[thisGroup] = 1 + numRedundants; }
541                                 else { pDataArray->groupCounts[it->first] += (1 + numRedundants); }
542                                 
543                             }
544                         }
545                         
546                         if(pDataArray->allFiles){
547                             ofstream output;
548                             pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
549                             currSeq.printSequence(output);
550                             output.close();
551                             
552                             if(pDataArray->qFileName != ""){
553                                 pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
554                                 currQual.printQScores(output);
555                                 output.close();                                                 
556                             }
557                             
558                             if(pDataArray->nameFile != ""){
559                                 map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
560                                 if (itName != pDataArray->nameMap.end()) { 
561                                     pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output);
562                                     output << itName->first << '\t' << itName->second << endl; 
563                                     output.close();
564                                 }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
565                             }
566                         }
567                     }
568                                 }
569                                 else{
570                                         if(pDataArray->nameFile != ""){ //needs to be before the currSeq name is changed
571                                                 map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
572                                                 if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
573                                                 else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
574                                         }
575                     if (pDataArray->countfile != "") {
576                         map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
577                         if (itCount != pDataArray->nameCount.end()) { 
578                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
579                         }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
580                     }
581                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
582                                         currSeq.setUnaligned(origSeq);
583                                         currSeq.setAligned(origSeq);
584                                         currSeq.printSequence(scrapFASTAFile);
585                                         if(pDataArray->qFileName != ""){
586                                                 currQual.printQScores(scrapQualFile);
587                                         }
588                                 }
589                                 
590                         }
591                         
592                         //report progress
593                         if((pDataArray->count) % 1000 == 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
594                         
595                 }
596                 //report progress
597                 if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
598                 
599         if (pDataArray->reorient) { delete rtrimOligos; }
600                 delete trimOligos;
601                 inFASTA.close();
602                 trimFASTAFile.close();
603                 scrapFASTAFile.close();
604                 if (pDataArray->createGroup) {   outGroupsFile.close();   }
605                 if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
606                 if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
607                 
608         return 0;
609             
610         }
611         catch(exception& e) {
612             pDataArray->m->errorOut(e, "TrimSeqsCommand", "MyTrimThreadFunction");
613             exit(1);
614         }
615     } 
616 #endif
617     
618
619 /**************************************************************************************************/
620
621 #endif