]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.h
working on chimera.uchime change for dereplicate=t bug. added shared file to get...
[mothur.git] / trimseqscommand.h
1 #ifndef TRIMSEQSCOMMAND_H
2 #define TRIMSEQSCOMMAND_H
3
4 /*
5  *  trimseqscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 6/6/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "sequence.hpp"
16 #include "qualityscores.h"
17 #include "trimoligos.h"
18 #include "counttable.h"
19
20
21 class TrimSeqsCommand : public Command {
22 public:
23         TrimSeqsCommand(string);
24         TrimSeqsCommand();
25         ~TrimSeqsCommand(){}
26         
27         vector<string> setParameters();
28         string getCommandName()                 { return "trim.seqs";   }
29         string getCommandCategory()             { return "Sequence Processing";         }
30         
31         string getHelpString(); 
32     string getOutputPattern(string);    
33         string getCitation() { return "http://www.mothur.org/wiki/Trim.seqs"; }
34         string getDescription()         { return "provides the preprocessing features needed to screen and sort pyrosequences"; }
35
36         int execute(); 
37         void help() { m->mothurOut(getHelpString()); }  
38         
39 private:
40     struct linePair {
41         unsigned long long start;
42         unsigned long long end;
43         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
44         linePair() {}
45     };
46     
47         bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
48         bool keepFirstTrim(Sequence&, QualityScores&);
49         bool removeLastTrim(Sequence&, QualityScores&);
50         bool cullLength(Sequence&);
51         bool cullHomoP(Sequence&);
52         bool cullAmbigs(Sequence&);
53     string reverseOligo(string);
54
55         bool abort, createGroup;
56         string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
57         
58         bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient;
59         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
60         int qWindowSize, qWindowStep, keepFirst, removeLast;
61         double qRollAverage, qThreshold, qWindowAverage, qAverage;
62         vector<string> revPrimer, outputNames;
63         set<string> filesToRemove;
64     map<int, oligosPair> pairedBarcodes;
65     map<int, oligosPair> pairedPrimers;
66         map<string, int> barcodes;
67         vector<string> groupVector;
68         map<string, int> primers;
69     vector<string>  linker;
70     vector<string>  spacer;
71         map<string, int> combos;
72         map<string, int> groupToIndex;
73         vector<string> primerNameVector;        //needed here?
74         vector<string> barcodeNameVector;       //needed here?
75         map<string, int> groupCounts;  
76         map<string, string> nameMap;
77     map<string, int> nameCount; //for countfile name -> repCount
78     map<string, string> groupMap; //for countfile name -> group
79
80         vector<int> processIDS;   //processid
81         vector<linePair> lines;
82         vector<linePair> qLines;
83         
84         int driverCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
85         int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
86         int setLines(string, string);
87 };
88
89 /**************************************************************************************************/
90 //custom data structure for threads to use.
91 // This is passed by void pointer so it can be any data type
92 // that can be passed using a single void pointer (LPVOID).
93 struct trimData {
94     unsigned long long start, end;
95     MothurOut* m;
96     string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
97         vector<vector<string> > fastaFileNames;
98     vector<vector<string> > qualFileNames;
99     vector<vector<string> > nameFileNames;
100     unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
101     bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient;
102         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
103         int qWindowSize, qWindowStep, keepFirst, removeLast, count;
104         double qRollAverage, qThreshold, qWindowAverage, qAverage;
105     vector<string> revPrimer;
106         map<string, int> barcodes;
107         map<string, int> primers;
108     map<string, int> nameCount;
109     vector<string>  linker;
110     vector<string>  spacer;
111         map<string, int> combos;
112         vector<string> primerNameVector;        
113         vector<string> barcodeNameVector;       
114         map<string, int> groupCounts;  
115         map<string, string> nameMap;
116     map<string, string> groupMap;
117     map<int, oligosPair> pairedBarcodes;
118     map<int, oligosPair> pairedPrimers;
119     
120         trimData(){}
121         trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
122                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
123                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
124                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
125                       int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
126         filename = fn;
127         qFileName = qn;
128         nameFile = nf;
129         countfile = cf;
130         trimFileName = tn;
131         scrapFileName = sn;
132         trimQFileName = tqn;
133         scrapQFileName = sqn;
134         trimNFileName = tnn;
135         scrapNFileName = snn;
136         trimCFileName = tcn;
137         scrapCFileName = scn;
138         groupFileName = gn;
139         fastaFileNames = ffn;
140         qualFileNames = qfn;
141         nameFileNames = nfn;
142         lineStart = lstart;
143         lineEnd = lend;
144         qlineStart = qstart;
145         qlineEnd = qend;
146                 m = mout;
147         nameCount = ncount;
148         
149         pdiffs = pd;
150         bdiffs = bd;
151         ldiffs = ld;
152         sdiffs = sd;
153         tdiffs = td;
154         barcodes = bar;
155         pairedPrimers = ppr;
156         pairedBarcodes = pbr;
157         pairedOligos = po;
158         primers = pri;      numFPrimers = primers.size();
159         revPrimer = revP;   numRPrimers = revPrimer.size();
160         linker = li;        numLinkers = linker.size();
161         spacer = spa;       numSpacers = spacer.size();
162         primerNameVector = priNameVector;
163         barcodeNameVector = barNameVector;
164         
165         createGroup = cGroup;
166         allFiles = aFiles;
167         keepforward = keepF;
168         keepFirst = keepfi;
169         removeLast = removeL;
170         qWindowStep = WindowStep;
171         qWindowSize = WindowSize;
172         qWindowAverage = WindowAverage;
173         qtrim = trim;
174         qThreshold = Threshold;
175         qAverage = Average;
176         qRollAverage = RollAverage;
177         minLength = minL;
178         maxAmbig = maxA;
179         maxHomoP = maxH;
180         maxLength = maxL;
181         flip = fli;
182         reorient = reo;
183         nameMap = nm;
184         count = 0;
185         }
186 };
187 /**************************************************************************************************/
188 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
189 #else
190 static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ 
191         trimData* pDataArray;
192         pDataArray = (trimData*)lpParam;
193         
194         try {
195         ofstream trimFASTAFile;
196                 pDataArray->m->openOutputFile(pDataArray->trimFileName, trimFASTAFile);
197                 
198                 ofstream scrapFASTAFile;
199                 pDataArray->m->openOutputFile(pDataArray->scrapFileName, scrapFASTAFile);
200                 
201                 ofstream trimQualFile;
202                 ofstream scrapQualFile;
203                 if(pDataArray->qFileName != ""){
204                         pDataArray->m->openOutputFile(pDataArray->trimQFileName, trimQualFile);
205                         pDataArray->m->openOutputFile(pDataArray->scrapQFileName, scrapQualFile);
206                 }
207                 
208                 ofstream trimNameFile;
209                 ofstream scrapNameFile;
210                 if(pDataArray->nameFile != ""){
211                         pDataArray->m->openOutputFile(pDataArray->trimNFileName, trimNameFile);
212                         pDataArray->m->openOutputFile(pDataArray->scrapNFileName, scrapNameFile);
213                 }
214                 
215                 
216                 ofstream outGroupsFile;
217                 if ((pDataArray->createGroup) && (pDataArray->countfile == "")){        pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
218                 if(pDataArray->allFiles){
219                         for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
220                                 for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
221                                         if (pDataArray->fastaFileNames[i][j] != "") {
222                                                 ofstream temp;
223                                                 pDataArray->m->openOutputFile(pDataArray->fastaFileNames[i][j], temp);                  temp.close();
224                                                 if(pDataArray->qFileName != ""){
225                                                         pDataArray->m->openOutputFile(pDataArray->qualFileNames[i][j], temp);                   temp.close();
226                                                 }
227                                                 
228                                                 if(pDataArray->nameFile != ""){
229                                                         pDataArray->m->openOutputFile(pDataArray->nameFileNames[i][j], temp);                   temp.close();
230                                                 }
231                                         }
232                                 }
233                         }
234                 }
235                 
236         ofstream trimCountFile;
237                 ofstream scrapCountFile;
238                 if(pDataArray->countfile != ""){
239                         pDataArray->m->openOutputFile(pDataArray->trimCFileName, trimCountFile);
240                         pDataArray->m->openOutputFile(pDataArray->scrapCFileName, scrapCountFile);
241             if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
242                 }
243         
244                 ifstream inFASTA;
245                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
246                 if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
247                         inFASTA.seekg(0);
248                 }else { //this accounts for the difference in line endings. 
249                         inFASTA.seekg(pDataArray->lineStart-1); pDataArray->m->gobble(inFASTA); 
250                 }
251                 
252                 ifstream qFile;
253                 if(pDataArray->qFileName != "") {
254                         pDataArray->m->openInputFile(pDataArray->qFileName, qFile);
255                         if ((pDataArray->qlineStart == 0) || (pDataArray->qlineStart == 1)) {
256                 qFile.seekg(0);
257             }else { //this accounts for the difference in line endings. 
258                 qFile.seekg(pDataArray->qlineStart-1); pDataArray->m->gobble(qFile); 
259             } 
260                 }
261                 
262         TrimOligos* trimOligos = NULL;
263         int numBarcodes = pDataArray->barcodes.size();
264         if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); }
265         else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);  }
266         
267         TrimOligos* rtrimOligos = NULL;
268         if (pDataArray->reorient) {
269             //create reoriented primer and barcode pairs
270             map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
271             for (map<int, oligosPair>::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) {
272                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
273                 rpairedPrimers[it->first] = tempPair;
274             }
275             for (map<int, oligosPair>::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) {
276                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
277                 rpairedBarcodes[it->first] = tempPair;
278             }
279             rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
280         }
281         
282                 pDataArray->count = 0;
283                 for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
284                                    
285                         if (pDataArray->m->control_pressed) {
286                 delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; }
287                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
288                                 if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
289                 if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
290                 if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
291                 if(pDataArray->countfile != "") {       scrapCountFile.close(); trimCountFile.close();  }
292
293                                 if(pDataArray->qFileName != ""){ qFile.close(); }
294                                 return 0;
295                         }
296                         
297                         int success = 1;
298                         string trashCode = "";
299                         int currentSeqsDiffs = 0;
300             
301                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
302                         
303                         QualityScores currQual;
304                         if(pDataArray->qFileName != ""){
305                                 currQual = QualityScores(qFile);  pDataArray->m->gobble(qFile);
306                         }
307                         
308                         string origSeq = currSeq.getUnaligned();
309                         if (origSeq != "") {
310                 pDataArray->count++;
311                                 
312                                 int barcodeIndex = 0;
313                                 int primerIndex = 0;
314                                 
315                 if(pDataArray->numLinkers != 0){
316                                         success = trimOligos->stripLinker(currSeq, currQual);
317                                         if(success > pDataArray->ldiffs)                {       trashCode += 'k';       }
318                                         else{ currentSeqsDiffs += success;  }
319                                 }
320                 
321                                 if(numBarcodes != 0){
322                                         success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
323                                         if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
324                                         else{ currentSeqsDiffs += success;  }
325                                 }
326                 
327                 if(pDataArray->numSpacers != 0){
328                                         success = trimOligos->stripSpacer(currSeq, currQual);
329                                         if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
330                                         else{ currentSeqsDiffs += success;  }
331
332                                 }
333                 
334                                 if(pDataArray->numFPrimers != 0){
335                                         success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
336                                         if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
337                                         else{ currentSeqsDiffs += success;  }
338                                 }
339                                 
340                                 if (currentSeqsDiffs > pDataArray->tdiffs)      {       trashCode += 't';   }
341                                 
342                                 if(pDataArray->numRPrimers != 0){
343                                         success = trimOligos->stripReverse(currSeq, currQual);
344                                         if(!success)                            {       trashCode += 'r';       }
345                                 }
346                 
347                 if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse
348                     int thisSuccess = 0;
349                     string thisTrashCode = "";
350                     int thisCurrentSeqsDiffs = 0;
351                     
352                     int thisBarcodeIndex = 0;
353                     int thisPrimerIndex = 0;
354                     
355                     if(numBarcodes != 0){
356                         thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
357                         if(thisSuccess > pDataArray->bdiffs)            {       thisTrashCode += 'b';   }
358                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
359                     }
360                     
361                     if(pDataArray->numFPrimers != 0){
362                         thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward);
363                         if(thisSuccess > pDataArray->pdiffs)            {       thisTrashCode += 'f';   }
364                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
365                     }
366                     
367                     if (thisCurrentSeqsDiffs > pDataArray->tdiffs)      {       thisTrashCode += 't';   }
368                     
369                     if (thisTrashCode == "") {
370                         trashCode = thisTrashCode;
371                         success = thisSuccess;
372                         currentSeqsDiffs = thisCurrentSeqsDiffs;
373                         barcodeIndex = thisBarcodeIndex;
374                         primerIndex = thisPrimerIndex;
375                         currSeq.reverseComplement();
376                         if(pDataArray->qFileName != ""){
377                             currQual.flipQScores();
378                         }
379                     }
380                 }
381
382                 
383                                 if(pDataArray->keepFirst != 0){
384                                         //success = keepFirstTrim(currSeq, currQual);
385                     success = 1;
386                     if(currQual.getName() != ""){
387                         currQual.trimQScores(-1, pDataArray->keepFirst);
388                     }
389                     currSeq.trim(pDataArray->keepFirst);
390                                 }
391                                 
392                                 if(pDataArray->removeLast != 0){
393                                         //success = removeLastTrim(currSeq, currQual);
394                     success = 0;
395                     int length = currSeq.getNumBases() - pDataArray->removeLast;
396                     
397                     if(length > 0){
398                         if(currQual.getName() != ""){
399                             currQual.trimQScores(-1, length);
400                         }
401                         currSeq.trim(length);
402                         success = 1;
403                     }
404                     else{ success = 0; }
405                     
406                                         if(!success)                            {       trashCode += 'l';       }
407                                 }
408                 
409                                 
410                                 if(pDataArray->qFileName != ""){
411                                         int origLength = currSeq.getNumBases();
412                                         
413                                         if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
414                                         else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage);                              }
415                                         else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage);  }
416                                         else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage);       }
417                                         else                                            {       success = 1;                            }
418                                         
419                                         //you don't want to trim, if it fails above then scrap it
420                                         if ((!pDataArray->qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
421                                         
422                                         if(!success)                            {       trashCode += 'q';       }
423                                 }                               
424                 
425                                 if(pDataArray->minLength > 0 || pDataArray->maxLength > 0){
426                                         //success = cullLength(currSeq);
427                     int length = currSeq.getNumBases();
428                     success = 0;        //guilty until proven innocent
429                     if(length >= pDataArray->minLength && pDataArray->maxLength == 0)                   {       success = 1;    }
430                     else if(length >= pDataArray->minLength && length <= pDataArray->maxLength) {       success = 1;    }
431                     else                                                                                                {       success = 0;    }
432                     
433                                         if(!success)                            {       trashCode += 'l';       }
434                                 }
435                                 if(pDataArray->maxHomoP > 0){
436                                         //success = cullHomoP(currSeq);
437                     int longHomoP = currSeq.getLongHomoPolymer();
438                     success = 0;        //guilty until proven innocent
439                     if(longHomoP <= pDataArray->maxHomoP){      success = 1;    }
440                     else                                        {       success = 0;    }
441                     
442                                         if(!success)                            {       trashCode += 'h';       }
443                                 }
444                                 if(pDataArray->maxAmbig != -1){
445                                         //success = cullAmbigs(currSeq);
446                     int numNs = currSeq.getAmbigBases();
447                     success = 0;        //guilty until proven innocent
448                     if(numNs <= pDataArray->maxAmbig)   {       success = 1;    }
449                     else                                        {       success = 0;    }
450                                         if(!success)                            {       trashCode += 'n';       }
451                                 }
452                                 
453                                 if(pDataArray->flip){           // should go last                       
454                                         currSeq.reverseComplement();
455                                         if(pDataArray->qFileName != ""){
456                                                 currQual.flipQScores(); 
457                                         }
458                                 }
459                                 
460                                 if(trashCode.length() == 0){
461                     string thisGroup = "";
462                     if (pDataArray->createGroup) {
463                                                 if(numBarcodes != 0){
464                                                         thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
465                                                         if (pDataArray->numFPrimers != 0) {
466                                                                 if (pDataArray->primerNameVector[primerIndex] != "") { 
467                                                                         if(thisGroup != "") {
468                                                                                 thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
469                                                                         }else {
470                                                                                 thisGroup = pDataArray->primerNameVector[primerIndex]; 
471                                                                         }
472                                                                 } 
473                                                         }
474                         }
475                     }
476                     
477                     int pos = thisGroup.find("ignore");
478                     if (pos == string::npos) {
479                         
480                         currSeq.setAligned(currSeq.getUnaligned());
481                         currSeq.printSequence(trimFASTAFile);
482                         
483                         if(pDataArray->qFileName != ""){
484                             currQual.printQScores(trimQualFile);
485                         }
486                         
487                         if(pDataArray->nameFile != ""){
488                             map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
489                             if (itName != pDataArray->nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
490                             else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
491                         }
492                         
493                         int numRedundants = 0;
494                         if (pDataArray->countfile != "") {
495                             map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
496                             if (itCount != pDataArray->nameCount.end()) { 
497                                 trimCountFile << itCount->first << '\t' << itCount->second << endl;
498                                 numRedundants = itCount->second-1;
499                             }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
500                         }
501                         
502                         if (pDataArray->createGroup) {
503                             if(numBarcodes != 0){
504                                 
505                                 if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
506                                 else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
507                                 
508                                 if (pDataArray->nameFile != "") {
509                                     map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
510                                     if (itName != pDataArray->nameMap.end()) { 
511                                         vector<string> thisSeqsNames; 
512                                         pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
513                                         numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
514                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
515                                             outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
516                                         }
517                                     }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }                                                       
518                                 }
519                                 
520                                 map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
521                                 if (it == pDataArray->groupCounts.end()) {      pDataArray->groupCounts[thisGroup] = 1 + numRedundants; }
522                                 else { pDataArray->groupCounts[it->first] += (1 + numRedundants); }
523                                 
524                             }
525                         }
526                         
527                         if(pDataArray->allFiles){
528                             ofstream output;
529                             pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
530                             currSeq.printSequence(output);
531                             output.close();
532                             
533                             if(pDataArray->qFileName != ""){
534                                 pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
535                                 currQual.printQScores(output);
536                                 output.close();                                                 
537                             }
538                             
539                             if(pDataArray->nameFile != ""){
540                                 map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
541                                 if (itName != pDataArray->nameMap.end()) { 
542                                     pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output);
543                                     output << itName->first << '\t' << itName->second << endl; 
544                                     output.close();
545                                 }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
546                             }
547                         }
548                     }
549                                 }
550                                 else{
551                                         if(pDataArray->nameFile != ""){ //needs to be before the currSeq name is changed
552                                                 map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
553                                                 if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
554                                                 else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
555                                         }
556                     if (pDataArray->countfile != "") {
557                         map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
558                         if (itCount != pDataArray->nameCount.end()) { 
559                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
560                         }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
561                     }
562                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
563                                         currSeq.setUnaligned(origSeq);
564                                         currSeq.setAligned(origSeq);
565                                         currSeq.printSequence(scrapFASTAFile);
566                                         if(pDataArray->qFileName != ""){
567                                                 currQual.printQScores(scrapQualFile);
568                                         }
569                                 }
570                                 
571                         }
572                         
573                         //report progress
574                         if((pDataArray->count) % 1000 == 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
575                         
576                 }
577                 //report progress
578                 if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
579                 
580         if (pDataArray->reorient) { delete rtrimOligos; }
581                 delete trimOligos;
582                 inFASTA.close();
583                 trimFASTAFile.close();
584                 scrapFASTAFile.close();
585                 if (pDataArray->createGroup) {   outGroupsFile.close();   }
586                 if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
587                 if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
588                 
589         return 0;
590             
591         }
592         catch(exception& e) {
593             pDataArray->m->errorOut(e, "TrimSeqsCommand", "MyTrimThreadFunction");
594             exit(1);
595         }
596     } 
597 #endif
598     
599
600 /**************************************************************************************************/
601
602 #endif