]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.h
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / trimseqscommand.h
1 #ifndef TRIMSEQSCOMMAND_H
2 #define TRIMSEQSCOMMAND_H
3
4 /*
5  *  trimseqscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 6/6/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "sequence.hpp"
16 #include "qualityscores.h"
17 #include "trimoligos.h"
18 #include "counttable.h"
19
20
21 class TrimSeqsCommand : public Command {
22 public:
23         TrimSeqsCommand(string);
24         TrimSeqsCommand();
25         ~TrimSeqsCommand(){}
26         
27         vector<string> setParameters();
28         string getCommandName()                 { return "trim.seqs";   }
29         string getCommandCategory()             { return "Sequence Processing";         }
30         
31         string getHelpString(); 
32     string getOutputPattern(string);    
33         string getCitation() { return "http://www.mothur.org/wiki/Trim.seqs"; }
34         string getDescription()         { return "provides the preprocessing features needed to screen and sort pyrosequences"; }
35
36         int execute(); 
37         void help() { m->mothurOut(getHelpString()); }  
38         
39 private:
40     struct linePair {
41         unsigned long long start;
42         unsigned long long end;
43         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
44         linePair() {}
45     };
46     
47         bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
48         bool keepFirstTrim(Sequence&, QualityScores&);
49         bool removeLastTrim(Sequence&, QualityScores&);
50         bool cullLength(Sequence&);
51         bool cullHomoP(Sequence&);
52         bool cullAmbigs(Sequence&);
53     string reverseOligo(string);
54
55         bool abort, createGroup;
56         string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
57         
58         bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient;
59         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
60         int qWindowSize, qWindowStep, keepFirst, removeLast;
61         double qRollAverage, qThreshold, qWindowAverage, qAverage;
62         vector<string> revPrimer, outputNames;
63         set<string> filesToRemove;
64     map<int, oligosPair> pairedBarcodes;
65     map<int, oligosPair> pairedPrimers;
66         map<string, int> barcodes;
67         vector<string> groupVector;
68         map<string, int> primers;
69     vector<string>  linker;
70     vector<string>  spacer;
71         map<string, int> combos;
72         map<string, int> groupToIndex;
73         vector<string> primerNameVector;        //needed here?
74         vector<string> barcodeNameVector;       //needed here?
75         map<string, int> groupCounts;  
76         map<string, string> nameMap;
77     map<string, int> nameCount; //for countfile name -> repCount
78     map<string, string> groupMap; //for countfile name -> group
79
80         vector<int> processIDS;   //processid
81         vector<linePair> lines;
82         vector<linePair> qLines;
83         
84         int driverCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
85         int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
86         int setLines(string, string);
87 };
88
89 /**************************************************************************************************/
90 //custom data structure for threads to use.
91 // This is passed by void pointer so it can be any data type
92 // that can be passed using a single void pointer (LPVOID).
93 struct trimData {
94     unsigned long long start, end;
95     MothurOut* m;
96     string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
97         vector<vector<string> > fastaFileNames;
98     vector<vector<string> > qualFileNames;
99     vector<vector<string> > nameFileNames;
100     unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
101     bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient;
102         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
103         int qWindowSize, qWindowStep, keepFirst, removeLast, count;
104         double qRollAverage, qThreshold, qWindowAverage, qAverage;
105     vector<string> revPrimer;
106         map<string, int> barcodes;
107         map<string, int> primers;
108     map<string, int> nameCount;
109     vector<string>  linker;
110     vector<string>  spacer;
111         map<string, int> combos;
112         vector<string> primerNameVector;        
113         vector<string> barcodeNameVector;       
114         map<string, int> groupCounts;  
115         map<string, string> nameMap;
116     map<string, string> groupMap;
117     map<int, oligosPair> pairedBarcodes;
118     map<int, oligosPair> pairedPrimers;
119     
120         trimData(){}
121         trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
122                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
123                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
124                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
125                       int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
126         filename = fn;
127         qFileName = qn;
128         nameFile = nf;
129         countfile = cf;
130         trimFileName = tn;
131         scrapFileName = sn;
132         trimQFileName = tqn;
133         scrapQFileName = sqn;
134         trimNFileName = tnn;
135         scrapNFileName = snn;
136         trimCFileName = tcn;
137         scrapCFileName = scn;
138         groupFileName = gn;
139         fastaFileNames = ffn;
140         qualFileNames = qfn;
141         nameFileNames = nfn;
142         lineStart = lstart;
143         lineEnd = lend;
144         qlineStart = qstart;
145         qlineEnd = qend;
146                 m = mout;
147         nameCount = ncount;
148         
149         pdiffs = pd;
150         bdiffs = bd;
151         ldiffs = ld;
152         sdiffs = sd;
153         tdiffs = td;
154         barcodes = bar;
155         pairedPrimers = ppr;
156         pairedBarcodes = pbr;
157         pairedOligos = po;
158         primers = pri;      numFPrimers = primers.size();
159         revPrimer = revP;   numRPrimers = revPrimer.size();
160         linker = li;        numLinkers = linker.size();
161         spacer = spa;       numSpacers = spacer.size();
162         primerNameVector = priNameVector;
163         barcodeNameVector = barNameVector;
164         
165         createGroup = cGroup;
166         allFiles = aFiles;
167         keepforward = keepF;
168         keepFirst = keepfi;
169         removeLast = removeL;
170         qWindowStep = WindowStep;
171         qWindowSize = WindowSize;
172         qWindowAverage = WindowAverage;
173         qtrim = trim;
174         qThreshold = Threshold;
175         qAverage = Average;
176         qRollAverage = RollAverage;
177         minLength = minL;
178         maxAmbig = maxA;
179         maxHomoP = maxH;
180         maxLength = maxL;
181         flip = fli;
182         reorient = reo;
183         nameMap = nm;
184         count = 0;
185         }
186 };
187 /**************************************************************************************************/
188 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
189 #else
190 static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ 
191         trimData* pDataArray;
192         pDataArray = (trimData*)lpParam;
193         
194         try {
195         ofstream trimFASTAFile;
196                 pDataArray->m->openOutputFile(pDataArray->trimFileName, trimFASTAFile);
197                 
198                 ofstream scrapFASTAFile;
199                 pDataArray->m->openOutputFile(pDataArray->scrapFileName, scrapFASTAFile);
200                 
201                 ofstream trimQualFile;
202                 ofstream scrapQualFile;
203                 if(pDataArray->qFileName != ""){
204                         pDataArray->m->openOutputFile(pDataArray->trimQFileName, trimQualFile);
205                         pDataArray->m->openOutputFile(pDataArray->scrapQFileName, scrapQualFile);
206                 }
207                 
208                 ofstream trimNameFile;
209                 ofstream scrapNameFile;
210                 if(pDataArray->nameFile != ""){
211                         pDataArray->m->openOutputFile(pDataArray->trimNFileName, trimNameFile);
212                         pDataArray->m->openOutputFile(pDataArray->scrapNFileName, scrapNameFile);
213                 }
214                 
215                 
216                 ofstream outGroupsFile;
217                 if ((pDataArray->createGroup) && (pDataArray->countfile == "")){        pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
218                 if(pDataArray->allFiles){
219                         for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
220                                 for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
221                                         if (pDataArray->fastaFileNames[i][j] != "") {
222                                                 ofstream temp;
223                                                 pDataArray->m->openOutputFile(pDataArray->fastaFileNames[i][j], temp);                  temp.close();
224                                                 if(pDataArray->qFileName != ""){
225                                                         pDataArray->m->openOutputFile(pDataArray->qualFileNames[i][j], temp);                   temp.close();
226                                                 }
227                                                 
228                                                 if(pDataArray->nameFile != ""){
229                                                         pDataArray->m->openOutputFile(pDataArray->nameFileNames[i][j], temp);                   temp.close();
230                                                 }
231                                         }
232                                 }
233                         }
234                 }
235                 
236         ofstream trimCountFile;
237                 ofstream scrapCountFile;
238                 if(pDataArray->countfile != ""){
239                         pDataArray->m->openOutputFile(pDataArray->trimCFileName, trimCountFile);
240                         pDataArray->m->openOutputFile(pDataArray->scrapCFileName, scrapCountFile);
241             if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
242                 }
243         
244                 ifstream inFASTA;
245                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
246                 if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
247                         inFASTA.seekg(0);
248                 }else { //this accounts for the difference in line endings. 
249                         inFASTA.seekg(pDataArray->lineStart-1); pDataArray->m->gobble(inFASTA); 
250                 }
251                 
252                 ifstream qFile;
253                 if(pDataArray->qFileName != "") {
254                         pDataArray->m->openInputFile(pDataArray->qFileName, qFile);
255                         if ((pDataArray->qlineStart == 0) || (pDataArray->qlineStart == 1)) {
256                 qFile.seekg(0);
257             }else { //this accounts for the difference in line endings. 
258                 qFile.seekg(pDataArray->qlineStart-1); pDataArray->m->gobble(qFile); 
259             } 
260                 }
261                 
262         TrimOligos* trimOligos = NULL;
263         int numBarcodes = pDataArray->barcodes.size();
264         if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); }
265         else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);  }
266         
267         TrimOligos* rtrimOligos = NULL;
268         if (pDataArray->reorient) {
269             //create reoriented primer and barcode pairs
270             map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
271             for (map<int, oligosPair>::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) {
272                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
273                 rpairedPrimers[it->first] = tempPair;
274             }
275             for (map<int, oligosPair>::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) {
276                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
277                 rpairedBarcodes[it->first] = tempPair;
278             }
279             rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
280         }
281         
282                 pDataArray->count = 0;
283                 for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
284                                    
285                         if (pDataArray->m->control_pressed) {
286                 delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; }
287                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
288                                 if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
289                 if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
290                 if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
291                 if(pDataArray->countfile != "") {       scrapCountFile.close(); trimCountFile.close();  }
292
293                                 if(pDataArray->qFileName != ""){ qFile.close(); }
294                                 return 0;
295                         }
296                         
297                         int success = 1;
298                         string trashCode = "";
299                         int currentSeqsDiffs = 0;
300             
301                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
302             Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
303                         
304                         QualityScores currQual; QualityScores savedQual;
305                         if(pDataArray->qFileName != ""){
306                                 currQual = QualityScores(qFile);  pDataArray->m->gobble(qFile);
307                 savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
308                         }
309               
310                         
311                         string origSeq = currSeq.getUnaligned();
312                         if (origSeq != "") {
313                 pDataArray->count++;
314                                 
315                                 int barcodeIndex = 0;
316                                 int primerIndex = 0;
317                                 
318                 if(pDataArray->numLinkers != 0){
319                                         success = trimOligos->stripLinker(currSeq, currQual);
320                                         if(success > pDataArray->ldiffs)                {       trashCode += 'k';       }
321                                         else{ currentSeqsDiffs += success;  }
322                                 }
323                 
324                                 if(numBarcodes != 0){
325                                         success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
326                                         if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
327                                         else{ currentSeqsDiffs += success;  }
328                                 }
329                 
330                 if(pDataArray->numSpacers != 0){
331                                         success = trimOligos->stripSpacer(currSeq, currQual);
332                                         if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
333                                         else{ currentSeqsDiffs += success;  }
334
335                                 }
336                 
337                                 if(pDataArray->numFPrimers != 0){
338                                         success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
339                                         if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
340                                         else{ currentSeqsDiffs += success;  }
341                                 }
342                                 
343                                 if (currentSeqsDiffs > pDataArray->tdiffs)      {       trashCode += 't';   }
344                                 
345                                 if(pDataArray->numRPrimers != 0){
346                                         success = trimOligos->stripReverse(currSeq, currQual);
347                                         if(!success)                            {       trashCode += 'r';       }
348                                 }
349                 
350                 if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse
351                     int thisSuccess = 0;
352                     string thisTrashCode = "";
353                     int thisCurrentSeqsDiffs = 0;
354                     
355                     int thisBarcodeIndex = 0;
356                     int thisPrimerIndex = 0;
357                     
358                     if(numBarcodes != 0){
359                         thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
360                         if(thisSuccess > pDataArray->bdiffs)            {       thisTrashCode += 'b';   }
361                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
362                     }
363                     
364                     if(pDataArray->numFPrimers != 0){
365                         thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward);
366                         if(thisSuccess > pDataArray->pdiffs)            {       thisTrashCode += 'f';   }
367                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
368                     }
369                     
370                     if (thisCurrentSeqsDiffs > pDataArray->tdiffs)      {       thisTrashCode += 't';   }
371                     
372                     if (thisTrashCode == "") {
373                         trashCode = thisTrashCode;
374                         success = thisSuccess;
375                         currentSeqsDiffs = thisCurrentSeqsDiffs;
376                         barcodeIndex = thisBarcodeIndex;
377                         primerIndex = thisPrimerIndex;
378                         savedSeq.reverseComplement();
379                         currSeq.setAligned(savedSeq.getAligned());
380                         if(pDataArray->qFileName != ""){
381                             savedQual.flipQScores();
382                             currQual.setScores(savedQual.getScores());
383                         }
384                     }else { trashCode += "(" + thisTrashCode + ")";  }
385                 }
386
387                 
388                                 if(pDataArray->keepFirst != 0){
389                                         //success = keepFirstTrim(currSeq, currQual);
390                     success = 1;
391                     if(currQual.getName() != ""){
392                         currQual.trimQScores(-1, pDataArray->keepFirst);
393                     }
394                     currSeq.trim(pDataArray->keepFirst);
395                                 }
396                                 
397                                 if(pDataArray->removeLast != 0){
398                                         //success = removeLastTrim(currSeq, currQual);
399                     success = 0;
400                     int length = currSeq.getNumBases() - pDataArray->removeLast;
401                     
402                     if(length > 0){
403                         if(currQual.getName() != ""){
404                             currQual.trimQScores(-1, length);
405                         }
406                         currSeq.trim(length);
407                         success = 1;
408                     }
409                     else{ success = 0; }
410                     
411                                         if(!success)                            {       trashCode += 'l';       }
412                                 }
413                 
414                                 
415                                 if(pDataArray->qFileName != ""){
416                                         int origLength = currSeq.getNumBases();
417                                         
418                                         if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
419                                         else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage);                              }
420                                         else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage);  }
421                                         else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage);       }
422                                         else                                            {       success = 1;                            }
423                                         
424                                         //you don't want to trim, if it fails above then scrap it
425                                         if ((!pDataArray->qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
426                                         
427                                         if(!success)                            {       trashCode += 'q';       }
428                                 }                               
429                 
430                                 if(pDataArray->minLength > 0 || pDataArray->maxLength > 0){
431                                         //success = cullLength(currSeq);
432                     int length = currSeq.getNumBases();
433                     success = 0;        //guilty until proven innocent
434                     if(length >= pDataArray->minLength && pDataArray->maxLength == 0)                   {       success = 1;    }
435                     else if(length >= pDataArray->minLength && length <= pDataArray->maxLength) {       success = 1;    }
436                     else                                                                                                {       success = 0;    }
437                     
438                                         if(!success)                            {       trashCode += 'l';       }
439                                 }
440                                 if(pDataArray->maxHomoP > 0){
441                                         //success = cullHomoP(currSeq);
442                     int longHomoP = currSeq.getLongHomoPolymer();
443                     success = 0;        //guilty until proven innocent
444                     if(longHomoP <= pDataArray->maxHomoP){      success = 1;    }
445                     else                                        {       success = 0;    }
446                     
447                                         if(!success)                            {       trashCode += 'h';       }
448                                 }
449                                 if(pDataArray->maxAmbig != -1){
450                                         //success = cullAmbigs(currSeq);
451                     int numNs = currSeq.getAmbigBases();
452                     success = 0;        //guilty until proven innocent
453                     if(numNs <= pDataArray->maxAmbig)   {       success = 1;    }
454                     else                                        {       success = 0;    }
455                                         if(!success)                            {       trashCode += 'n';       }
456                                 }
457                                 
458                                 if(pDataArray->flip){           // should go last                       
459                                         currSeq.reverseComplement();
460                                         if(pDataArray->qFileName != ""){
461                                                 currQual.flipQScores(); 
462                                         }
463                                 }
464                                 
465                                 if(trashCode.length() == 0){
466                     string thisGroup = "";
467                     if (pDataArray->createGroup) {
468                                                 if(numBarcodes != 0){
469                                                         thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
470                                                         if (pDataArray->numFPrimers != 0) {
471                                                                 if (pDataArray->primerNameVector[primerIndex] != "") { 
472                                                                         if(thisGroup != "") {
473                                                                                 thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
474                                                                         }else {
475                                                                                 thisGroup = pDataArray->primerNameVector[primerIndex]; 
476                                                                         }
477                                                                 } 
478                                                         }
479                         }
480                     }
481                     
482                     int pos = thisGroup.find("ignore");
483                     if (pos == string::npos) {
484                         
485                         currSeq.setAligned(currSeq.getUnaligned());
486                         currSeq.printSequence(trimFASTAFile);
487                         
488                         if(pDataArray->qFileName != ""){
489                             currQual.printQScores(trimQualFile);
490                         }
491                         
492                         if(pDataArray->nameFile != ""){
493                             map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
494                             if (itName != pDataArray->nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
495                             else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
496                         }
497                         
498                         int numRedundants = 0;
499                         if (pDataArray->countfile != "") {
500                             map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
501                             if (itCount != pDataArray->nameCount.end()) { 
502                                 trimCountFile << itCount->first << '\t' << itCount->second << endl;
503                                 numRedundants = itCount->second-1;
504                             }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
505                         }
506                         
507                         if (pDataArray->createGroup) {
508                             if(numBarcodes != 0){
509                                 
510                                 if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
511                                 else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
512                                 
513                                 if (pDataArray->nameFile != "") {
514                                     map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
515                                     if (itName != pDataArray->nameMap.end()) { 
516                                         vector<string> thisSeqsNames; 
517                                         pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
518                                         numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
519                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
520                                             outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
521                                         }
522                                     }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }                                                       
523                                 }
524                                 
525                                 map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
526                                 if (it == pDataArray->groupCounts.end()) {      pDataArray->groupCounts[thisGroup] = 1 + numRedundants; }
527                                 else { pDataArray->groupCounts[it->first] += (1 + numRedundants); }
528                                 
529                             }
530                         }
531                         
532                         if(pDataArray->allFiles){
533                             ofstream output;
534                             pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
535                             currSeq.printSequence(output);
536                             output.close();
537                             
538                             if(pDataArray->qFileName != ""){
539                                 pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
540                                 currQual.printQScores(output);
541                                 output.close();                                                 
542                             }
543                             
544                             if(pDataArray->nameFile != ""){
545                                 map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
546                                 if (itName != pDataArray->nameMap.end()) { 
547                                     pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output);
548                                     output << itName->first << '\t' << itName->second << endl; 
549                                     output.close();
550                                 }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
551                             }
552                         }
553                     }
554                                 }
555                                 else{
556                                         if(pDataArray->nameFile != ""){ //needs to be before the currSeq name is changed
557                                                 map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
558                                                 if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
559                                                 else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
560                                         }
561                     if (pDataArray->countfile != "") {
562                         map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
563                         if (itCount != pDataArray->nameCount.end()) { 
564                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
565                         }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
566                     }
567                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
568                                         currSeq.setUnaligned(origSeq);
569                                         currSeq.setAligned(origSeq);
570                                         currSeq.printSequence(scrapFASTAFile);
571                                         if(pDataArray->qFileName != ""){
572                                                 currQual.printQScores(scrapQualFile);
573                                         }
574                                 }
575                                 
576                         }
577                         
578                         //report progress
579                         if((pDataArray->count) % 1000 == 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
580                         
581                 }
582                 //report progress
583                 if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
584                 
585         if (pDataArray->reorient) { delete rtrimOligos; }
586                 delete trimOligos;
587                 inFASTA.close();
588                 trimFASTAFile.close();
589                 scrapFASTAFile.close();
590                 if (pDataArray->createGroup) {   outGroupsFile.close();   }
591                 if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
592                 if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
593                 
594         return 0;
595             
596         }
597         catch(exception& e) {
598             pDataArray->m->errorOut(e, "TrimSeqsCommand", "MyTrimThreadFunction");
599             exit(1);
600         }
601     } 
602 #endif
603     
604
605 /**************************************************************************************************/
606
607 #endif