git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.h
mods to resolve some misc warnings. added alignPrimer function to needle man to allow...
[mothur.git] / trimseqscommand.h
1 #ifndef TRIMSEQSCOMMAND_H
2 #define TRIMSEQSCOMMAND_H
3
4 /*
5  *  trimseqscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 6/6/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "sequence.hpp"
16 #include "qualityscores.h"
17 #include "trimoligos.h"
18 #include "counttable.h"
19
20
21 class TrimSeqsCommand : public Command {
22 public:
23         TrimSeqsCommand(string);
24         TrimSeqsCommand();
25         ~TrimSeqsCommand(){}
26         
27         vector<string> setParameters();
28         string getCommandName()                 { return "trim.seqs";   }
29         string getCommandCategory()             { return "Sequence Processing";         }
30         
31         string getHelpString(); 
32     string getOutputPattern(string);    
33         string getCitation() { return "http://www.mothur.org/wiki/Trim.seqs"; }
34         string getDescription()         { return "provides the preprocessing features needed to screen and sort pyrosequences"; }
35
36         int execute(); 
37         void help() { m->mothurOut(getHelpString()); }  
38         
39 private:
40     struct linePair {
41         unsigned long long start;
42         unsigned long long end;
43         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
44         linePair() {}
45     };
46     
47         bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
48         bool keepFirstTrim(Sequence&, QualityScores&);
49         bool removeLastTrim(Sequence&, QualityScores&);
50         bool cullLength(Sequence&);
51         bool cullHomoP(Sequence&);
52         bool cullAmbigs(Sequence&);
53     string reverseOligo(string);
54
55         bool abort, createGroup;
56         string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
57         
58         bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient;
59         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
60         int qWindowSize, qWindowStep, keepFirst, removeLast;
61         double qRollAverage, qThreshold, qWindowAverage, qAverage;
62         vector<string> revPrimer, outputNames;
63         set<string> filesToRemove;
64     map<int, oligosPair> pairedBarcodes;
65     map<int, oligosPair> pairedPrimers;
66         map<string, int> barcodes;
67         vector<string> groupVector;
68         map<string, int> primers;
69     vector<string>  linker;
70     vector<string>  spacer;
71         map<string, int> combos;
72         map<string, int> groupToIndex;
73         vector<string> primerNameVector;        //needed here?
74         vector<string> barcodeNameVector;       //needed here?
75         map<string, int> groupCounts;  
76         map<string, string> nameMap;
77     map<string, int> nameCount; //for countfile name -> repCount
78     map<string, string> groupMap; //for countfile name -> group
79
80         vector<int> processIDS;   //processid
81         vector<linePair> lines;
82         vector<linePair> qLines;
83         
84         int driverCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
85         int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
86         int setLines(string, string);
87 };
88
89 /**************************************************************************************************/
90 //custom data structure for threads to use.
91 // This is passed by void pointer so it can be any data type
92 // that can be passed using a single void pointer (LPVOID).
93 struct trimData {
94     unsigned long long start, end;
95     MothurOut* m;
96     string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
97         vector<vector<string> > fastaFileNames;
98     vector<vector<string> > qualFileNames;
99     vector<vector<string> > nameFileNames;
100     unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
101     bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient;
102         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
103         int qWindowSize, qWindowStep, keepFirst, removeLast, count;
104         double qRollAverage, qThreshold, qWindowAverage, qAverage;
105     vector<string> revPrimer;
106         map<string, int> barcodes;
107         map<string, int> primers;
108     map<string, int> nameCount;
109     vector<string>  linker;
110     vector<string>  spacer;
111         map<string, int> combos;
112         vector<string> primerNameVector;        
113         vector<string> barcodeNameVector;       
114         map<string, int> groupCounts;  
115         map<string, string> nameMap;
116     map<string, string> groupMap;
117     map<int, oligosPair> pairedBarcodes;
118     map<int, oligosPair> pairedPrimers;
119     
120         trimData(){}
121         trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
122                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
123                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
124                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
125                       int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
126         filename = fn;
127         qFileName = qn;
128         nameFile = nf;
129         countfile = cf;
130         trimFileName = tn;
131         scrapFileName = sn;
132         trimQFileName = tqn;
133         scrapQFileName = sqn;
134         trimNFileName = tnn;
135         scrapNFileName = snn;
136         trimCFileName = tcn;
137         scrapCFileName = scn;
138         groupFileName = gn;
139         fastaFileNames = ffn;
140         qualFileNames = qfn;
141         nameFileNames = nfn;
142         lineStart = lstart;
143         lineEnd = lend;
144         qlineStart = qstart;
145         qlineEnd = qend;
146                 m = mout;
147         nameCount = ncount;
148         
149         pdiffs = pd;
150         bdiffs = bd;
151         ldiffs = ld;
152         sdiffs = sd;
153         tdiffs = td;
154         barcodes = bar;
155         pairedPrimers = ppr;
156         pairedBarcodes = pbr;
157         pairedOligos = po;
158         primers = pri;      numFPrimers = primers.size();
159         revPrimer = revP;   numRPrimers = revPrimer.size();
160         linker = li;        numLinkers = linker.size();
161         spacer = spa;       numSpacers = spacer.size();
162         primerNameVector = priNameVector;
163         barcodeNameVector = barNameVector;
164         
165         createGroup = cGroup;
166         allFiles = aFiles;
167         keepforward = keepF;
168         keepFirst = keepfi;
169         removeLast = removeL;
170         qWindowStep = WindowStep;
171         qWindowSize = WindowSize;
172         qWindowAverage = WindowAverage;
173         qtrim = trim;
174         qThreshold = Threshold;
175         qAverage = Average;
176         qRollAverage = RollAverage;
177         minLength = minL;
178         maxAmbig = maxA;
179         maxHomoP = maxH;
180         maxLength = maxL;
181         flip = fli;
182         reorient = reo;
183         nameMap = nm;
184         count = 0;
185         }
186 };
187 /**************************************************************************************************/
188 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
189 #else
190 static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ 
191         trimData* pDataArray;
192         pDataArray = (trimData*)lpParam;
193         
194         try {
195         ofstream trimFASTAFile;
196                 pDataArray->m->openOutputFile(pDataArray->trimFileName, trimFASTAFile);
197                 
198                 ofstream scrapFASTAFile;
199                 pDataArray->m->openOutputFile(pDataArray->scrapFileName, scrapFASTAFile);
200                 
201                 ofstream trimQualFile;
202                 ofstream scrapQualFile;
203                 if(pDataArray->qFileName != ""){
204                         pDataArray->m->openOutputFile(pDataArray->trimQFileName, trimQualFile);
205                         pDataArray->m->openOutputFile(pDataArray->scrapQFileName, scrapQualFile);
206                 }
207                 
208                 ofstream trimNameFile;
209                 ofstream scrapNameFile;
210                 if(pDataArray->nameFile != ""){
211                         pDataArray->m->openOutputFile(pDataArray->trimNFileName, trimNameFile);
212                         pDataArray->m->openOutputFile(pDataArray->scrapNFileName, scrapNameFile);
213                 }
214                 
215                 
216                 ofstream outGroupsFile;
217                 if ((pDataArray->createGroup) && (pDataArray->countfile == "")){        pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
218                 if(pDataArray->allFiles){
219                         for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
220                                 for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
221                                         if (pDataArray->fastaFileNames[i][j] != "") {
222                                                 ofstream temp;
223                                                 pDataArray->m->openOutputFile(pDataArray->fastaFileNames[i][j], temp);                  temp.close();
224                                                 if(pDataArray->qFileName != ""){
225                                                         pDataArray->m->openOutputFile(pDataArray->qualFileNames[i][j], temp);                   temp.close();
226                                                 }
227                                                 
228                                                 if(pDataArray->nameFile != ""){
229                                                         pDataArray->m->openOutputFile(pDataArray->nameFileNames[i][j], temp);                   temp.close();
230                                                 }
231                                         }
232                                 }
233                         }
234                 }
235                 
236         ofstream trimCountFile;
237                 ofstream scrapCountFile;
238                 if(pDataArray->countfile != ""){
239                         pDataArray->m->openOutputFile(pDataArray->trimCFileName, trimCountFile);
240                         pDataArray->m->openOutputFile(pDataArray->scrapCFileName, scrapCountFile);
241             if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
242                 }
243         
244                 ifstream inFASTA;
245                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
246                 if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
247                         inFASTA.seekg(0);
248                 }else { //this accounts for the difference in line endings. 
249                         inFASTA.seekg(pDataArray->lineStart-1); pDataArray->m->gobble(inFASTA); 
250                 }
251                 
252                 ifstream qFile;
253                 if(pDataArray->qFileName != "") {
254                         pDataArray->m->openInputFile(pDataArray->qFileName, qFile);
255                         if ((pDataArray->qlineStart == 0) || (pDataArray->qlineStart == 1)) {
256                 qFile.seekg(0);
257             }else { //this accounts for the difference in line endings. 
258                 qFile.seekg(pDataArray->qlineStart-1); pDataArray->m->gobble(qFile); 
259             } 
260                 }
261                 
262         TrimOligos* trimOligos = NULL;
263         int numBarcodes = pDataArray->barcodes.size();
264         if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); }
265         else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);  }
266         
267         TrimOligos* rtrimOligos = NULL;
268         if (pDataArray->reorient) {
269             //create reoriented primer and barcode pairs
270             map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
271             for (map<int, oligosPair>::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) {
272                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
273                 rpairedPrimers[it->first] = tempPair;
274             }
275             for (map<int, oligosPair>::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) {
276                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
277                 rpairedBarcodes[it->first] = tempPair;
278             }
279             rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
280         }
281         
282                 pDataArray->count = 0;
283                 for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
284                                    
285                         if (pDataArray->m->control_pressed) {
286                 delete trimOligos;
287                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
288                                 if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
289                 if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
290                 if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
291                 if(pDataArray->countfile != "") {       scrapCountFile.close(); trimCountFile.close();  }
292
293                                 if(pDataArray->qFileName != ""){ qFile.close(); }
294                                 return 0;
295                         }
296                         
297                         int success = 1;
298                         string trashCode = "";
299                         int currentSeqsDiffs = 0;
300             
301                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
302                         
303                         QualityScores currQual;
304                         if(pDataArray->qFileName != ""){
305                                 currQual = QualityScores(qFile);  pDataArray->m->gobble(qFile);
306                         }
307                         
308                         string origSeq = currSeq.getUnaligned();
309                         if (origSeq != "") {
310                 pDataArray->count++;
311                                 
312                                 int barcodeIndex = 0;
313                                 int primerIndex = 0;
314                                 
315                 if(pDataArray->numLinkers != 0){
316                                         success = trimOligos->stripLinker(currSeq, currQual);
317                                         if(success > pDataArray->ldiffs)                {       trashCode += 'k';       }
318                                         else{ currentSeqsDiffs += success;  }
319                                 }
320                 
321                                 if(numBarcodes != 0){
322                                         success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
323                                         if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
324                                         else{ currentSeqsDiffs += success;  }
325                                 }
326                 
327                 if(pDataArray->numSpacers != 0){
328                                         success = trimOligos->stripSpacer(currSeq, currQual);
329                                         if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
330                                         else{ currentSeqsDiffs += success;  }
331
332                                 }
333                 
334                                 if(pDataArray->numFPrimers != 0){
335                                         success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
336                                         if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
337                                         else{ currentSeqsDiffs += success;  }
338                                 }
339                                 
340                                 if (currentSeqsDiffs > pDataArray->tdiffs)      {       trashCode += 't';   }
341                                 
342                                 if(pDataArray->numRPrimers != 0){
343                                         success = trimOligos->stripReverse(currSeq, currQual);
344                                         if(!success)                            {       trashCode += 'r';       }
345                                 }
346                 
347                 if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse
348                     int thisSuccess = 0;
349                     string thisTrashCode = "";
350                     int thisCurrentSeqsDiffs = 0;
351                     
352                     int thisBarcodeIndex = 0;
353                     int thisPrimerIndex = 0;
354                     
355                     if(numBarcodes != 0){
356                         thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
357                         if(thisSuccess > pDataArray->bdiffs)            {       thisTrashCode += 'b';   }
358                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
359                     }
360                     
361                     if(pDataArray->numFPrimers != 0){
362                         thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward);
363                         if(thisSuccess > pDataArray->pdiffs)            {       thisTrashCode += 'f';   }
364                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
365                     }
366                     
367                     if (thisCurrentSeqsDiffs > pDataArray->tdiffs)      {       thisTrashCode += 't';   }
368                     
369                     if (thisTrashCode == "") {
370                         trashCode = thisTrashCode;
371                         success = thisSuccess;
372                         currentSeqsDiffs = thisCurrentSeqsDiffs;
373                         barcodeIndex = thisBarcodeIndex;
374                         primerIndex = thisPrimerIndex;
375                     }
376                 }
377
378                 
379                                 if(pDataArray->keepFirst != 0){
380                                         //success = keepFirstTrim(currSeq, currQual);
381                     success = 1;
382                     if(currQual.getName() != ""){
383                         currQual.trimQScores(-1, pDataArray->keepFirst);
384                     }
385                     currSeq.trim(pDataArray->keepFirst);
386                                 }
387                                 
388                                 if(pDataArray->removeLast != 0){
389                                         //success = removeLastTrim(currSeq, currQual);
390                     success = 0;
391                     int length = currSeq.getNumBases() - pDataArray->removeLast;
392                     
393                     if(length > 0){
394                         if(currQual.getName() != ""){
395                             currQual.trimQScores(-1, length);
396                         }
397                         currSeq.trim(length);
398                         success = 1;
399                     }
400                     else{ success = 0; }
401                     
402                                         if(!success)                            {       trashCode += 'l';       }
403                                 }
404                 
405                                 
406                                 if(pDataArray->qFileName != ""){
407                                         int origLength = currSeq.getNumBases();
408                                         
409                                         if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
410                                         else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage);                              }
411                                         else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage);  }
412                                         else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage);       }
413                                         else                                            {       success = 1;                            }
414                                         
415                                         //you don't want to trim, if it fails above then scrap it
416                                         if ((!pDataArray->qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
417                                         
418                                         if(!success)                            {       trashCode += 'q';       }
419                                 }                               
420                 
421                                 if(pDataArray->minLength > 0 || pDataArray->maxLength > 0){
422                                         //success = cullLength(currSeq);
423                     int length = currSeq.getNumBases();
424                     success = 0;        //guilty until proven innocent
425                     if(length >= pDataArray->minLength && pDataArray->maxLength == 0)                   {       success = 1;    }
426                     else if(length >= pDataArray->minLength && length <= pDataArray->maxLength) {       success = 1;    }
427                     else                                                                                                {       success = 0;    }
428                     
429                                         if(!success)                            {       trashCode += 'l';       }
430                                 }
431                                 if(pDataArray->maxHomoP > 0){
432                                         //success = cullHomoP(currSeq);
433                     int longHomoP = currSeq.getLongHomoPolymer();
434                     success = 0;        //guilty until proven innocent
435                     if(longHomoP <= pDataArray->maxHomoP){      success = 1;    }
436                     else                                        {       success = 0;    }
437                     
438                                         if(!success)                            {       trashCode += 'h';       }
439                                 }
440                                 if(pDataArray->maxAmbig != -1){
441                                         //success = cullAmbigs(currSeq);
442                     int numNs = currSeq.getAmbigBases();
443                     success = 0;        //guilty until proven innocent
444                     if(numNs <= pDataArray->maxAmbig)   {       success = 1;    }
445                     else                                        {       success = 0;    }
446                                         if(!success)                            {       trashCode += 'n';       }
447                                 }
448                                 
449                                 if(pDataArray->flip){           // should go last                       
450                                         currSeq.reverseComplement();
451                                         if(pDataArray->qFileName != ""){
452                                                 currQual.flipQScores(); 
453                                         }
454                                 }
455                                 
456                                 if(trashCode.length() == 0){
457                     string thisGroup = "";
458                     if (pDataArray->createGroup) {
459                                                 if(numBarcodes != 0){
460                                                         thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
461                                                         if (pDataArray->numFPrimers != 0) {
462                                                                 if (pDataArray->primerNameVector[primerIndex] != "") { 
463                                                                         if(thisGroup != "") {
464                                                                                 thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
465                                                                         }else {
466                                                                                 thisGroup = pDataArray->primerNameVector[primerIndex]; 
467                                                                         }
468                                                                 } 
469                                                         }
470                         }
471                     }
472                     
473                     int pos = thisGroup.find("ignore");
474                     if (pos == string::npos) {
475                         
476                         currSeq.setAligned(currSeq.getUnaligned());
477                         currSeq.printSequence(trimFASTAFile);
478                         
479                         if(pDataArray->qFileName != ""){
480                             currQual.printQScores(trimQualFile);
481                         }
482                         
483                         if(pDataArray->nameFile != ""){
484                             map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
485                             if (itName != pDataArray->nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
486                             else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
487                         }
488                         
489                         int numRedundants = 0;
490                         if (pDataArray->countfile != "") {
491                             map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
492                             if (itCount != pDataArray->nameCount.end()) { 
493                                 trimCountFile << itCount->first << '\t' << itCount->second << endl;
494                                 numRedundants = itCount->second-1;
495                             }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
496                         }
497                         
498                         if (pDataArray->createGroup) {
499                             if(numBarcodes != 0){
500                                 
501                                 if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
502                                 else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
503                                 
504                                 if (pDataArray->nameFile != "") {
505                                     map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
506                                     if (itName != pDataArray->nameMap.end()) { 
507                                         vector<string> thisSeqsNames; 
508                                         pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
509                                         numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
510                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
511                                             outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
512                                         }
513                                     }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }                                                       
514                                 }
515                                 
516                                 map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
517                                 if (it == pDataArray->groupCounts.end()) {      pDataArray->groupCounts[thisGroup] = 1 + numRedundants; }
518                                 else { pDataArray->groupCounts[it->first] += (1 + numRedundants); }
519                                 
520                             }
521                         }
522                         
523                         if(pDataArray->allFiles){
524                             ofstream output;
525                             pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
526                             currSeq.printSequence(output);
527                             output.close();
528                             
529                             if(pDataArray->qFileName != ""){
530                                 pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
531                                 currQual.printQScores(output);
532                                 output.close();                                                 
533                             }
534                             
535                             if(pDataArray->nameFile != ""){
536                                 map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
537                                 if (itName != pDataArray->nameMap.end()) { 
538                                     pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output);
539                                     output << itName->first << '\t' << itName->second << endl; 
540                                     output.close();
541                                 }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
542                             }
543                         }
544                     }
545                                 }
546                                 else{
547                                         if(pDataArray->nameFile != ""){ //needs to be before the currSeq name is changed
548                                                 map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
549                                                 if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
550                                                 else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
551                                         }
552                     if (pDataArray->countfile != "") {
553                         map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
554                         if (itCount != pDataArray->nameCount.end()) { 
555                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
556                         }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
557                     }
558                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
559                                         currSeq.setUnaligned(origSeq);
560                                         currSeq.setAligned(origSeq);
561                                         currSeq.printSequence(scrapFASTAFile);
562                                         if(pDataArray->qFileName != ""){
563                                                 currQual.printQScores(scrapQualFile);
564                                         }
565                                 }
566                                 
567                         }
568                         
569                         //report progress
570                         if((i) % 1000 == 0){    pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }
571                         
572                 }
573                 //report progress
574                 if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
575                 
576                 delete trimOligos;
577                 inFASTA.close();
578                 trimFASTAFile.close();
579                 scrapFASTAFile.close();
580                 if (pDataArray->createGroup) {   outGroupsFile.close();   }
581                 if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
582                 if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
583                 
584         return 0;
585             
586         }
587         catch(exception& e) {
588             pDataArray->m->errorOut(e, "TrimSeqsCommand", "MyTrimThreadFunction");
589             exit(1);
590         }
591     } 
592 #endif
593     
594
595 /**************************************************************************************************/
596
597 #endif