]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
21                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
22                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
23                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
24                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
25                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
26                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
27                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
28                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
29                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
30                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
31                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
32                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
33                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
34                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
35                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
36                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
37                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
38                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41                         
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string TrimSeqsCommand::getHelpString(){        
53         try {
54                 string helpString = "";
55                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
56                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
57                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
58                 helpString += "The fasta parameter is required.\n";
59                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
60                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
61                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
62                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
63                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
64                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
65                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
66                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
67                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
68                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
69                 helpString += "The qfile parameter allows you to provide a quality file.\n";
70                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
71                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
72                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
73                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
74                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
75                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
76                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
77                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
78                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
79                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
80                 helpString += "The trim.seqs command should be in the following format: \n";
81                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
82                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
83                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
84                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
85                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
86                 return helpString;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
90                 exit(1);
91         }
92 }
93
94
95 //**********************************************************************************************************************
96
97 TrimSeqsCommand::TrimSeqsCommand(){     
98         try {
99                 abort = true; calledHelp = true; 
100                 setParameters();
101                 vector<string> tempOutNames;
102                 outputTypes["fasta"] = tempOutNames;
103                 outputTypes["qfile"] = tempOutNames;
104                 outputTypes["group"] = tempOutNames;
105                 outputTypes["name"] = tempOutNames;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109                 exit(1);
110         }
111 }
112 //***************************************************************************************************************
113
114 TrimSeqsCommand::TrimSeqsCommand(string option)  {
115         try {
116                 
117                 abort = false; calledHelp = false;   
118                 comboStarts = 0;
119                 
120                 //allow user to run help
121                 if(option == "help") { help(); abort = true; calledHelp = true; }
122                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
123                 
124                 else {
125                         vector<string> myArray = setParameters();
126                         
127                         OptionParser parser(option);
128                         map<string,string> parameters = parser.getParameters();
129                         
130                         ValidParameters validParameter;
131                         map<string,string>::iterator it;
132                         
133                         //check to make sure all parameters are valid for command
134                         for (it = parameters.begin(); it != parameters.end(); it++) { 
135                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
136                         }
137                         
138                         //initialize outputTypes
139                         vector<string> tempOutNames;
140                         outputTypes["fasta"] = tempOutNames;
141                         outputTypes["qfile"] = tempOutNames;
142                         outputTypes["group"] = tempOutNames;
143                         outputTypes["name"] = tempOutNames;
144                         
145                         //if the user changes the input directory command factory will send this info to us in the output parameter 
146                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
147                         if (inputDir == "not found"){   inputDir = "";          }
148                         else {
149                                 string path;
150                                 it = parameters.find("fasta");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
156                                 }
157                                 
158                                 it = parameters.find("oligos");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
164                                 }
165                                 
166                                 it = parameters.find("qfile");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                                 
182                         }
183
184                         
185                         //check for required parameters
186                         fastaFile = validParameter.validFile(parameters, "fasta", true);
187                         if (fastaFile == "not found") {                                 
188                                 fastaFile = m->getFastaFile(); 
189                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
190                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
191                         }else if (fastaFile == "not open") { abort = true; }    
192                         else { m->setFastaFile(fastaFile); }
193                         
194                         //if the user changes the output directory command factory will send this info to us in the output parameter 
195                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
196                                 outputDir = ""; 
197                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
198                         }
199                 
200                         
201                         //check for optional parameter and set defaults
202                         // ...at some point should added some additional type checking...
203                         string temp;
204                         temp = validParameter.validFile(parameters, "flip", false);
205                         if (temp == "not found"){       flip = 0;       }
206                         else if(m->isTrue(temp))        {       flip = 1;       }
207                 
208                         temp = validParameter.validFile(parameters, "oligos", true);
209                         if (temp == "not found"){       oligoFile = "";         }
210                         else if(temp == "not open"){    abort = true;   } 
211                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
212                         
213                         
214                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
215                         convert(temp, maxAmbig);  
216
217                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
218                         convert(temp, maxHomoP);  
219
220                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
221                         convert(temp, minLength); 
222                         
223                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
224                         convert(temp, maxLength);
225                         
226                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
227                         convert(temp, bdiffs);
228                         
229                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
230                         convert(temp, pdiffs);
231                         
232                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
233                         convert(temp, tdiffs);
234                         
235                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
236                         
237                         temp = validParameter.validFile(parameters, "qfile", true);     
238                         if (temp == "not found")        {       qFileName = "";         }
239                         else if(temp == "not open")     {       abort = true;           }
240                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
241                         
242                         temp = validParameter.validFile(parameters, "name", true);      
243                         if (temp == "not found")        {       nameFile = "";          }
244                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
245                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
246                         
247                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
248                         convert(temp, qThreshold);
249                         
250                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
251                         qtrim = m->isTrue(temp);
252
253                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
254                         convert(temp, qRollAverage);
255
256                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
257                         convert(temp, qWindowAverage);
258
259                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
260                         convert(temp, qWindowSize);
261
262                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
263                         convert(temp, qWindowStep);
264
265                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
266                         convert(temp, qAverage);
267
268                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
269                         convert(temp, keepFirst);
270
271                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
272                         convert(temp, removeLast);
273                         
274                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
275                         allFiles = m->isTrue(temp);
276                         
277                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
278                         m->setProcessors(temp);
279                         convert(temp, processors); 
280                         
281                         
282                         if(allFiles && (oligoFile == "")){
283                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
284                         }
285                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
286                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
287                                 qAverage=0;
288                                 qThreshold=0;
289                         }
290                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
291                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
292                                 abort = true;
293                         }
294                 }
295
296         }
297         catch(exception& e) {
298                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
299                 exit(1);
300         }
301 }
302 //***************************************************************************************************************
303
304 int TrimSeqsCommand::execute(){
305         try{
306         
307                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
308                 
309                 numFPrimers = 0;  //this needs to be initialized
310                 numRPrimers = 0;
311                 vector<vector<string> > fastaFileNames;
312                 vector<vector<string> > qualFileNames;
313                 vector<vector<string> > nameFileNames;
314                 
315                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
316                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
317                 
318                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
319                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
320                 
321                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
322                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
323                 
324                 if (qFileName != "") {
325                         outputNames.push_back(trimQualFile);
326                         outputNames.push_back(scrapQualFile);
327                         outputTypes["qfile"].push_back(trimQualFile);
328                         outputTypes["qfile"].push_back(scrapQualFile); 
329                 }
330                 
331                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
332                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
333                 
334                 if (nameFile != "") {
335                         m->readNames(nameFile, nameMap);
336                         outputNames.push_back(trimNameFile);
337                         outputNames.push_back(scrapNameFile);
338                         outputTypes["name"].push_back(trimNameFile);
339                         outputTypes["name"].push_back(scrapNameFile); 
340                 }
341                 
342                 if (m->control_pressed) { return 0; }
343                 
344                 string outputGroupFileName;
345                 if(oligoFile != ""){
346                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
347                         outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
348                         getOligos(fastaFileNames, qualFileNames, nameFileNames);
349                 }
350                 
351                 vector<unsigned long int> fastaFilePos;
352                 vector<unsigned long int> qFilePos;
353                 
354                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
355                 
356                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
357                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
358                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
359                 }       
360                 if(qFileName == "")     {       qLines = lines; } //files with duds
361                 
362                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
363                                 if(processors == 1){
364                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
365                                 }else{
366                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
367                                 }       
368                 #else
369                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
370                 #endif
371                 
372                 if (m->control_pressed) {  return 0; }                  
373         
374                 if(allFiles){
375                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
376                         map<string, string>::iterator it;
377                         set<string> namesToRemove;
378                         for(int i=0;i<fastaFileNames.size();i++){
379                                 for(int j=0;j<fastaFileNames[0].size();j++){
380                                         if (fastaFileNames[i][j] != "") {
381                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
382                                                         if(m->isBlank(fastaFileNames[i][j])){
383                                                                 m->mothurRemove(fastaFileNames[i][j]);
384                                                                 namesToRemove.insert(fastaFileNames[i][j]);
385                                                         
386                                                                 if(qFileName != ""){
387                                                                         m->mothurRemove(qualFileNames[i][j]);
388                                                                         namesToRemove.insert(qualFileNames[i][j]);
389                                                                 }
390                                                                 
391                                                                 if(nameFile != ""){
392                                                                         m->mothurRemove(nameFileNames[i][j]);
393                                                                         namesToRemove.insert(nameFileNames[i][j]);
394                                                                 }
395                                                         }else{  
396                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
397                                                                 if (it == uniqueFastaNames.end()) {     
398                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
399                                                                 }       
400                                                         }
401                                                 }
402                                         }
403                                 }
404                         }
405                         
406                         //remove names for outputFileNames, just cleans up the output
407                         vector<string> outputNames2;
408                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
409                         outputNames = outputNames2;
410                         
411                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
412                                 ifstream in;
413                                 m->openInputFile(it->first, in);
414                                 
415                                 ofstream out;
416                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
417                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
418                                 m->openOutputFile(thisGroupName, out);
419                                 
420                                 while (!in.eof()){
421                                         if (m->control_pressed) { break; }
422                                         
423                                         Sequence currSeq(in); m->gobble(in);
424                                         out << currSeq.getName() << '\t' << it->second << endl;
425                                 }
426                                 in.close();
427                                 out.close();
428                         }
429                 }
430                 
431                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
432
433                 //output group counts
434                 m->mothurOutEndLine();
435                 int total = 0;
436                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
437                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
438                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
439                 }
440                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
441                 
442                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
443
444                 //set fasta file as new current fastafile
445                 string current = "";
446                 itTypes = outputTypes.find("fasta");
447                 if (itTypes != outputTypes.end()) {
448                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
449                 }
450                 
451                 itTypes = outputTypes.find("name");
452                 if (itTypes != outputTypes.end()) {
453                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
454                 }
455                 
456                 itTypes = outputTypes.find("qfile");
457                 if (itTypes != outputTypes.end()) {
458                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
459                 }
460                 
461                 itTypes = outputTypes.find("group");
462                 if (itTypes != outputTypes.end()) {
463                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
464                 }
465
466                 m->mothurOutEndLine();
467                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
468                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
469                 m->mothurOutEndLine();
470                 
471                 return 0;       
472                         
473         }
474         catch(exception& e) {
475                 m->errorOut(e, "TrimSeqsCommand", "execute");
476                 exit(1);
477         }
478 }
479                 
480 /**************************************************************************************/
481
482 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
483                 
484         try {
485                 
486                 ofstream trimFASTAFile;
487                 m->openOutputFile(trimFileName, trimFASTAFile);
488                 
489                 ofstream scrapFASTAFile;
490                 m->openOutputFile(scrapFileName, scrapFASTAFile);
491                 
492                 ofstream trimQualFile;
493                 ofstream scrapQualFile;
494                 if(qFileName != ""){
495                         m->openOutputFile(trimQFileName, trimQualFile);
496                         m->openOutputFile(scrapQFileName, scrapQualFile);
497                 }
498                 
499                 ofstream trimNameFile;
500                 ofstream scrapNameFile;
501                 if(nameFile != ""){
502                         m->openOutputFile(trimNFileName, trimNameFile);
503                         m->openOutputFile(scrapNFileName, scrapNameFile);
504                 }
505                 
506                 
507                 ofstream outGroupsFile;
508                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
509                 if(allFiles){
510                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
511                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
512                                         if (fastaFileNames[i][j] != "") {
513                                                 ofstream temp;
514                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
515                                                 if(qFileName != ""){
516                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
517                                                 }
518                                                 
519                                                 if(nameFile != ""){
520                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
521                                                 }
522                                         }
523                                 }
524                         }
525                 }
526                 
527                 ifstream inFASTA;
528                 m->openInputFile(filename, inFASTA);
529                 inFASTA.seekg(line->start);
530                 
531                 ifstream qFile;
532                 if(qFileName != "")     {
533                         m->openInputFile(qFileName, qFile);
534                         qFile.seekg(qline->start);  
535                 }
536                 
537                 int count = 0;
538                 bool moreSeqs = 1;
539         
540                 while (moreSeqs) {
541                                 
542                         if (m->control_pressed) { 
543                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
544                                 if (oligoFile != "") {   outGroupsFile.close();   }
545
546                                 if(qFileName != ""){
547                                         qFile.close();
548                                 }
549                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
550
551                                 return 0;
552                         }
553                         
554                         int success = 1;
555                         string trashCode = "";
556                         int currentSeqsDiffs = 0;
557
558                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
559                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
560                         QualityScores currQual;
561                         if(qFileName != ""){
562                                 currQual = QualityScores(qFile);  m->gobble(qFile);
563                         }
564                         
565                         string origSeq = currSeq.getUnaligned();
566                         if (origSeq != "") {
567                                 
568                                 int barcodeIndex = 0;
569                                 int primerIndex = 0;
570                                 
571                                 if(barcodes.size() != 0){
572                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
573                                         if(success > bdiffs)            {       trashCode += 'b';       }
574                                         else{ currentSeqsDiffs += success;  }
575                                 }
576                                 
577                                 if(numFPrimers != 0){
578                                         success = stripForward(currSeq, currQual, primerIndex);
579                                         if(success > pdiffs)            {       trashCode += 'f';       }
580                                         else{ currentSeqsDiffs += success;  }
581                                 }
582                                 
583                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
584                                 
585                                 if(numRPrimers != 0){
586                                         success = stripReverse(currSeq, currQual);
587                                         if(!success)                            {       trashCode += 'r';       }
588                                 }
589
590                                 if(keepFirst != 0){
591                                         success = keepFirstTrim(currSeq, currQual);
592                                 }
593                                 
594                                 if(removeLast != 0){
595                                         success = removeLastTrim(currSeq, currQual);
596                                         if(!success)                            {       trashCode += 'l';       }
597                                 }
598
599                                 
600                                 if(qFileName != ""){
601                                         int origLength = currSeq.getNumBases();
602                                         
603                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
604                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
605                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
606                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
607                                         else                                            {       success = 1;                            }
608                                         
609                                         //you don't want to trim, if it fails above then scrap it
610                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
611                                         
612                                         if(!success)                            {       trashCode += 'q';       }
613                                 }                               
614                 
615                                 if(minLength > 0 || maxLength > 0){
616                                         success = cullLength(currSeq);
617                                         if(!success)                            {       trashCode += 'l';       }
618                                 }
619                                 if(maxHomoP > 0){
620                                         success = cullHomoP(currSeq);
621                                         if(!success)                            {       trashCode += 'h';       }
622                                 }
623                                 if(maxAmbig != -1){
624                                         success = cullAmbigs(currSeq);
625                                         if(!success)                            {       trashCode += 'n';       }
626                                 }
627                                 
628                                 if(flip){               // should go last                       
629                                         currSeq.reverseComplement();
630                                         if(qFileName != ""){
631                                                 currQual.flipQScores(); 
632                                         }
633                                 }
634                                 
635                                 if(trashCode.length() == 0){
636                                         currSeq.setAligned(currSeq.getUnaligned());
637                                         currSeq.printSequence(trimFASTAFile);
638                                         
639                                         if(qFileName != ""){
640                                                 currQual.printQScores(trimQualFile);
641                                         }
642                                         
643                                         if(nameFile != ""){
644                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
645                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
646                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
647                                         }
648                                         
649                                         if(barcodes.size() != 0){
650                                                 string thisGroup = barcodeNameVector[barcodeIndex];
651                                                 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
652                                                 
653                                                 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
654                                                 
655                                                 if (nameFile != "") {
656                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
657                                                         if (itName != nameMap.end()) { 
658                                                                 vector<string> thisSeqsNames; 
659                                                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
660                                                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
661                                                                         outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
662                                                                 }
663                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
664                                                 }
665                                                 
666                                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
667                                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
668                                                 else { groupCounts[it->first]++; }
669                                                         
670                                         }
671                                         
672                                         
673                                         if(allFiles){
674                                                 ofstream output;
675                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
676                                                 currSeq.printSequence(output);
677                                                 output.close();
678                                                 
679                                                 if(qFileName != ""){
680                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
681                                                         currQual.printQScores(output);
682                                                         output.close();                                                 
683                                                 }
684                                                 
685                                                 if(nameFile != ""){
686                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
687                                                         if (itName != nameMap.end()) { 
688                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
689                                                                 output << itName->first << '\t' << itName->second << endl; 
690                                                                 output.close();
691                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
692                                                 }
693                                         }
694                                 }
695                                 else{
696                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
697                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
698                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
699                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
700                                         }
701                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
702                                         currSeq.setUnaligned(origSeq);
703                                         currSeq.setAligned(origSeq);
704                                         currSeq.printSequence(scrapFASTAFile);
705                                         if(qFileName != ""){
706                                                 currQual.printQScores(scrapQualFile);
707                                         }
708                                 }
709                                 count++;
710                         }
711                         
712                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
713                                 unsigned long int pos = inFASTA.tellg();
714                                 if ((pos == -1) || (pos >= line->end)) { break; }
715                         
716                         #else
717                                 if (inFASTA.eof()) { break; }
718                         #endif
719                         
720                         //report progress
721                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
722                         
723                 }
724                 //report progress
725                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
726                 
727                 
728                 inFASTA.close();
729                 trimFASTAFile.close();
730                 scrapFASTAFile.close();
731                 if (oligoFile != "") {   outGroupsFile.close();   }
732                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
733                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
734                 
735                 return count;
736         }
737         catch(exception& e) {
738                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
739                 exit(1);
740         }
741 }
742
743 /**************************************************************************************************/
744
745 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
746         try {
747 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
748                 int process = 1;
749                 int exitCommand = 1;
750                 processIDS.clear();
751                 
752                 //loop through and create all the processes you want
753                 while (process != processors) {
754                         int pid = fork();
755                         
756                         if (pid > 0) {
757                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
758                                 process++;
759                         }else if (pid == 0){
760                                 
761                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
762                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
763                                 vector<vector<string> > tempNameFileNames = nameFileNames;
764
765                                 if(allFiles){
766                                         ofstream temp;
767
768                                         for(int i=0;i<tempFASTAFileNames.size();i++){
769                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
770                                                         if (tempFASTAFileNames[i][j] != "") {
771                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
772                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
773
774                                                                 if(qFileName != ""){
775                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
776                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
777                                                                 }
778                                                                 if(nameFile != ""){
779                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
780                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
781                                                                 }
782                                                         }
783                                                 }
784                                         }
785                                 }
786                                                         
787                                 driverCreateTrim(filename,
788                                                                  qFileName,
789                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
790                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
791                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
792                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
793                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
794                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
795                                                                  (groupFile + toString(getpid()) + ".temp"),
796                                                                  tempFASTAFileNames,
797                                                                  tempPrimerQualFileNames,
798                                                                  tempNameFileNames,
799                                                                  lines[process],
800                                                                  qLines[process]);
801                                 
802                                 //pass groupCounts to parent
803                                 if(oligoFile != ""){
804                                         ofstream out;
805                                         string tempFile = filename + toString(getpid()) + ".num.temp";
806                                         m->openOutputFile(tempFile, out);
807                                         
808                                         out << groupCounts.size() << endl;
809                                         
810                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
811                                                 out << it->first << '\t' << it->second << endl;
812                                         }
813                                         out.close();
814                                 }
815                                 exit(0);
816                         }else { 
817                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
818                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
819                                 exit(0);
820                         }
821                 }
822                 
823                 //parent do my part
824                 ofstream temp;
825                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
826                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
827                 if(qFileName != ""){
828                         m->openOutputFile(trimQualFileName, temp);              temp.close();
829                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
830                 }
831                 if (nameFile != "") {
832                         m->openOutputFile(trimNameFileName, temp);              temp.close();
833                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
834                 }
835
836                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
837                 
838                 //force parent to wait until all the processes are done
839                 for (int i=0;i<processIDS.size();i++) { 
840                         int temp = processIDS[i];
841                         wait(&temp);
842                 }
843                 
844                 //append files
845                 for(int i=0;i<processIDS.size();i++){
846                         
847                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
848                         
849                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
850                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
851                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
852                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
853                         
854                         if(qFileName != ""){
855                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
856                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
857                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
858                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
859                         }
860                         
861                         if(nameFile != ""){
862                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
863                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
864                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
865                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
866                         }
867                         
868                         if(oligoFile != ""){
869                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
870                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
871                         }
872                         
873                         
874                         if(allFiles){
875                                 for(int j=0;j<fastaFileNames.size();j++){
876                                         for(int k=0;k<fastaFileNames[j].size();k++){
877                                                 if (fastaFileNames[j][k] != "") {
878                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
879                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
880                                                         
881                                                         if(qFileName != ""){
882                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
883                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
884                                                         }
885                                                         
886                                                         if(nameFile != ""){
887                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
888                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
889                                                         }
890                                                 }
891                                         }
892                                 }
893                         }
894                         
895                         if(oligoFile != ""){
896                                 ifstream in;
897                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
898                                 m->openInputFile(tempFile, in);
899                                 int tempNum;
900                                 string group;
901                                 
902                                 in >> tempNum; m->gobble(in);
903                                 
904                                 if (tempNum != 0) {
905                                         while (!in.eof()) { 
906                                                 in >> group >> tempNum; m->gobble(in);
907                                 
908                                                 map<string, int>::iterator it = groupCounts.find(group);
909                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
910                                                 else { groupCounts[it->first] += tempNum; }
911                                         }
912                                 }
913                                 in.close(); m->mothurRemove(tempFile);
914                         }
915                         
916                 }
917         
918                 return exitCommand;
919 #endif          
920         }
921         catch(exception& e) {
922                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
923                 exit(1);
924         }
925 }
926
927 /**************************************************************************************************/
928
929 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
930         try {
931                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
932                 //set file positions for fasta file
933                 fastaFilePos = m->divideFile(filename, processors);
934                 
935                 if (qfilename == "") { return processors; }
936                 
937                 //get name of first sequence in each chunk
938                 map<string, int> firstSeqNames;
939                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
940                         ifstream in;
941                         m->openInputFile(filename, in);
942                         in.seekg(fastaFilePos[i]);
943                 
944                         Sequence temp(in); 
945                         firstSeqNames[temp.getName()] = i;
946                 
947                         in.close();
948                 }
949                                 
950                 //seach for filePos of each first name in the qfile and save in qfileFilePos
951                 ifstream inQual;
952                 m->openInputFile(qfilename, inQual);
953                 
954                 string input;
955                 while(!inQual.eof()){   
956                         input = m->getline(inQual);
957
958                         if (input.length() != 0) {
959                                 if(input[0] == '>'){ //this is a sequence name line
960                                         istringstream nameStream(input);
961                                         
962                                         string sname = "";  nameStream >> sname;
963                                         sname = sname.substr(1);
964                                         
965                                         map<string, int>::iterator it = firstSeqNames.find(sname);
966                                         
967                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
968                                                 unsigned long int pos = inQual.tellg(); 
969                                                 qfileFilePos.push_back(pos - input.length() - 1);       
970                                                 firstSeqNames.erase(it);
971                                         }
972                                 }
973                         }
974                         
975                         if (firstSeqNames.size() == 0) { break; }
976                 }
977                 inQual.close();
978                 
979                 
980                 if (firstSeqNames.size() != 0) { 
981                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
982                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
983                         }
984                         qFileName = "";
985                         return processors;
986                 }
987
988                 //get last file position of qfile
989                 FILE * pFile;
990                 unsigned long int size;
991                 
992                 //get num bytes in file
993                 pFile = fopen (qfilename.c_str(),"rb");
994                 if (pFile==NULL) perror ("Error opening file");
995                 else{
996                         fseek (pFile, 0, SEEK_END);
997                         size=ftell (pFile);
998                         fclose (pFile);
999                 }
1000                 
1001                 qfileFilePos.push_back(size);
1002                 
1003                 return processors;
1004                 
1005                 #else
1006                 
1007                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1008                         //get last file position of fastafile
1009                         FILE * pFile;
1010                         unsigned long int size;
1011                         
1012                         //get num bytes in file
1013                         pFile = fopen (filename.c_str(),"rb");
1014                         if (pFile==NULL) perror ("Error opening file");
1015                         else{
1016                                 fseek (pFile, 0, SEEK_END);
1017                                 size=ftell (pFile);
1018                                 fclose (pFile);
1019                         }
1020                         fastaFilePos.push_back(size);
1021                         
1022                         //get last file position of fastafile
1023                         FILE * qFile;
1024                         
1025                         //get num bytes in file
1026                         qFile = fopen (qfilename.c_str(),"rb");
1027                         if (qFile==NULL) perror ("Error opening file");
1028                         else{
1029                                 fseek (qFile, 0, SEEK_END);
1030                                 size=ftell (qFile);
1031                                 fclose (qFile);
1032                         }
1033                         qfileFilePos.push_back(size);
1034                 
1035                         return 1;
1036                 
1037                 #endif
1038         }
1039         catch(exception& e) {
1040                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1041                 exit(1);
1042         }
1043 }
1044
1045 //***************************************************************************************************************
1046
1047 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1048         try {
1049                 ifstream inOligos;
1050                 m->openInputFile(oligoFile, inOligos);
1051                 
1052                 ofstream test;
1053                 
1054                 string type, oligo, group;
1055
1056                 int indexPrimer = 0;
1057                 int indexBarcode = 0;
1058                 
1059                 while(!inOligos.eof()){
1060
1061                         inOligos >> type; 
1062                                         
1063                         if(type[0] == '#'){
1064                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1065                                 m->gobble(inOligos);
1066                         }
1067                         else{
1068                                 m->gobble(inOligos);
1069                                 //make type case insensitive
1070                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1071                                 
1072                                 inOligos >> oligo;
1073                                 
1074                                 for(int i=0;i<oligo.length();i++){
1075                                         oligo[i] = toupper(oligo[i]);
1076                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1077                                 }
1078                                 
1079                                 if(type == "FORWARD"){
1080                                         group = "";
1081                                         
1082                                         // get rest of line in case there is a primer name
1083                                         while (!inOligos.eof()) {       
1084                                                 char c = inOligos.get(); 
1085                                                 if (c == 10 || c == 13){        break;  }
1086                                                 else if (c == 32 || c == 9){;} //space or tab
1087                                                 else {  group += c;  }
1088                                         } 
1089                                         
1090                                         //check for repeat barcodes
1091                                         map<string, int>::iterator itPrime = primers.find(oligo);
1092                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1093                                         
1094                                         primers[oligo]=indexPrimer; indexPrimer++;              
1095                                         primerNameVector.push_back(group);
1096                                 }
1097                                 else if(type == "REVERSE"){
1098                                         Sequence oligoRC("reverse", oligo);
1099                                         oligoRC.reverseComplement();
1100                                         revPrimer.push_back(oligoRC.getUnaligned());
1101                                 }
1102                                 else if(type == "BARCODE"){
1103                                         inOligos >> group;
1104                                         
1105                                         //check for repeat barcodes
1106                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1107                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1108                                                 
1109                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1110                                         barcodeNameVector.push_back(group);
1111                                 }
1112                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1113                         }
1114                         m->gobble(inOligos);
1115                 }       
1116                 inOligos.close();
1117                 
1118                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1119                 
1120                 //add in potential combos
1121                 if(barcodeNameVector.size() == 0){
1122                         barcodes[""] = 0;
1123                         barcodeNameVector.push_back("");                        
1124                 }
1125                 
1126                 if(primerNameVector.size() == 0){
1127                         primers[""] = 0;
1128                         primerNameVector.push_back("");                 
1129                 }
1130                 
1131                 fastaFileNames.resize(barcodeNameVector.size());
1132                 for(int i=0;i<fastaFileNames.size();i++){
1133                         fastaFileNames[i].assign(primerNameVector.size(), "");
1134                 }
1135                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1136                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1137                 
1138                 if(allFiles){
1139                         set<string> uniqueNames; //used to cleanup outputFileNames
1140                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1141                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1142                                         
1143                                         string primerName = primerNameVector[itPrimer->second];
1144                                         string barcodeName = barcodeNameVector[itBar->second];
1145                                         
1146                                         string comboGroupName = "";
1147                                         string fastaFileName = "";
1148                                         string qualFileName = "";
1149                                         string nameFileName = "";
1150                                         
1151                                         if(primerName == ""){
1152                                                 comboGroupName = barcodeNameVector[itBar->second];
1153                                         }
1154                                         else{
1155                                                 if(barcodeName == ""){
1156                                                         comboGroupName = primerNameVector[itPrimer->second];
1157                                                 }
1158                                                 else{
1159                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1160                                                 }
1161                                         }
1162                                         
1163                                         
1164                                         ofstream temp;
1165                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1166                                         if (uniqueNames.count(fastaFileName) == 0) {
1167                                                 outputNames.push_back(fastaFileName);
1168                                                 outputTypes["fasta"].push_back(fastaFileName);
1169                                                 uniqueNames.insert(fastaFileName);
1170                                         }
1171                                         
1172                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1173                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1174                                         
1175                                         if(qFileName != ""){
1176                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1177                                                 if (uniqueNames.count(qualFileName) == 0) {
1178                                                         outputNames.push_back(qualFileName);
1179                                                         outputTypes["qfile"].push_back(qualFileName);
1180                                                 }
1181                                                 
1182                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1183                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1184                                         }
1185                                         
1186                                         if(nameFile != ""){
1187                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1188                                                 if (uniqueNames.count(nameFileName) == 0) {
1189                                                         outputNames.push_back(nameFileName);
1190                                                         outputTypes["name"].push_back(nameFileName);
1191                                                 }
1192                                                 
1193                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1194                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1195                                         }
1196                                         
1197                                 }
1198                         }
1199                 }
1200                 numFPrimers = primers.size();
1201                 numRPrimers = revPrimer.size();
1202
1203         }
1204         catch(exception& e) {
1205                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1206                 exit(1);
1207         }
1208 }
1209
1210 //***************************************************************************************************************
1211
1212 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1213         try {
1214                 
1215                 string rawSequence = seq.getUnaligned();
1216                 int success = bdiffs + 1;       //guilty until proven innocent
1217                 
1218                 //can you find the barcode
1219                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1220                         string oligo = it->first;
1221                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1222                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1223                                 break;  
1224                         }
1225                         
1226                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1227                                 group = it->second;
1228                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1229                                 
1230                                 if(qual.getName() != ""){
1231                                         qual.trimQScores(oligo.length(), -1);
1232                                 }
1233                                 
1234                                 success = 0;
1235                                 break;
1236                         }
1237                 }
1238                 
1239                 //if you found the barcode or if you don't want to allow for diffs
1240                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1241                 
1242                 else { //try aligning and see if you can find it
1243
1244                         int maxLength = 0;
1245
1246                         Alignment* alignment;
1247                         if (barcodes.size() > 0) {
1248                                 map<string,int>::iterator it=barcodes.begin();
1249
1250                                 for(it;it!=barcodes.end();it++){
1251                                         if(it->first.length() > maxLength){
1252                                                 maxLength = it->first.length();
1253                                         }
1254                                 }
1255                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1256
1257                         }else{ alignment = NULL; } 
1258                         
1259                         //can you find the barcode
1260                         int minDiff = 1e6;
1261                         int minCount = 1;
1262                         int minGroup = -1;
1263                         int minPos = 0;
1264                         
1265                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1266                                 string oligo = it->first;
1267 //                              int length = oligo.length();
1268                                 
1269                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1270                                         success = bdiffs + 10;
1271                                         break;
1272                                 }
1273                                 
1274                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1275                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1276                                 oligo = alignment->getSeqAAln();
1277                                 string temp = alignment->getSeqBAln();
1278                 
1279                                 int alnLength = oligo.length();
1280                                 
1281                                 for(int i=oligo.length()-1;i>=0;i--){
1282                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1283                                 }
1284                                 oligo = oligo.substr(0,alnLength);
1285                                 temp = temp.substr(0,alnLength);
1286                                 
1287                                 int numDiff = countDiffs(oligo, temp);
1288                                 
1289                                 if(numDiff < minDiff){
1290                                         minDiff = numDiff;
1291                                         minCount = 1;
1292                                         minGroup = it->second;
1293                                         minPos = 0;
1294                                         for(int i=0;i<alnLength;i++){
1295                                                 if(temp[i] != '-'){
1296                                                         minPos++;
1297                                                 }
1298                                         }
1299                                 }
1300                                 else if(numDiff == minDiff){
1301                                         minCount++;
1302                                 }
1303
1304                         }
1305
1306                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1307                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1308                         else{                                                                                                   //use the best match
1309                                 group = minGroup;
1310                                 seq.setUnaligned(rawSequence.substr(minPos));
1311                                 
1312                                 if(qual.getName() != ""){
1313                                         qual.trimQScores(minPos, -1);
1314                                 }
1315                                 success = minDiff;
1316                         }
1317                         
1318                         if (alignment != NULL) {  delete alignment;  }
1319                         
1320                 }
1321         
1322                 return success;
1323                 
1324         }
1325         catch(exception& e) {
1326                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1327                 exit(1);
1328         }
1329
1330 }
1331
1332 //***************************************************************************************************************
1333
1334 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1335         try {
1336                 string rawSequence = seq.getUnaligned();
1337                 int success = pdiffs + 1;       //guilty until proven innocent
1338                 
1339                 //can you find the primer
1340                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1341                         string oligo = it->first;
1342                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1343                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1344                                 break;  
1345                         }
1346                         
1347                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1348                                 group = it->second;
1349                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1350                                 if(qual.getName() != ""){
1351                                         qual.trimQScores(oligo.length(), -1);
1352                                 }
1353                                 success = 0;
1354                                 break;
1355                         }
1356                 }
1357
1358                 //if you found the barcode or if you don't want to allow for diffs
1359                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1360                 
1361                 else { //try aligning and see if you can find it
1362
1363                         int maxLength = 0;
1364
1365                         Alignment* alignment;
1366                         if (primers.size() > 0) {
1367                                 map<string,int>::iterator it=primers.begin();
1368
1369                                 for(it;it!=primers.end();it++){
1370                                         if(it->first.length() > maxLength){
1371                                                 maxLength = it->first.length();
1372                                         }
1373                                 }
1374                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1375
1376                         }else{ alignment = NULL; } 
1377                         
1378                         //can you find the barcode
1379                         int minDiff = 1e6;
1380                         int minCount = 1;
1381                         int minGroup = -1;
1382                         int minPos = 0;
1383                         
1384                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1385                                 string oligo = it->first;
1386 //                              int length = oligo.length();
1387                                 
1388                                 if(rawSequence.length() < maxLength){   
1389                                         success = pdiffs + 100;
1390                                         break;
1391                                 }
1392                                 
1393                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1394                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1395                                 oligo = alignment->getSeqAAln();
1396                                 string temp = alignment->getSeqBAln();
1397                 
1398                                 int alnLength = oligo.length();
1399                                 
1400                                 for(int i=oligo.length()-1;i>=0;i--){
1401                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1402                                 }
1403                                 oligo = oligo.substr(0,alnLength);
1404                                 temp = temp.substr(0,alnLength);
1405                                 
1406                                 int numDiff = countDiffs(oligo, temp);
1407                                 
1408                                 if(numDiff < minDiff){
1409                                         minDiff = numDiff;
1410                                         minCount = 1;
1411                                         minGroup = it->second;
1412                                         minPos = 0;
1413                                         for(int i=0;i<alnLength;i++){
1414                                                 if(temp[i] != '-'){
1415                                                         minPos++;
1416                                                 }
1417                                         }
1418                                 }
1419                                 else if(numDiff == minDiff){
1420                                         minCount++;
1421                                 }
1422
1423                         }
1424
1425                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1426                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1427                         else{                                                                                                   //use the best match
1428                                 group = minGroup;
1429                                 seq.setUnaligned(rawSequence.substr(minPos));
1430                                 if(qual.getName() != ""){
1431                                         qual.trimQScores(minPos, -1);
1432                                 }
1433                                 success = minDiff;
1434                         }
1435                         
1436                         if (alignment != NULL) {  delete alignment;  }
1437                         
1438                 }
1439                 
1440                 return success;
1441
1442         }
1443         catch(exception& e) {
1444                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1445                 exit(1);
1446         }
1447 }
1448
1449 //***************************************************************************************************************
1450
1451 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1452         try {
1453                 string rawSequence = seq.getUnaligned();
1454                 bool success = 0;       //guilty until proven innocent
1455                 
1456                 for(int i=0;i<numRPrimers;i++){
1457                         string oligo = revPrimer[i];
1458                         
1459                         if(rawSequence.length() < oligo.length()){
1460                                 success = 0;
1461                                 break;
1462                         }
1463                         
1464                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1465                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1466                                 if(qual.getName() != ""){
1467                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1468                                 }
1469                                 success = 1;
1470                                 break;
1471                         }
1472                 }       
1473                 return success;
1474                 
1475         }
1476         catch(exception& e) {
1477                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1478                 exit(1);
1479         }
1480 }
1481
1482 //***************************************************************************************************************
1483
1484 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1485         try {
1486                 bool success = 1;
1487                 if(qscores.getName() != ""){
1488                         qscores.trimQScores(-1, keepFirst);
1489                 }
1490                 sequence.trim(keepFirst);
1491                 return success;
1492         }
1493         catch(exception& e) {
1494                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1495                 exit(1);
1496         }
1497         
1498 }       
1499
1500 //***************************************************************************************************************
1501
1502 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1503         try {
1504                 bool success = 0;
1505                 
1506                 int length = sequence.getNumBases() - removeLast;
1507                 
1508                 if(length > 0){
1509                         if(qscores.getName() != ""){
1510                                 qscores.trimQScores(-1, length);
1511                         }
1512                         sequence.trim(length);
1513                         success = 1;
1514                 }
1515                 else{
1516                         success = 0;
1517                 }
1518
1519                 return success;
1520         }
1521         catch(exception& e) {
1522                 m->errorOut(e, "removeLastTrim", "countDiffs");
1523                 exit(1);
1524         }
1525         
1526 }       
1527
1528 //***************************************************************************************************************
1529
1530 bool TrimSeqsCommand::cullLength(Sequence& seq){
1531         try {
1532         
1533                 int length = seq.getNumBases();
1534                 bool success = 0;       //guilty until proven innocent
1535                 
1536                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1537                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1538                 else                                                                                            {       success = 0;    }
1539                 
1540                 return success;
1541         
1542         }
1543         catch(exception& e) {
1544                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1545                 exit(1);
1546         }
1547         
1548 }
1549
1550 //***************************************************************************************************************
1551
1552 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1553         try {
1554                 int longHomoP = seq.getLongHomoPolymer();
1555                 bool success = 0;       //guilty until proven innocent
1556                 
1557                 if(longHomoP <= maxHomoP){      success = 1;    }
1558                 else                                    {       success = 0;    }
1559                 
1560                 return success;
1561         }
1562         catch(exception& e) {
1563                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1564                 exit(1);
1565         }
1566         
1567 }
1568
1569 //***************************************************************************************************************
1570
1571 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1572         try {
1573                 int numNs = seq.getAmbigBases();
1574                 bool success = 0;       //guilty until proven innocent
1575                 
1576                 if(numNs <= maxAmbig)   {       success = 1;    }
1577                 else                                    {       success = 0;    }
1578                 
1579                 return success;
1580         }
1581         catch(exception& e) {
1582                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1583                 exit(1);
1584         }
1585         
1586 }
1587
1588 //***************************************************************************************************************
1589
1590 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1591         try {
1592                 bool success = 1;
1593                 int length = oligo.length();
1594                 
1595                 for(int i=0;i<length;i++){
1596                         
1597                         if(oligo[i] != seq[i]){
1598                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1599                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1600                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1601                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1602                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1603                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1604                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1605                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1606                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1607                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1608                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1609                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1610                                 
1611                                 if(success == 0)        {       break;   }
1612                         }
1613                         else{
1614                                 success = 1;
1615                         }
1616                 }
1617                 
1618                 return success;
1619         }
1620         catch(exception& e) {
1621                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1622                 exit(1);
1623         }
1624
1625 }
1626
1627 //***************************************************************************************************************
1628
1629 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1630         try {
1631
1632                 int length = oligo.length();
1633                 int countDiffs = 0;
1634                 
1635                 for(int i=0;i<length;i++){
1636                                                                 
1637                         if(oligo[i] != seq[i]){
1638                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1639                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1640                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1641                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1642                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1643                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1644                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1645                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1646                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1647                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1648                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1649                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1650                         }
1651                         
1652                 }
1653                 
1654                 return countDiffs;
1655         }
1656         catch(exception& e) {
1657                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1658                 exit(1);
1659         }
1660
1661 }
1662
1663 //***************************************************************************************************************