]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
15         CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
16         CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
17         CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
18         CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
19         CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
20         CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
21         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
22                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
23                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
24 //        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
25 //              CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
26         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
27
28         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
29         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
30         CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
31                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
32                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
33                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
34                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
35         CommandParameter pthreshold("insert", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pthreshold);
36         CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
37                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
38         CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
41                 
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MakeContigsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string MakeContigsCommand::getHelpString(){     
53         try {
54                 string helpString = "";
55                 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta.  It will also provide new quality files if the fastq or file parameter is used.\n";
56         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
57                 helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
58                 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
59         helpString += "The file parameter is 2 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column.  Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
60         helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process.  If you provide one, you must provide the other.\n";
61         helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process.  If you provide one, you must provide the other.\n";
62         helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters.  If you provide one, you must provide the other.\n";
63                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
64         helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
65         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
66                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
67                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
68         //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
69                 //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
70                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
71                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
72         helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base.  For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5).  If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
73                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
74                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
75         helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=25.\n";
76         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
77         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
78         helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
79         helpString += "The make.contigs command should be in the following format: \n";
80                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
81                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
82                 return helpString;
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
86                 exit(1);
87         }
88 }
89 //**********************************************************************************************************************
90 string MakeContigsCommand::getOutputPattern(string type) {
91     try {
92         string pattern = "";
93         
94         if (type == "fasta") {  pattern = "[filename],[tag],contigs.fasta"; } 
95         else if (type == "group") {  pattern = "[filename],[tag],contigs.groups"; }
96         else if (type == "report") {  pattern = "[filename],[tag],contigs.report"; }
97         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
98         
99         return pattern;
100     }
101     catch(exception& e) {
102         m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
103         exit(1);
104     }
105 }
106 //**********************************************************************************************************************
107 MakeContigsCommand::MakeContigsCommand(){       
108         try {
109                 abort = true; calledHelp = true; 
110                 setParameters();
111                 vector<string> tempOutNames;
112                 outputTypes["fasta"] = tempOutNames;
113         outputTypes["group"] = tempOutNames;
114         outputTypes["report"] = tempOutNames;
115         }
116         catch(exception& e) {
117                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
118                 exit(1);
119         }
120 }
121 //**********************************************************************************************************************
122 MakeContigsCommand::MakeContigsCommand(string option)  {
123         try {
124                 abort = false; calledHelp = false;   
125         
126                 //allow user to run help
127                 if(option == "help") { help(); abort = true; calledHelp = true; }
128                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
129                 
130                 else {
131                         vector<string> myArray = setParameters();
132                         
133                         OptionParser parser(option);
134                         map<string, string> parameters = parser.getParameters(); 
135                         
136                         ValidParameters validParameter("pairwise.seqs");
137                         map<string, string>::iterator it;
138                         
139                         //check to make sure all parameters are valid for command
140                         for (it = parameters.begin(); it != parameters.end(); it++) { 
141                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
142                         }
143                         
144                         //initialize outputTypes
145                         vector<string> tempOutNames;
146                         outputTypes["fasta"] = tempOutNames;
147             outputTypes["report"] = tempOutNames;
148             outputTypes["group"] = tempOutNames;
149                         
150             
151                         //if the user changes the input directory command factory will send this info to us in the output parameter 
152                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
153                         if (inputDir == "not found"){   inputDir = "";          }
154                         else { 
155                                 string path;
156                 it = parameters.find("ffastq");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
162                                 }
163                 
164                 it = parameters.find("rfastq");
165                                 //user has given a template file
166                                 if(it != parameters.end()){ 
167                                         path = m->hasPath(it->second);
168                                         //if the user has not given a path then, add inputdir. else leave path alone.
169                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
170                                 }
171                 
172                 it = parameters.find("ffasta");
173                                 //user has given a template file
174                                 if(it != parameters.end()){ 
175                                         path = m->hasPath(it->second);
176                                         //if the user has not given a path then, add inputdir. else leave path alone.
177                                         if (path == "") {       parameters["ffasta"] = inputDir + it->second;           }
178                                 }
179                 
180                 it = parameters.find("rfasta");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["rfasta"] = inputDir + it->second;           }
186                                 }
187                 
188                 it = parameters.find("fqfile");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["fqfile"] = inputDir + it->second;           }
194                                 }
195                 
196                 it = parameters.find("rqfile");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["rqfile"] = inputDir + it->second;           }
202                                 }
203                 
204                 it = parameters.find("file");
205                                 //user has given a template file
206                                 if(it != parameters.end()){ 
207                                         path = m->hasPath(it->second);
208                                         //if the user has not given a path then, add inputdir. else leave path alone.
209                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
210                                 }
211                 
212                 it = parameters.find("oligos");
213                                 //user has given a template file
214                                 if(it != parameters.end()){ 
215                                         path = m->hasPath(it->second);
216                                         //if the user has not given a path then, add inputdir. else leave path alone.
217                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
218                                 }
219             }
220             
221             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
222                         if (ffastqfile == "not open") {  abort = true; }        
223                         else if (ffastqfile == "not found") { ffastqfile = ""; }
224                         
225                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
226                         if (rfastqfile == "not open") {  abort = true; }        
227                         else if (rfastqfile == "not found") { rfastqfile = "";  }
228             
229             ffastafile = validParameter.validFile(parameters, "ffasta", true);
230                         if (ffastafile == "not open") {  abort = true; }        
231                         else if (ffastafile == "not found") { ffastafile = ""; }
232                         
233                         rfastafile = validParameter.validFile(parameters, "rfasta", true);
234                         if (rfastafile == "not open") {  abort = true; }        
235                         else if (rfastafile == "not found") { rfastafile = "";  }
236             
237             fqualfile = validParameter.validFile(parameters, "fqfile", true);
238                         if (fqualfile == "not open") {  abort = true; } 
239                         else if (fqualfile == "not found") { fqualfile = ""; }
240                         
241                         rqualfile = validParameter.validFile(parameters, "rqfile", true);
242                         if (rqualfile == "not open") {  abort = true; } 
243                         else if (rqualfile == "not found") { rqualfile = "";  }
244             
245             file = validParameter.validFile(parameters, "file", true);
246                         if (file == "not open") {  abort = true; }      
247                         else if (file == "not found") { file = "";  }
248             
249             //provide at least
250             if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
251             if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
252             if ((ffastqfile != "") && (rfastqfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffastq, you must provide a rfastq file.\n"); }
253             if ((ffastqfile == "") && (rfastqfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfastq, you must provide a ffastq file.\n"); }
254             if ((ffastafile != "") && (rfastafile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffasta, you must provide a rfasta file.\n"); }
255             if ((ffastafile == "") && (rfastafile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfasta, you must provide a ffasta file.\n"); }
256             if ((fqualfile != "") && (rqualfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the fqfile, you must provide a rqfile file.\n"); }
257             if ((fqualfile == "") && (rqualfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile, you must provide a fqfile file.\n"); }
258             if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) {
259                 abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n");
260             }
261             
262             oligosfile = validParameter.validFile(parameters, "oligos", true);
263                         if (oligosfile == "not found")      {   oligosfile = "";        }
264                         else if(oligosfile == "not open")   {   abort = true;       } 
265                         else {   m->setOligosFile(oligosfile);          }
266             
267             //if the user changes the output directory command factory will send this info to us in the output parameter 
268                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
269                  outputDir = ""; 
270             }
271                         
272
273                         //check for optional parameter and set defaults
274                         // ...at some point should added some additional type checking...
275                         string temp;
276                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
277                         m->mothurConvert(temp, match);  
278                         
279                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
280                         m->mothurConvert(temp, misMatch);  
281             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
282                         
283                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
284                         m->mothurConvert(temp, gapOpen);  
285             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
286                         
287                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
288                         m->mothurConvert(temp, gapExtend); 
289             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
290                         
291             temp = validParameter.validFile(parameters, "insert", false);       if (temp == "not found"){       temp = "25";                    }
292                         m->mothurConvert(temp, insert); 
293             if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
294
295             temp = validParameter.validFile(parameters, "deltaq", false);       if (temp == "not found"){       temp = "6";                     }
296                         m->mothurConvert(temp, deltaq);
297             
298                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
299                         m->setProcessors(temp);
300                         m->mothurConvert(temp, processors);
301             
302             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
303                         m->mothurConvert(temp, bdiffs);
304                         
305                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
306                         m->mothurConvert(temp, pdiffs);
307             
308   //          temp = validParameter.validFile(parameters, "ldiffs", false);             if (temp == "not found") { temp = "0"; }
309 //                      m->mothurConvert(temp, ldiffs);
310             ldiffs = 0;
311             
312  //           temp = validParameter.validFile(parameters, "sdiffs", false);             if (temp == "not found") { temp = "0"; }
313  //           m->mothurConvert(temp, sdiffs);
314             sdiffs = 0;
315                         
316                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
317                         m->mothurConvert(temp, tdiffs);
318                         
319                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
320
321             temp = validParameter.validFile(parameters, "allfiles", false);             if (temp == "not found") { temp = "F"; }
322                         allFiles = m->isTrue(temp);
323             
324             temp = validParameter.validFile(parameters, "trimoverlap", false);          if (temp == "not found") { temp = "F"; }
325                         trimOverlap = m->isTrue(temp);
326                         
327                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
328                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
329             
330             format = validParameter.validFile(parameters, "format", false);             if (format == "not found"){     format = "illumina1.8+";        }
331             
332             if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
333                                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
334                                 abort=true;
335                         }
336             
337             //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
338             for (int i = -64; i < 65; i++) { 
339                 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
340                 convertTable.push_back(temp);
341             }
342         }
343                 
344         }
345         catch(exception& e) {
346                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
347                 exit(1);
348         }
349 }
350 //**********************************************************************************************************************
351 int MakeContigsCommand::execute(){
352         try {
353                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
354         
355         //read ffastq and rfastq files creating fasta and qual files.
356         //this function will create a forward and reverse, fasta and qual files for each processor.
357         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual.  filesToProcess is for each filepair in the file parameter file.  for ffastq and rfastq this will be size 1.
358         unsigned long int numReads = 0;
359         int start = time(NULL);
360         longestBase = 1000;
361         m->mothurOut("Reading fastq data...\n"); 
362         vector < vector< vector<string> > > filesToProcess = preProcessData(numReads);
363         m->mothurOut("Done.\n");
364        
365         if (m->control_pressed) { return 0; }
366         
367         map<string, string> cvars;
368         string compOutputDir = outputDir;
369         if (outputDir == "") { compOutputDir = m->hasPath(file); }
370         cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
371         cvars["[tag]"] = "";
372         string compositeGroupFile = getOutputFileName("group",cvars);
373         cvars["[tag]"] = "trim";
374         string compositeFastaFile = getOutputFileName("fasta",cvars);
375         cvars["[tag]"] = "scrap";
376         string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
377         cvars["[tag]"] = "";
378         string compositeMisMatchFile = getOutputFileName("report",cvars);
379         
380         if (filesToProcess.size() > 1) { //clear files for append below
381             ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
382             m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
383             m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
384             m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
385             outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
386             outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
387             outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
388         }
389         
390         for (int l = 0; l < filesToProcess.size(); l++) {
391             
392             m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
393             
394             vector<vector<string> > fastaFileNames;
395             createGroup = false;
396             string outputGroupFileName;
397             map<string, string> variables; 
398             string thisOutputDir = outputDir;
399             if (outputDir == "") {  thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
400             variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
401             variables["[tag]"] = "";
402             if(oligosfile != ""){
403                 createGroup = getOligos(fastaFileNames, variables["[filename]"]);
404                 if (createGroup) { 
405                     outputGroupFileName = getOutputFileName("group",variables);
406                     outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
407                 }
408             }
409             
410             variables["[tag]"] = "trim";
411             string outFastaFile = getOutputFileName("fasta",variables);
412             variables["[tag]"] = "scrap";
413             string outScrapFastaFile = getOutputFileName("fasta",variables);
414             variables["[tag]"] = "";
415             string outMisMatchFile = getOutputFileName("report",variables);
416             outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
417             outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
418             outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
419             
420             m->mothurOut("Making contigs...\n"); 
421             createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames);
422             m->mothurOut("Done.\n");
423             
424             //remove temp fasta and qual files
425             for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); }  }
426             
427             if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); }  return 0; }
428             
429             if(allFiles){
430                 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
431                 map<string, string>::iterator it;
432                 set<string> namesToRemove;
433                 for(int i=0;i<fastaFileNames.size();i++){
434                     for(int j=0;j<fastaFileNames[0].size();j++){
435                         if (fastaFileNames[i][j] != "") {
436                             if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
437                                 if(m->isBlank(fastaFileNames[i][j])){
438                                     m->mothurRemove(fastaFileNames[i][j]);
439                                     namesToRemove.insert(fastaFileNames[i][j]);
440                                 }else{  
441                                     it = uniqueFastaNames.find(fastaFileNames[i][j]);
442                                     if (it == uniqueFastaNames.end()) { 
443                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
444                                     }   
445                                 }
446                             }
447                         }
448                     }
449                 }
450                 
451                 //remove names for outputFileNames, just cleans up the output
452                 vector<string> outputNames2;
453                 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
454                 outputNames = outputNames2;
455                 
456                 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
457                     ifstream in;
458                     m->openInputFile(it->first, in);
459                     
460                     ofstream out;
461                     string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
462                     thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); 
463                     m->openOutputFile(thisGroupName, out);
464                     
465                     while (!in.eof()){
466                         if (m->control_pressed) { break; }
467                         
468                         Sequence currSeq(in); m->gobble(in);
469                         out << currSeq.getName() << '\t' << it->second << endl;  
470                     }
471                     in.close();
472                     out.close();
473                 }
474             }
475             
476             if (createGroup) {
477                 ofstream outGroup;
478                 m->openOutputFile(outputGroupFileName, outGroup);
479                 for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
480                     outGroup << itGroup->first << '\t' << itGroup->second << endl;
481                 }
482                 outGroup.close();
483             }
484             
485             if (filesToProcess.size() > 1) { //merge into large combo files
486                 if (createGroup) {  
487                     if (l == 0) { 
488                         ofstream outCGroup;
489                         m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
490                         outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
491                     }
492                     m->appendFiles(outputGroupFileName, compositeGroupFile);  
493                 }
494                 m->appendFiles(outMisMatchFile, compositeMisMatchFile);
495                 m->appendFiles(outFastaFile, compositeFastaFile);
496                 m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
497             }
498         }
499         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
500         
501         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
502         
503                 //output group counts
504                 m->mothurOutEndLine();
505                 int total = 0;
506                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
507                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
508             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
509                 }
510                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
511                 
512                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
513         
514         string currentFasta = "";
515                 itTypes = outputTypes.find("fasta");
516                 if (itTypes != outputTypes.end()) {
517                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
518                 }
519         
520         string currentGroup = "";
521                 itTypes = outputTypes.find("group");
522                 if (itTypes != outputTypes.end()) {
523                         if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
524                 }
525                 
526         //output files created by command
527                 m->mothurOutEndLine();
528                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
529                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
530                 m->mothurOutEndLine();
531
532         return 0;
533     }
534         catch(exception& e) {
535                 m->errorOut(e, "MakeContigsCommand", "execute");
536                 exit(1);
537         }
538 }
539 //**********************************************************************************************************************
540 vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned long int& numReads) {
541         try {
542         vector< vector< vector<string> > > filesToProcess;
543         
544         if (ffastqfile != "") { //reading one file
545             vector< vector<string> > files = readFastqFiles(numReads, ffastqfile, rfastqfile); 
546             //adjust for really large processors or really small files
547             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
548             if (numReads < processors) { 
549                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
550                 files.resize(numReads);
551                 processors = numReads; 
552             }
553             filesToProcess.push_back(files);
554         }else if (file != "") { //reading multiple files
555             //return only valid pairs
556             vector< vector<string> > filePairsToProcess = readFileNames(file);
557             
558             if (m->control_pressed) { return filesToProcess; }
559             
560             if (filePairsToProcess.size() != 0) {
561                 for (int i = 0; i < filePairsToProcess.size(); i++) {
562                     
563                     if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } }
564                     
565                     unsigned long int thisFilesReads;
566                     vector< vector<string> > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1]); 
567                     
568                     //adjust for really large processors or really small files
569                     if (thisFilesReads < processors) { 
570                         m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n"); 
571                         for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); }
572                     }else {
573                         filesToProcess.push_back(files);
574                         numReads += thisFilesReads;
575                     }
576                 }
577                 //all files are bad
578                 if (numReads == 0) {  m->control_pressed = true; }
579             }
580         }else if (ffastafile != "") {
581             vector< vector<string> > files = readFastaFiles(numReads, ffastafile, rfastafile);
582             //adjust for really large processors or really small files
583             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
584             if (numReads < processors) { 
585                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
586                 files.resize(numReads);
587                 processors = numReads; 
588             }
589             filesToProcess.push_back(files);
590         }else { m->control_pressed = true; } //should not get here
591         
592         return filesToProcess;
593     }
594         catch(exception& e) {
595                 m->errorOut(e, "MakeContigsCommand", "preProcessData");
596                 exit(1);
597         }
598 }
599 //**********************************************************************************************************************
600 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames) {
601         try {
602                 int num = 0;
603                 vector<int> processIDS;
604 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
605                 int process = 0;
606                 
607                 //loop through and create all the processes you want
608                 while (process != processors-1) {
609                         int pid = fork();
610                         
611                         if (pid > 0) {
612                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
613                                 process++;
614                         }else if (pid == 0){
615                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
616                 
617                                 if(allFiles){
618                                         ofstream temp;
619                     
620                                         for(int i=0;i<tempFASTAFileNames.size();i++){
621                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
622                                                         if (tempFASTAFileNames[i][j] != "") {
623                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
624                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
625                                                         }
626                                                 }
627                                         }
628                                 }
629
630                                 num = driver(files[process], 
631                              outputFasta + toString(getpid()) + ".temp", 
632                              outputScrapFasta + toString(getpid()) + ".temp", 
633                              outputMisMatches + toString(getpid()) + ".temp",
634                              tempFASTAFileNames, process);
635                                 
636                                 //pass groupCounts to parent
637                 ofstream out;
638                 string tempFile = toString(getpid()) + ".num.temp";
639                 m->openOutputFile(tempFile, out);
640                 out << num << endl;
641                                 if(createGroup){
642                                         out << groupCounts.size() << endl;
643                                         
644                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
645                                                 out << it->first << '\t' << it->second << endl;
646                                         }
647                     
648                     out << groupMap.size() << endl;
649                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
650                                                 out << it->first << '\t' << it->second << endl;
651                                         }
652                                 }
653                 out.close();
654                                 
655                                 exit(0);
656                         }else { 
657                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
658                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
659                                 exit(0);
660                         }
661                 }
662                 
663         ofstream temp;
664                 m->openOutputFile(outputFasta, temp);           temp.close();
665         m->openOutputFile(outputScrapFasta, temp);              temp.close();
666                 
667                 //do my part
668                 num = driver(files[processors-1], outputFasta, outputScrapFasta,  outputMisMatches, fastaFileNames, processors-1);
669                 
670                 //force parent to wait until all the processes are done
671                 for (int i=0;i<processIDS.size();i++) { 
672                         int temp = processIDS[i];
673                         wait(&temp);
674                 }
675         
676                 for (int i = 0; i < processIDS.size(); i++) {
677             ifstream in;
678             string tempFile = toString(processIDS[i]) + ".num.temp";
679             m->openInputFile(tempFile, in);
680             int tempNum;
681             in >> tempNum; num += tempNum; m->gobble(in);
682             
683                         if(createGroup){
684                                 string group;
685                                 in >> tempNum; m->gobble(in);
686                                 
687                                 if (tempNum != 0) {
688                                         for (int j = 0; j < tempNum; j++) { 
689                         int groupNum;
690                                                 in >> group >> groupNum; m->gobble(in);
691                         
692                                                 map<string, int>::iterator it = groupCounts.find(group);
693                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
694                                                 else { groupCounts[it->first] += groupNum; }
695                                         }
696                                 }
697                 in >> tempNum; m->gobble(in);
698                 if (tempNum != 0) {
699                                         for (int j = 0; j < tempNum; j++) { 
700                         string group, seqName;
701                                                 in >> seqName >> group; m->gobble(in);
702                         
703                                                 map<string, string>::iterator it = groupMap.find(seqName);
704                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
705                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
706                                         }
707                                 }
708                         }
709             in.close(); m->mothurRemove(tempFile);
710         }
711     #else
712         
713         //////////////////////////////////////////////////////////////////////////////////////////////////////
714                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
715                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
716                 //////////////////////////////////////////////////////////////////////////////////////////////////////
717                 
718                 vector<contigsData*> pDataArray; 
719                 DWORD   dwThreadIdArray[processors-1];
720                 HANDLE  hThreadArray[processors-1]; 
721                 
722                 //Create processor worker threads.
723                 for( int h=0; h<processors-1; h++ ){
724                         string extension = "";
725                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
726             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
727                         
728             if(allFiles){
729                 ofstream temp;
730                 
731                 for(int i=0;i<tempFASTAFileNames.size();i++){
732                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
733                         if (tempFASTAFileNames[i][j] != "") {
734                             tempFASTAFileNames[i][j] += extension;
735                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
736                         }
737                     }
738                 }
739             }
740
741                                   
742                         contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, trimOverlap, h);
743                         pDataArray.push_back(tempcontig);
744             
745                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
746                 }
747         
748         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
749
750         if(allFiles){
751             ofstream temp;
752             string extension = toString(processors-1) + ".temp";
753             
754             for(int i=0;i<tempFASTAFileNames.size();i++){
755                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
756                     if (tempFASTAFileNames[i][j] != "") {
757                         tempFASTAFileNames[i][j] += extension;
758                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
759                     }
760                 }
761             }
762         }
763
764                 //parent do my part
765                 ofstream temp;
766                 m->openOutputFile(outputFasta, temp);           temp.close();
767         m->openOutputFile(outputScrapFasta, temp);              temp.close();
768         
769         //do my part
770         processIDS.push_back(processors-1);
771                 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"),  (outputScrapFasta+ toString(processors-1) + ".temp"),  (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1);     
772         
773                 //Wait until all threads have terminated.
774                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
775                 
776                 //Close all thread handles and free memory allocations.
777                 for(int i=0; i < pDataArray.size(); i++){
778                         num += pDataArray[i]->count;
779             if (!pDataArray[i]->done) {
780                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true; 
781             }
782             for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
783                 map<string, int>::iterator it2 = groupCounts.find(it->first);
784                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
785                 else { groupCounts[it->first] += it->second; }
786             }
787             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
788                 map<string, string>::iterator it2 = groupMap.find(it->first);
789                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
790                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
791             }
792             CloseHandle(hThreadArray[i]);
793                         delete pDataArray[i];
794         }
795                                 
796     #endif      
797         
798         for (int i = 0; i < processIDS.size(); i++) {
799                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
800                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
801                         
802                         m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
803                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
804             
805             m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
806                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
807             
808             if(allFiles){
809                                 for(int j=0;j<fastaFileNames.size();j++){
810                                         for(int k=0;k<fastaFileNames[j].size();k++){
811                                                 if (fastaFileNames[j][k] != "") {
812                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
813                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
814                                                 }
815                                         }
816                                 }
817                         }
818                 }
819                 
820                 return num;
821         }
822         catch(exception& e) {
823                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
824                 exit(1);
825         }
826 }
827 //**********************************************************************************************************************
828 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process){
829     try {
830         
831         Alignment* alignment;
832         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
833                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
834         
835         int num = 0;
836         string thisffastafile = files[0];
837         string thisfqualfile = files[1];
838         string thisrfastafile = files[2];
839         string thisrqualfile = files[3];
840         
841         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
842         
843         ifstream inFFasta, inRFasta, inFQual, inRQual;
844         ofstream outFasta, outMisMatch, outScrapFasta;
845         m->openInputFile(thisffastafile, inFFasta);
846         m->openInputFile(thisrfastafile, inRFasta);
847         if (thisfqualfile != "") {
848             m->openInputFile(thisfqualfile, inFQual);
849             m->openInputFile(thisrqualfile, inRQual);
850         }
851         m->openOutputFile(outputFasta, outFasta);
852         m->openOutputFile(outputScrapFasta, outScrapFasta);
853         m->openOutputFile(outputMisMatches, outMisMatch);
854         if (process == 0) { outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  }
855         
856         TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
857         
858         while ((!inFFasta.eof()) && (!inRFasta.eof())) {
859             
860             if (m->control_pressed) { break; }
861             
862             int success = 1;
863             string trashCode = "";
864             int currentSeqsDiffs = 0;
865
866             //read seqs and quality info
867             Sequence fSeq(inFFasta); m->gobble(inFFasta);
868             Sequence rSeq(inRFasta); m->gobble(inRFasta);
869             QualityScores* fQual = NULL; QualityScores* rQual = NULL;
870             if (thisfqualfile != "") {
871                 fQual = new QualityScores(inFQual); m->gobble(inFQual);
872                 rQual = new QualityScores(inRQual); m->gobble(inRQual);
873             }
874             
875             int barcodeIndex = 0;
876             int primerIndex = 0;
877             
878             if(barcodes.size() != 0){
879                 if (thisfqualfile != "") {
880                     success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
881                 }else {
882                     success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
883                 }
884                 if(success > bdiffs)            {       trashCode += 'b';       }
885                 else{ currentSeqsDiffs += success;  }
886             }
887             
888             if(primers.size() != 0){
889                 if (thisfqualfile != "") {
890                     success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
891                 }else {
892                     success = trimOligos.stripForward(fSeq, rSeq, primerIndex);
893                 }
894                 if(success > pdiffs)            {       trashCode += 'f';       }
895                 else{ currentSeqsDiffs += success;  }
896             }
897             
898             if (currentSeqsDiffs > tdiffs)      {       trashCode += 't';   }
899             
900             //flip the reverse reads
901             rSeq.reverseComplement();
902             if (thisfqualfile != "") { rQual->flipQScores(); }
903
904             //pairwise align
905             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
906             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
907             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
908             fSeq.setAligned(alignment->getSeqAAln());
909             rSeq.setAligned(alignment->getSeqBAln());
910             int length = fSeq.getAligned().length();
911             
912             //traverse alignments merging into one contiguous seq
913             string contig = "";
914             int numMismatches = 0;
915             string seq1 = fSeq.getAligned();
916             string seq2 = rSeq.getAligned();
917             vector<int> scores1, scores2; 
918             if (thisfqualfile != "") {
919                 scores1 = fQual->getQualityScores();
920                 scores2 = rQual->getQualityScores();
921                 delete fQual; delete rQual;
922             }
923             
924             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
925             int overlapStart = fSeq.getStartPos();
926             int seq2Start = rSeq.getStartPos();
927             
928             //bigger of the 2 starting positions is the location of the overlapping start
929             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
930                 overlapStart = seq2Start; 
931                 for (int i = 0; i < overlapStart; i++) { contig += seq1[i];  }
932             }else { //seq1 starts later so take from 0 to overlapStart from seq2
933                 for (int i = 0; i < overlapStart; i++) {  contig += seq2[i]; }
934             }
935             
936             int seq1End = fSeq.getEndPos();
937             int seq2End = rSeq.getEndPos();
938             int overlapEnd = seq1End;
939             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
940             
941             int oStart = contig.length();
942             for (int i = overlapStart; i < overlapEnd; i++) {
943                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
944                     contig += seq1[i];
945                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
946                     if (thisfqualfile != "") {
947                         if (scores2[BBaseMap[i]] < insert) { } //
948                         else { contig += seq2[i];  }
949                     }else { contig += seq2[i]; } //with no quality info, then we keep it?
950                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
951                     if (thisfqualfile != "") {
952                         if (scores1[ABaseMap[i]] < insert) { } //
953                         else { contig += seq1[i];  }
954                     }else { contig += seq1[i]; } //with no quality info, then we keep it?
955                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
956                     if (thisfqualfile != "") {
957                         if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
958                             char c = seq1[i];
959                             if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
960                             contig += c;
961                         }else { //if no, base becomes n
962                             contig += 'N';
963                         }
964                         numMismatches++;
965                     }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
966                 }else { //should never get here
967                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
968                 }
969             }
970             int oend = contig.length();
971             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
972                 for (int i = overlapEnd; i < length; i++) { contig += seq2[i];  }
973             }else { //seq2 ends before seq1 so take from overlap to length from seq1
974                 for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
975             }
976             
977             if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart);  if (contig.length() == 0) { trashCode += "l"; } }
978             
979             if(trashCode.length() == 0){
980                 bool ignore = false;
981                 
982                 if (m->debug) { m->mothurOut(fSeq.getName()); }
983                 
984                 if (createGroup) {
985                     if(barcodes.size() != 0){
986                         string thisGroup = barcodeNameVector[barcodeIndex];
987                         if (primers.size() != 0) { 
988                             if (primerNameVector[primerIndex] != "") { 
989                                 if(thisGroup != "") {
990                                     thisGroup += "." + primerNameVector[primerIndex]; 
991                                 }else {
992                                     thisGroup = primerNameVector[primerIndex]; 
993                                 }
994                             } 
995                         }
996                         
997                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
998                         
999                         int pos = thisGroup.find("ignore");
1000                         if (pos == string::npos) {
1001                             groupMap[fSeq.getName()] = thisGroup; 
1002                         
1003                             map<string, int>::iterator it = groupCounts.find(thisGroup);
1004                             if (it == groupCounts.end()) {      groupCounts[thisGroup] = 1; }
1005                             else { groupCounts[it->first] ++; }
1006                         }else { ignore = true; }
1007                         
1008                     }
1009                 }
1010                 if (m->debug) { m->mothurOut("\n"); }
1011                 
1012                 if(allFiles && !ignore){
1013                     ofstream output;
1014                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
1015                     output << ">" << fSeq.getName() << endl << contig << endl;
1016                     output.close();
1017                 }
1018                 
1019                 //output
1020                 outFasta << ">" << fSeq.getName() << endl << contig << endl;
1021                 int numNs = 0;
1022                 for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; }  }
1023                 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
1024             }else {
1025                 //output
1026                 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
1027             }
1028             num++;
1029             
1030                         //report progress
1031                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1032                 }
1033         
1034                 //report progress
1035                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1036         
1037         inFFasta.close();
1038         inRFasta.close();
1039         outFasta.close();
1040         outScrapFasta.close();
1041         outMisMatch.close();
1042         if (thisfqualfile != "") {
1043             inFQual.close();
1044             inRQual.close();
1045         }
1046         delete alignment;
1047         
1048         if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);  }
1049     
1050         return num;
1051     }
1052         catch(exception& e) {
1053                 m->errorOut(e, "MakeContigsCommand", "driver");
1054                 exit(1);
1055         }
1056 }
1057 //**********************************************************************************************************************
1058 vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq){
1059     try {
1060         vector< vector<string> > files;
1061         //maps processors number to file pointer
1062         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1063         map<int, vector<ofstream*> >::iterator it;
1064         
1065         //create files to write to
1066         for (int i = 0; i < processors; i++) {
1067             vector<ofstream*> temp;
1068             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1069             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1070             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1071             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1072             tempfiles[i] = temp;
1073             
1074             vector<string> names;
1075             string thisOutputDir = outputDir;
1076             if (outputDir == "") { thisOutputDir = m->hasPath(ffastq); }
1077             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "ffastatemp";
1078             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp";
1079             string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp";
1080             string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp";
1081             names.push_back(ffastafilename); names.push_back(fqualfilename);
1082             names.push_back(rfastafilename); names.push_back(rqualfilename);
1083             files.push_back(names);
1084             
1085             m->openOutputFile(ffastafilename, *outFF);
1086             m->openOutputFile(rfastafilename, *outRF);
1087             m->openOutputFile(fqualfilename, *outFQ);
1088             m->openOutputFile(rqualfilename, *outRQ);
1089         }
1090         
1091         if (m->control_pressed) {
1092             //close files, delete ofstreams
1093             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1094             //remove files
1095             for (int i = 0; i < files.size(); i++) {  
1096                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1097             }
1098         }
1099         
1100         ifstream inForward;
1101         m->openInputFile(ffastq, inForward);
1102         
1103         ifstream inReverse;
1104         m->openInputFile(rfastq, inReverse);
1105         
1106         count = 0;
1107         map<string, fastqRead> uniques;
1108         map<string, fastqRead>::iterator itUniques;
1109         while ((!inForward.eof()) || (!inReverse.eof())) {
1110             
1111             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
1112             
1113             //get a read from forward and reverse fastq files
1114             bool ignoref, ignorer;
1115             fastqRead thisFread, thisRread;
1116             if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
1117             else { ignoref = true; }
1118             if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
1119             else { ignorer = true; }
1120             
1121             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques);
1122            
1123             for (int i = 0; i < reads.size(); i++) {
1124                 fastqRead fread = reads[i].forward;
1125                 fastqRead rread = reads[i].reverse;
1126                 
1127                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1128                
1129                 //if (checkReads(fread, rread, ffastq, rfastq)) {
1130                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
1131                     
1132                     //if the reads are okay write to output files
1133                     int process = count % processors;
1134                     
1135                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1136                     *(tempfiles[process][1]) << ">" << fread.name << endl;
1137                     for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1138                     *(tempfiles[process][1]) << endl;
1139                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1140                     *(tempfiles[process][3]) << ">" << rread.name << endl;
1141                     for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1142                     *(tempfiles[process][3]) << endl;
1143                     
1144                     count++;
1145                     
1146                     //report progress
1147                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1148                 //}
1149             }
1150                 }
1151                 //report progress
1152                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1153         
1154         if (uniques.size() != 0) {
1155             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1156                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1157             }
1158             m->mothurOutEndLine();
1159         }
1160         
1161         //close files, delete ofstreams
1162         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1163         inForward.close();
1164         inReverse.close();
1165         
1166         return files;
1167     }
1168     catch(exception& e) {
1169         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1170         exit(1);
1171     }
1172 }
1173 //**********************************************************************************************************************
1174 vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& count, string ffasta, string rfasta){
1175     try {
1176         vector< vector<string> > files;
1177         //maps processors number to file pointer
1178         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1179         map<int, vector<ofstream*> >::iterator it;
1180         
1181         //create files to write to
1182         for (int i = 0; i < processors; i++) {
1183             vector<ofstream*> temp;
1184             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1185             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1186             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1187             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1188             tempfiles[i] = temp;
1189             
1190             vector<string> names;
1191             string thisOutputDir = outputDir;
1192             if (outputDir == "") { thisOutputDir = m->hasPath(ffasta); }
1193             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffasta)) + toString(i) + "ffastatemp";
1194             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfasta)) + toString(i) + "rfastatemp";
1195             string fqualfilename = "";
1196             if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp";  m->openOutputFile(fqualfilename, *outFQ); }
1197             string rqualfilename = "";
1198             if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); }
1199             names.push_back(ffastafilename); names.push_back(fqualfilename);
1200             names.push_back(rfastafilename); names.push_back(rqualfilename);
1201             files.push_back(names);
1202             
1203             m->openOutputFile(ffastafilename, *outFF);
1204             m->openOutputFile(rfastafilename, *outRF);
1205         }
1206         
1207         if (m->control_pressed) {
1208             //close files, delete ofstreams
1209             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1210             //remove files
1211             for (int i = 0; i < files.size(); i++) {  
1212                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1213             }
1214         }
1215         
1216         ifstream inForwardFasta;
1217         m->openInputFile(ffasta, inForwardFasta);
1218         
1219         ifstream inReverseFasta;
1220         m->openInputFile(rfasta, inReverseFasta);
1221         
1222         ifstream inForwardQual, inReverseQual;
1223         if (fqualfile != "") { m->openInputFile(fqualfile, inForwardQual); m->openInputFile(rqualfile, inReverseQual); }
1224         
1225         count = 0;
1226         map<string, fastqRead> uniques;
1227         map<string, fastqRead>::iterator itUniques;
1228         while ((!inForwardFasta.eof()) || (!inReverseFasta.eof())) {
1229             
1230             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1231             
1232             //get a reads from forward and reverse fasta files
1233             bool ignoref, ignorer;
1234             fastqRead thisFread, thisRread;
1235             if (!inForwardFasta.eof()) {  
1236                 ignoref = false; 
1237                 Sequence temp(inForwardFasta); m->gobble(inForwardFasta);
1238                 thisFread.name = temp.getName();
1239                 thisFread.sequence = temp.getUnaligned();
1240             }else { ignoref = true; }
1241             if (!inReverseFasta.eof()) {  
1242                 ignorer = false; 
1243                 Sequence temp(inReverseFasta); m->gobble(inReverseFasta);
1244                 thisRread.name = temp.getName();
1245                 thisRread.sequence = temp.getUnaligned();  
1246             }else { ignorer = true; }
1247             
1248             //get qual reads if given
1249             if (fqualfile != "") {
1250                 if (!inForwardQual.eof() && !ignoref) {  
1251                     QualityScores temp(inForwardQual); m->gobble(inForwardQual);
1252                     //if forward files dont match ignore read
1253                     if (thisFread.name != temp.getName()) { ignoref = true; } 
1254                     else { thisFread.scores = temp.getQualityScores(); }
1255                 }else { ignoref = true; }
1256                 if (!inReverseQual.eof() && !ignorer) {  
1257                     QualityScores temp(inReverseQual); m->gobble(inReverseQual);
1258                     //if reverse files dont match ignore read
1259                     if (thisRread.name != temp.getName()) { ignorer = true; } 
1260                     else { thisRread.scores = temp.getQualityScores(); }
1261                 }else { ignorer = true; }
1262             }
1263             
1264             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques);
1265             
1266             for (int i = 0; i < reads.size(); i++) {
1267                 fastqRead fread = reads[i].forward;
1268                 fastqRead rread = reads[i].reverse;
1269                 
1270                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1271                 
1272                // if (checkReads(fread, rread, ffasta, rfasta)) {
1273                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1274                     
1275                     //if the reads are okay write to output files
1276                     int process = count % processors;
1277                     
1278                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1279                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1280                     if (fqualfile != "") { //if you have quality info, print it
1281                         *(tempfiles[process][1]) << ">" << fread.name << endl;
1282                         for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1283                         *(tempfiles[process][1]) << endl;
1284                         *(tempfiles[process][3]) << ">" << rread.name << endl;
1285                         for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1286                         *(tempfiles[process][3]) << endl;
1287                     }
1288                     count++;
1289                     
1290                     //report progress
1291                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1292                 //}
1293             }
1294                 }
1295                 //report progress
1296                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1297         
1298         if (uniques.size() != 0) {
1299             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1300                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1301             }
1302             m->mothurOutEndLine();
1303         }
1304         
1305         //close files, delete ofstreams
1306         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1307         inReverseFasta.close(); 
1308         inForwardFasta.close(); 
1309         if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); }
1310         
1311         return files;
1312     }
1313     catch(exception& e) {
1314         m->errorOut(e, "MakeContigsCommand", "readFastaFiles");
1315         exit(1);
1316     }
1317 }
1318 //**********************************************************************************************************************
1319 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques){
1320     try {
1321         vector<pairFastqRead> reads;
1322         map<string, fastqRead>::iterator itUniques;
1323             
1324         if (!ignoref && !ignorer) {
1325             if (forward.name == reverse.name) { 
1326                 pairFastqRead temp(forward, reverse);
1327                 reads.push_back(temp);
1328             }else {
1329                 bool match = false;
1330                 //if no match are the names only different by 1 and 2?
1331                 string tempFRead = forward.name.substr(0, forward.name.length()-1);
1332                 string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
1333                 if (tempFRead == tempRRead) {
1334                     if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
1335                         forward.name = tempFRead;
1336                         reverse.name = tempRRead;
1337                         pairFastqRead temp(forward, reverse);
1338                         reads.push_back(temp);
1339                         match = true;
1340                     }
1341                 }
1342                 
1343                 if (!match) {
1344                     //look for forward pair
1345                     itUniques = uniques.find(forward.name);
1346                     if (itUniques != uniques.end()) {  //we have the pair for this read
1347                         pairFastqRead temp(forward, itUniques->second);
1348                         reads.push_back(temp);
1349                         uniques.erase(itUniques);
1350                     }else { //save this read for later
1351                         uniques[forward.name] = forward;
1352                     }
1353                     
1354                     //look for reverse pair
1355                     itUniques = uniques.find(reverse.name);
1356                     if (itUniques != uniques.end()) {  //we have the pair for this read
1357                         pairFastqRead temp(itUniques->second, reverse);
1358                         reads.push_back(temp);
1359                         uniques.erase(itUniques);
1360                     }else { //save this read for later
1361                         uniques[reverse.name] = reverse;
1362                     }
1363                 }
1364                                 
1365             }
1366         }else if (!ignoref && ignorer) { //ignore reverse keep forward
1367             //look for forward pair
1368             itUniques = uniques.find(forward.name);
1369             if (itUniques != uniques.end()) {  //we have the pair for this read
1370                 pairFastqRead temp(forward, itUniques->second);
1371                 reads.push_back(temp);
1372                 uniques.erase(itUniques);
1373             }else { //save this read for later
1374                 uniques[forward.name] = forward;
1375             }
1376
1377         }else if (ignoref && !ignorer) { //ignore forward keep reverse
1378             //look for reverse pair
1379             itUniques = uniques.find(reverse.name);
1380             if (itUniques != uniques.end()) {  //we have the pair for this read
1381                 pairFastqRead temp(itUniques->second, reverse);
1382                 reads.push_back(temp);
1383                 uniques.erase(itUniques);
1384             }else { //save this read for later
1385                 uniques[reverse.name] = reverse;
1386             }
1387         }//else ignore both and do nothing
1388         
1389         return reads;
1390     }
1391     catch(exception& e) {
1392         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1393         exit(1);
1394     }
1395 }
1396 //**********************************************************************************************************************
1397 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1398     try {
1399         fastqRead read;
1400         
1401         ignore = false;
1402         
1403         //read sequence name
1404         string line = m->getline(in); m->gobble(in);
1405         vector<string> pieces = m->splitWhiteSpace(line);
1406         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
1407         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
1408         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1409         else { name = name.substr(1); }
1410         
1411         //read sequence
1412         string sequence = m->getline(in); m->gobble(in);
1413         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1414         
1415         //read sequence name
1416         line = m->getline(in); m->gobble(in);
1417         pieces = m->splitWhiteSpace(line);
1418         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
1419         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1420         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1421         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1422         
1423         //read quality scores
1424         string quality = m->getline(in); m->gobble(in);
1425         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1426          
1427         //sanity check sequence length and number of quality scores match
1428         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1429         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1430         
1431         vector<int> qualScores = convertQual(quality);
1432         
1433         read.name = name;
1434         read.sequence = sequence;
1435         read.scores = qualScores;
1436
1437         return read;
1438     }
1439     catch(exception& e) {
1440         m->errorOut(e, "MakeContigsCommand", "readFastq");
1441         exit(1);
1442     }
1443 }
1444 /**********************************************************************************************************************
1445 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
1446     try {
1447         bool good = true;
1448         
1449         //do sequence lengths match
1450         if (forward.sequence.length() != reverse.sequence.length()) {
1451             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfile + ", ignoring.\n");
1452             good = false; 
1453         }
1454         
1455         //do number of qual scores match 
1456         if (forward.scores.size() != reverse.scores.size()) {
1457             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfile + ", ignoring.\n");
1458             good = false; 
1459         }
1460
1461         return good;
1462     }
1463     catch(exception& e) {
1464         m->errorOut(e, "MakeContigsCommand", "checkReads");
1465         exit(1);
1466     }
1467 }*/
1468 //***************************************************************************************************************
1469 vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
1470         try {
1471         vector< vector<string> > files;
1472         string forward, reverse;
1473         
1474         ifstream in;
1475         m->openInputFile(filename, in);
1476         
1477         while(!in.eof()) {
1478             
1479             if (m->control_pressed) { return files; }
1480             
1481             in >> forward; m->gobble(in);
1482             in >> reverse; m->gobble(in);
1483             
1484             //check to make sure both are able to be opened
1485             ifstream in2;
1486             int openForward = m->openInputFile(forward, in2, "noerror");
1487             
1488             //if you can't open it, try default location
1489             if (openForward == 1) {
1490                 if (m->getDefaultPath() != "") { //default path is set
1491                     string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
1492                     m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
1493                     ifstream in3;
1494                     openForward = m->openInputFile(tryPath, in3, "noerror");
1495                     in3.close();
1496                     forward = tryPath;
1497                 }
1498             }
1499             
1500             //if you can't open it, try output location
1501             if (openForward == 1) {
1502                 if (m->getOutputDir() != "") { //default path is set
1503                     string tryPath = m->getOutputDir() + m->getSimpleName(forward);
1504                     m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1505                     ifstream in4;
1506                     openForward = m->openInputFile(tryPath, in4, "noerror");
1507                     forward = tryPath;
1508                     in4.close();
1509                 }
1510             }
1511             
1512             if (openForward == 1) { //can't find it
1513                 m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n"); 
1514             }else{  in2.close();  }
1515             
1516             ifstream in3;
1517             int openReverse = m->openInputFile(reverse, in3, "noerror");
1518             
1519             //if you can't open it, try default location
1520             if (openReverse == 1) {
1521                 if (m->getDefaultPath() != "") { //default path is set
1522                     string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
1523                     m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
1524                     ifstream in3;
1525                     openReverse = m->openInputFile(tryPath, in3, "noerror");
1526                     in3.close();
1527                     reverse = tryPath;
1528                 }
1529             }
1530             
1531             //if you can't open it, try output location
1532             if (openReverse == 1) {
1533                 if (m->getOutputDir() != "") { //default path is set
1534                     string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
1535                     m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1536                     ifstream in4;
1537                     openReverse = m->openInputFile(tryPath, in4, "noerror");
1538                     reverse = tryPath;
1539                     in4.close();
1540                 }
1541             }
1542             
1543             if (openReverse == 1) { //can't find it
1544                 m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n"); 
1545             }else{  in3.close();  }
1546             
1547             if ((openForward != 1) && (openReverse != 1)) { //good pair
1548                 vector<string> pair;
1549                 pair.push_back(forward);
1550                 pair.push_back(reverse);
1551                 files.push_back(pair);
1552             }
1553             
1554         }
1555         in.close();
1556         
1557         return files;
1558     }
1559     catch(exception& e) {
1560         m->errorOut(e, "MakeContigsCommand", "checkReads");
1561         exit(1);
1562     }
1563 }
1564 //***************************************************************************************************************
1565 //illumina data requires paired forward and reverse data
1566 //BARCODE   atgcatgc   atgcatgc    groupName 
1567 //PRIMER   atgcatgc   atgcatgc    groupName  
1568 //PRIMER   atgcatgc   atgcatgc  
1569 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
1570         try {
1571                 ifstream in;
1572                 m->openInputFile(oligosfile, in);
1573                 
1574                 ofstream test;
1575                 
1576                 string type, foligo, roligo, group;
1577         
1578                 int indexPrimer = 0;
1579                 int indexBarcode = 0;
1580         set<string> uniquePrimers;
1581         set<string> uniqueBarcodes;
1582                 
1583                 while(!in.eof()){
1584             
1585                         in >> type; 
1586     
1587                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1588             
1589                         if(type[0] == '#'){
1590                                 while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
1591                                 m->gobble(in);
1592                         }
1593                         else{
1594                                 m->gobble(in);
1595                                 //make type case insensitive
1596                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1597                                 
1598                                 in >> foligo;
1599                 
1600                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1601                                 
1602                                 for(int i=0;i<foligo.length();i++){
1603                                         foligo[i] = toupper(foligo[i]);
1604                                         if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
1605                                 }
1606                                 
1607                                 if(type == "FORWARD"){
1608                                         m->gobble(in);
1609                                         
1610                     in >> roligo;
1611                     
1612                     for(int i=0;i<roligo.length();i++){
1613                         roligo[i] = toupper(roligo[i]);
1614                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1615                     }
1616                     //roligo = reverseOligo(roligo);
1617                     
1618                     group = "";
1619                     
1620                                         // get rest of line in case there is a primer name
1621                                         while (!in.eof())       {       
1622                                                 char c = in.get(); 
1623                                                 if (c == 10 || c == 13){        break;  }
1624                                                 else if (c == 32 || c == 9){;} //space or tab
1625                                                 else {  group += c;  }
1626                                         } 
1627                     
1628                     oligosPair newPrimer(foligo, roligo);
1629                                         
1630                                         //check for repeat barcodes
1631                     string tempPair = foligo+roligo;
1632                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1633                     else { uniquePrimers.insert(tempPair); }
1634                                         
1635                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1636                     
1637                                         primers[indexPrimer]=newPrimer; indexPrimer++;          
1638                                         primerNameVector.push_back(group);
1639                                 }else if(type == "BARCODE"){
1640                                         m->gobble(in);
1641                                         
1642                     in >> roligo;
1643                     
1644                     for(int i=0;i<roligo.length();i++){
1645                         roligo[i] = toupper(roligo[i]);
1646                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1647                     }
1648                     //roligo = reverseOligo(roligo);
1649                     
1650                     oligosPair newPair(foligo, roligo);
1651                     
1652                     group = "";
1653                     while (!in.eof())   {       
1654                                                 char c = in.get(); 
1655                                                 if (c == 10 || c == 13){        break;  }
1656                                                 else if (c == 32 || c == 9){;} //space or tab
1657                                                 else {  group += c;  }
1658                                         } 
1659                                         
1660                     if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1661                         
1662                     //check for repeat barcodes
1663                     string tempPair = foligo+roligo;
1664                     if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
1665                     else { uniqueBarcodes.insert(tempPair); }
1666                         
1667                     barcodes[indexBarcode]=newPair; indexBarcode++;
1668                                         barcodeNameVector.push_back(group);
1669                                 }else if(type == "LINKER"){
1670                                         linker.push_back(foligo);
1671                     m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
1672                                 }else if(type == "SPACER"){
1673                                         spacer.push_back(foligo);
1674                     m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
1675                                 }
1676                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
1677                         }
1678                         m->gobble(in);
1679                 }       
1680                 in.close();
1681                 
1682                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1683                 
1684                 //add in potential combos
1685                 if(barcodeNameVector.size() == 0){
1686             oligosPair temp("", "");
1687                         barcodes[0] = temp;
1688                         barcodeNameVector.push_back("");                        
1689                 }
1690                 
1691                 if(primerNameVector.size() == 0){
1692             oligosPair temp("", "");
1693                         primers[0] = temp;
1694                         primerNameVector.push_back("");                 
1695                 }
1696                 
1697                 fastaFileNames.resize(barcodeNameVector.size());
1698                 for(int i=0;i<fastaFileNames.size();i++){
1699                         fastaFileNames[i].assign(primerNameVector.size(), "");
1700                 }
1701                 
1702                 if(allFiles){
1703                         set<string> uniqueNames; //used to cleanup outputFileNames
1704                         for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1705                                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1706                                         
1707                                         string primerName = primerNameVector[itPrimer->first];
1708                                         string barcodeName = barcodeNameVector[itBar->first];
1709                     
1710                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
1711                                         else {
1712                         string comboGroupName = "";
1713                         string fastaFileName = "";
1714                         string qualFileName = "";
1715                         string nameFileName = "";
1716                         string countFileName = "";
1717                         
1718                         if(primerName == ""){
1719                             comboGroupName = barcodeNameVector[itBar->first];
1720                         }
1721                         else{
1722                             if(barcodeName == ""){
1723                                 comboGroupName = primerNameVector[itPrimer->first];
1724                             }
1725                             else{
1726                                 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1727                             }
1728                         }
1729                         
1730                         
1731                         ofstream temp;
1732                         fastaFileName = rootname + comboGroupName + ".fasta";
1733                         if (uniqueNames.count(fastaFileName) == 0) {
1734                             outputNames.push_back(fastaFileName);
1735                             outputTypes["fasta"].push_back(fastaFileName);
1736                             uniqueNames.insert(fastaFileName);
1737                         }
1738                         
1739                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1740                         m->openOutputFile(fastaFileName, temp);         temp.close();
1741                     }
1742                                 }
1743                         }
1744                 }
1745                 
1746                 bool allBlank = true;
1747                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1748                         if (barcodeNameVector[i] != "") {
1749                                 allBlank = false;
1750                                 break;
1751                         }
1752                 }
1753                 for (int i = 0; i < primerNameVector.size(); i++) {
1754                         if (primerNameVector[i] != "") {
1755                                 allBlank = false;
1756                                 break;
1757                         }
1758                 }
1759         
1760                 if (allBlank) {
1761                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1762                         allFiles = false;
1763                         return false;
1764                 }
1765                 
1766                 return true;
1767                 
1768         }
1769         catch(exception& e) {
1770                 m->errorOut(e, "MakeContigsCommand", "getOligos");
1771                 exit(1);
1772         }
1773 }
1774 //********************************************************************/
1775 string MakeContigsCommand::reverseOligo(string oligo){
1776         try {
1777         string reverse = "";
1778         
1779         for(int i=oligo.length()-1;i>=0;i--){
1780             
1781             if(oligo[i] == 'A')         {       reverse += 'T'; }
1782             else if(oligo[i] == 'T'){   reverse += 'A'; }
1783             else if(oligo[i] == 'U'){   reverse += 'A'; }
1784             
1785             else if(oligo[i] == 'G'){   reverse += 'C'; }
1786             else if(oligo[i] == 'C'){   reverse += 'G'; }
1787             
1788             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1789             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1790             
1791             else if(oligo[i] == 'M'){   reverse += 'K'; }
1792             else if(oligo[i] == 'K'){   reverse += 'M'; }
1793             
1794             else if(oligo[i] == 'W'){   reverse += 'W'; }
1795             else if(oligo[i] == 'S'){   reverse += 'S'; }
1796             
1797             else if(oligo[i] == 'B'){   reverse += 'V'; }
1798             else if(oligo[i] == 'V'){   reverse += 'B'; }
1799             
1800             else if(oligo[i] == 'D'){   reverse += 'H'; }
1801             else if(oligo[i] == 'H'){   reverse += 'D'; }
1802             
1803             else                                                {       reverse += 'N'; }
1804         }
1805         
1806         
1807         return reverse;
1808     }
1809         catch(exception& e) {
1810                 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
1811                 exit(1);
1812         }
1813 }
1814 //**********************************************************************************************************************
1815 vector<int> MakeContigsCommand::convertQual(string qual) {
1816         try {
1817                 vector<int> qualScores;
1818         bool negativeScores = false;
1819                 
1820                 for (int i = 0; i < qual.length(); i++) { 
1821             
1822             int temp = 0;
1823             temp = int(qual[i]);
1824             if (format == "illumina") {
1825                 temp -= 64; //char '@'
1826             }else if (format == "illumina1.8+") {
1827                     temp -= int('!'); //char '!'
1828             }else if (format == "solexa") {
1829                 temp = int(convertTable[temp]); //convert to sanger
1830                 temp -= int('!'); //char '!'
1831             }else {
1832                 temp -= int('!'); //char '!'
1833             }
1834             
1835             if (temp < -5) { negativeScores = true; }
1836                         qualScores.push_back(temp);
1837                 }
1838                 
1839         if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  }
1840         
1841                 return qualScores;
1842         }
1843         catch(exception& e) {
1844                 m->errorOut(e, "MakeContigsCommand", "convertQual");
1845                 exit(1);
1846         }
1847 }
1848
1849 //**********************************************************************************************************************
1850
1851
1852
1853