]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
15         CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
16         CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
17         CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
18         CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
19         CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
20         CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
21         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
22         CommandParameter pfindex("findex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pfindex);
23         CommandParameter prindex("rindex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(prindex);
24                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
26         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
27         CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
28         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
29         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
30         CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
31                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
32                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
33                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
34                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
35         CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
36         CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
37                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
38         CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
41                 
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MakeContigsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string MakeContigsCommand::getHelpString(){     
53         try {
54                 string helpString = "";
55                 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. \n";
56         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
57         helpString += "If a forward index or reverse index file is provided barcodes be trimmed, and a group file will be created. The oligos parameter is required if an index file is given.\n";
58                 helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, findex, rindex, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
59                 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
60         helpString += "The file parameter is 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one.  Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
61         helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process.  If you provide one, you must provide the other.\n";
62         helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process.  If you provide one, you must provide the other.\n";
63         helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters.  If you provide one, you must provide the other.\n";
64                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
65         helpString += "The findex and rindex parameters are used to provide a forward index and reverse index files to process.  \n";
66         helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
67         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
68                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70         //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
71                 //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
72                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
73                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
74         helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
75         helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base.  For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5).  If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
76                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
77                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
78         helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
79         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
80         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
81
82         helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
83         helpString += "The make.contigs command should be in the following format: \n";
84                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
85                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
86                 return helpString;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
90                 exit(1);
91         }
92 }
93 //**********************************************************************************************************************
94 string MakeContigsCommand::getOutputPattern(string type) {
95     try {
96         string pattern = "";
97         
98         if (type == "fasta") {  pattern = "[filename],[tag],contigs.fasta"; } 
99         else if (type == "group") {  pattern = "[filename],[tag],contigs.groups"; }
100         else if (type == "report") {  pattern = "[filename],[tag],contigs.report"; }
101         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
102         
103         return pattern;
104     }
105     catch(exception& e) {
106         m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
107         exit(1);
108     }
109 }
110 //**********************************************************************************************************************
111 MakeContigsCommand::MakeContigsCommand(){       
112         try {
113                 abort = true; calledHelp = true; 
114                 setParameters();
115                 vector<string> tempOutNames;
116                 outputTypes["fasta"] = tempOutNames;
117         outputTypes["group"] = tempOutNames;
118         outputTypes["report"] = tempOutNames;
119     }
120         catch(exception& e) {
121                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
122                 exit(1);
123         }
124 }
125 //**********************************************************************************************************************
126 MakeContigsCommand::MakeContigsCommand(string option)  {
127         try {
128                 abort = false; calledHelp = false;
129         createFileGroup = false; createOligosGroup = false;
130         
131                 //allow user to run help
132                 if(option == "help") { help(); abort = true; calledHelp = true; }
133                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
134                 
135                 else {
136                         vector<string> myArray = setParameters();
137                         
138                         OptionParser parser(option);
139                         map<string, string> parameters = parser.getParameters(); 
140                         
141                         ValidParameters validParameter("pairwise.seqs");
142                         map<string, string>::iterator it;
143                         
144                         //check to make sure all parameters are valid for command
145                         for (it = parameters.begin(); it != parameters.end(); it++) { 
146                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
147                         }
148                         
149                         //initialize outputTypes
150                         vector<string> tempOutNames;
151                         outputTypes["fasta"] = tempOutNames;
152             outputTypes["report"] = tempOutNames;
153             outputTypes["group"] = tempOutNames;
154                         
155             
156                         //if the user changes the input directory command factory will send this info to us in the output parameter 
157                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
158                         if (inputDir == "not found"){   inputDir = "";          }
159                         else { 
160                                 string path;
161                 it = parameters.find("ffastq");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
167                                 }
168                 
169                 it = parameters.find("rfastq");
170                                 //user has given a template file
171                                 if(it != parameters.end()){ 
172                                         path = m->hasPath(it->second);
173                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
175                                 }
176                 
177                 it = parameters.find("ffasta");
178                                 //user has given a template file
179                                 if(it != parameters.end()){ 
180                                         path = m->hasPath(it->second);
181                                         //if the user has not given a path then, add inputdir. else leave path alone.
182                                         if (path == "") {       parameters["ffasta"] = inputDir + it->second;           }
183                                 }
184                 
185                 it = parameters.find("rfasta");
186                                 //user has given a template file
187                                 if(it != parameters.end()){ 
188                                         path = m->hasPath(it->second);
189                                         //if the user has not given a path then, add inputdir. else leave path alone.
190                                         if (path == "") {       parameters["rfasta"] = inputDir + it->second;           }
191                                 }
192                 
193                 it = parameters.find("fqfile");
194                                 //user has given a template file
195                                 if(it != parameters.end()){ 
196                                         path = m->hasPath(it->second);
197                                         //if the user has not given a path then, add inputdir. else leave path alone.
198                                         if (path == "") {       parameters["fqfile"] = inputDir + it->second;           }
199                                 }
200                 
201                 it = parameters.find("rqfile");
202                                 //user has given a template file
203                                 if(it != parameters.end()){ 
204                                         path = m->hasPath(it->second);
205                                         //if the user has not given a path then, add inputdir. else leave path alone.
206                                         if (path == "") {       parameters["rqfile"] = inputDir + it->second;           }
207                                 }
208                 
209                 it = parameters.find("file");
210                                 //user has given a template file
211                                 if(it != parameters.end()){ 
212                                         path = m->hasPath(it->second);
213                                         //if the user has not given a path then, add inputdir. else leave path alone.
214                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
215                                 }
216                 
217                 it = parameters.find("oligos");
218                                 //user has given a template file
219                                 if(it != parameters.end()){ 
220                                         path = m->hasPath(it->second);
221                                         //if the user has not given a path then, add inputdir. else leave path alone.
222                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
223                                 }
224                 
225                 it = parameters.find("findex");
226                                 //user has given a template file
227                                 if(it != parameters.end()){
228                                         path = m->hasPath(it->second);
229                                         //if the user has not given a path then, add inputdir. else leave path alone.
230                                         if (path == "") {       parameters["findex"] = inputDir + it->second;           }
231                                 }
232                 
233                 it = parameters.find("rindex");
234                                 //user has given a template file
235                                 if(it != parameters.end()){
236                                         path = m->hasPath(it->second);
237                                         //if the user has not given a path then, add inputdir. else leave path alone.
238                                         if (path == "") {       parameters["rindex"] = inputDir + it->second;           }
239                                 }
240             }
241             
242             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
243                         if (ffastqfile == "not open") {  abort = true; }        
244                         else if (ffastqfile == "not found") { ffastqfile = ""; }
245                         
246                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
247                         if (rfastqfile == "not open") {  abort = true; }        
248                         else if (rfastqfile == "not found") { rfastqfile = "";  }
249             
250             ffastafile = validParameter.validFile(parameters, "ffasta", true);
251                         if (ffastafile == "not open") {  abort = true; }        
252                         else if (ffastafile == "not found") { ffastafile = ""; }
253                         
254                         rfastafile = validParameter.validFile(parameters, "rfasta", true);
255                         if (rfastafile == "not open") {  abort = true; }        
256                         else if (rfastafile == "not found") { rfastafile = "";  }
257             
258             fqualfile = validParameter.validFile(parameters, "fqfile", true);
259                         if (fqualfile == "not open") {  abort = true; } 
260                         else if (fqualfile == "not found") { fqualfile = ""; }
261                         
262                         rqualfile = validParameter.validFile(parameters, "rqfile", true);
263                         if (rqualfile == "not open") {  abort = true; } 
264                         else if (rqualfile == "not found") { rqualfile = "";  }
265             
266             file = validParameter.validFile(parameters, "file", true);
267                         if (file == "not open") {  abort = true; }      
268                         else if (file == "not found") { file = "";  }
269             
270             //provide at least
271             if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
272             if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
273             if ((ffastqfile != "") && (rfastqfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffastq, you must provide a rfastq file.\n"); }
274             if ((ffastqfile == "") && (rfastqfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfastq, you must provide a ffastq file.\n"); }
275             if ((ffastafile != "") && (rfastafile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffasta, you must provide a rfasta file.\n"); }
276             if ((ffastafile == "") && (rfastafile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfasta, you must provide a ffasta file.\n"); }
277             if ((fqualfile != "") && (rqualfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the fqfile, you must provide a rqfile file.\n"); }
278             if ((fqualfile == "") && (rqualfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile, you must provide a fqfile file.\n"); }
279             if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) {
280                 abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n");
281             }
282             
283             oligosfile = validParameter.validFile(parameters, "oligos", true);
284                         if (oligosfile == "not found")      {   oligosfile = "";        }
285                         else if(oligosfile == "not open")   {   abort = true;       } 
286                         else {   m->setOligosFile(oligosfile);          }
287             
288             findexfile = validParameter.validFile(parameters, "findex", true);
289                         if (findexfile == "not found")      {   findexfile = "";        }
290                         else if(findexfile == "not open")   {   abort = true;       }
291             
292             rindexfile = validParameter.validFile(parameters, "rindex", true);
293                         if (rindexfile == "not found")      {   rindexfile = "";        }
294                         else if(rindexfile == "not open")   {   abort = true;       }
295         
296             if ((rindexfile != "") || (findexfile != "")) {
297                                 if (oligosfile == ""){
298                                         oligosfile = m->getOligosFile();
299                                         if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n");  }
300                                         else {
301                         m->mothurOut("You need to provide an oligos file if you are going to use an index file.\n"); abort = true;
302                                         }
303                                 }
304                 
305                 //can only use an index file with the fastq parameters not fasta and qual
306                 if ((ffastafile != "") || (rfastafile != "")) {
307                     m->mothurOut("[ERROR]: You can only use an index file with the fastq parameters or the file option.\n"); abort = true;
308                 }
309                         }
310             
311             //if the user changes the output directory command factory will send this info to us in the output parameter 
312                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
313                  outputDir = ""; 
314             }
315                         
316
317                         //check for optional parameter and set defaults
318                         // ...at some point should added some additional type checking...
319                         string temp;
320                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
321                         m->mothurConvert(temp, match);  
322                         
323                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
324                         m->mothurConvert(temp, misMatch);  
325             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
326                         
327                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
328                         m->mothurConvert(temp, gapOpen);  
329             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
330                         
331                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
332                         m->mothurConvert(temp, gapExtend); 
333             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
334                         
335             temp = validParameter.validFile(parameters, "insert", false);       if (temp == "not found"){       temp = "20";                    }
336                         m->mothurConvert(temp, insert); 
337             if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
338
339             temp = validParameter.validFile(parameters, "deltaq", false);       if (temp == "not found"){       temp = "6";                     }
340                         m->mothurConvert(temp, deltaq);
341             
342                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
343                         m->setProcessors(temp);
344                         m->mothurConvert(temp, processors);
345             
346             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
347                         m->mothurConvert(temp, bdiffs);
348                         
349                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
350                         m->mothurConvert(temp, pdiffs);
351             
352   //          temp = validParameter.validFile(parameters, "ldiffs", false);             if (temp == "not found") { temp = "0"; }
353 //                      m->mothurConvert(temp, ldiffs);
354             ldiffs = 0;
355             
356  //           temp = validParameter.validFile(parameters, "sdiffs", false);             if (temp == "not found") { temp = "0"; }
357  //           m->mothurConvert(temp, sdiffs);
358             sdiffs = 0;
359                         
360                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
361                         m->mothurConvert(temp, tdiffs);
362                         
363                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
364
365             temp = validParameter.validFile(parameters, "allfiles", false);             if (temp == "not found") { temp = "F"; }
366                         allFiles = m->isTrue(temp);
367             
368             
369             temp = validParameter.validFile(parameters, "trimoverlap", false);          if (temp == "not found") { temp = "F"; }
370                         trimOverlap = m->isTrue(temp);
371                         
372                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
373                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
374             
375             format = validParameter.validFile(parameters, "format", false);             if (format == "not found"){     format = "illumina1.8+";        }
376             
377             if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
378                                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
379                                 abort=true;
380                         }
381             
382             temp = validParameter.validFile(parameters, "checkorient", false);          if (temp == "not found") { temp = "F"; }
383                         reorient = m->isTrue(temp);
384             
385             //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
386             for (int i = -64; i < 65; i++) { 
387                 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
388                 convertTable.push_back(temp);
389             }
390         }
391                 
392         }
393         catch(exception& e) {
394                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
395                 exit(1);
396         }
397 }
398 //**********************************************************************************************************************
399 int MakeContigsCommand::execute(){
400         try {
401                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
402         
403         //read ffastq and rfastq files creating fasta and qual files.
404         //this function will create a forward and reverse, fasta and qual files for each processor.
405         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual.  filesToProcess is for each filepair in the file parameter file.  for ffastq and rfastq this will be size 1.
406         unsigned long int numReads = 0;
407         int start = time(NULL);
408         longestBase = 1000;
409         m->mothurOut("Reading fastq data...\n"); 
410         vector < vector< vector<string> > > filesToProcess = preProcessData(numReads);
411         m->mothurOut("Done.\n");
412        
413         if (m->control_pressed) { return 0; }
414         
415         map<string, string> cvars;
416         string compOutputDir = outputDir;
417         if (outputDir == "") { compOutputDir = m->hasPath(file); }
418         cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
419         cvars["[tag]"] = "";
420         string compositeGroupFile = getOutputFileName("group",cvars);
421         cvars["[tag]"] = "trim";
422         string compositeFastaFile = getOutputFileName("fasta",cvars);
423         cvars["[tag]"] = "scrap";
424         string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
425         cvars["[tag]"] = "";
426         string compositeMisMatchFile = getOutputFileName("report",cvars);
427         
428         if (filesToProcess.size() > 1) { //clear files for append below
429             ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
430             m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
431             m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
432             m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
433             outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
434             outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
435             outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
436         }
437         
438         map<string, int> totalGroupCounts;
439         
440         for (int l = 0; l < filesToProcess.size(); l++) {
441             
442             m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
443             
444             groupCounts.clear();
445             groupMap.clear();
446             vector<vector<string> > fastaFileNames;
447             map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
448             createOligosGroup = false;
449             oligos = new Oligos();
450             numBarcodes = 0; numFPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0;
451             string outputGroupFileName;
452             map<string, string> variables; 
453             string thisOutputDir = outputDir;
454             if (outputDir == "") {  thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
455             variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
456             variables["[tag]"] = "";
457             if(oligosfile != ""){  createOligosGroup = getOligos(fastaFileNames, variables["[filename]"], uniqueFastaNames);  }
458             if (createOligosGroup || createFileGroup) {
459                 outputGroupFileName = getOutputFileName("group",variables);
460             }
461             
462             //give group in file file precedence
463             if (createFileGroup) {  createOligosGroup = false; }
464             
465             variables["[tag]"] = "trim";
466             string outFastaFile = getOutputFileName("fasta",variables);
467             variables["[tag]"] = "scrap";
468             string outScrapFastaFile = getOutputFileName("fasta",variables);
469             variables["[tag]"] = "";
470             string outMisMatchFile = getOutputFileName("report",variables);
471                         
472             m->mothurOut("Making contigs...\n"); 
473             createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
474             
475             //remove temp fasta and qual files
476             for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); }  }
477             
478             if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); }  delete oligos; return 0; }
479             
480             if(allFiles){
481                 // so we don't add the same groupfile multiple times
482                 map<string, string>::iterator it;
483                 set<string> namesToRemove;
484                 for(int i=0;i<fastaFileNames.size();i++){
485                     for(int j=0;j<fastaFileNames[0].size();j++){
486                         if (fastaFileNames[i][j] != "") {
487                             if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
488                                 if(m->isBlank(fastaFileNames[i][j])){
489                                     m->mothurRemove(fastaFileNames[i][j]);
490                                     namesToRemove.insert(fastaFileNames[i][j]);
491                                     uniqueFastaNames.erase(fastaFileNames[i][j]); //remove from list for group file print
492                                 }
493                             }
494                         }
495                     }
496                 }
497                 
498                 //remove names for outputFileNames, just cleans up the output
499                 vector<string> outputNames2;
500                 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
501                 outputNames = outputNames2;
502                 
503                 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
504                     ifstream in;
505                     m->openInputFile(it->first, in);
506                     
507                     ofstream out;
508                     variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
509                     string thisGroupName = getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
510                     m->openOutputFile(thisGroupName, out);
511                     
512                     while (!in.eof()){
513                         if (m->control_pressed) { break; }
514                         
515                         Sequence currSeq(in); m->gobble(in);
516                         out << currSeq.getName() << '\t' << it->second << endl;
517                     }
518                     out.close();
519                     in.close();
520                 }
521             }
522             
523             if (createFileGroup || createOligosGroup) {
524                 ofstream outGroup;
525                 m->openOutputFile(outputGroupFileName, outGroup);
526                 for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
527                     outGroup << itGroup->first << '\t' << itGroup->second << endl;
528                 }
529                 outGroup.close();
530             }
531             
532             if (filesToProcess.size() > 1) { //merge into large combo files
533                 if (createFileGroup || createOligosGroup) {
534                     if (l == 0) {
535                         ofstream outCGroup;
536                         m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
537                         outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
538                     }
539                     m->appendFiles(outputGroupFileName, compositeGroupFile);
540                     if (!allFiles) { m->mothurRemove(outputGroupFileName);  }
541                     else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
542                     
543                     for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
544                         map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
545                         if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
546                         else { itTemp->second += itGroups->second; } //existing group, update total
547                     }
548                 }
549                 if (l == 0) {  m->appendFiles(outMisMatchFile, compositeMisMatchFile);  }
550                 else {  m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile);  }
551                 m->appendFiles(outFastaFile, compositeFastaFile);
552                 m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
553                 if (!allFiles) {
554                     m->mothurRemove(outMisMatchFile);
555                     m->mothurRemove(outFastaFile);
556                     m->mothurRemove(outScrapFastaFile);
557                 }else {
558                     outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
559                     outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
560                     outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
561                 }
562             }else {
563                 totalGroupCounts = groupCounts;
564                 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
565                 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
566                 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
567                 if (createFileGroup || createOligosGroup) {
568                      outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); 
569                 }
570             }
571             m->mothurOut("Done.\n");
572             delete oligos;
573         }
574         
575         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
576         
577         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
578         
579                 //output group counts
580                 m->mothurOutEndLine();
581                 int total = 0;
582                 if (totalGroupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
583                 for (map<string, int>::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) {
584             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
585                 }
586                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
587                 
588                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
589         
590         string currentFasta = "";
591                 itTypes = outputTypes.find("fasta");
592                 if (itTypes != outputTypes.end()) {
593                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
594                 }
595         
596         string currentGroup = "";
597                 itTypes = outputTypes.find("group");
598                 if (itTypes != outputTypes.end()) {
599                         if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
600                 }
601                 
602         //output files created by command
603                 m->mothurOutEndLine();
604                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
605                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
606                 m->mothurOutEndLine();
607
608         return 0;
609     }
610         catch(exception& e) {
611                 m->errorOut(e, "MakeContigsCommand", "execute");
612                 exit(1);
613         }
614 }
615 //**********************************************************************************************************************
616 vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned long int& numReads) {
617         try {
618         vector< vector< vector<string> > > filesToProcess;
619         
620         if (ffastqfile != "") { //reading one file
621             vector< vector<string> > files = readFastqFiles(numReads, ffastqfile, rfastqfile, findexfile, rindexfile);
622             //adjust for really large processors or really small files
623             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
624             if (numReads < processors) { 
625                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
626                 files.resize(numReads);
627                 processors = numReads; 
628             }
629             filesToProcess.push_back(files);
630         }else if (file != "") { //reading multiple files
631             //return only valid pairs
632             vector< vector<string> > filePairsToProcess = readFileNames(file);
633             
634             if (m->control_pressed) { return filesToProcess; }
635             
636             if (filePairsToProcess.size() != 0) {
637                 for (int i = 0; i < filePairsToProcess.size(); i++) {
638                     
639                     if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } }
640                     
641                     unsigned long int thisFilesReads;
642                     vector< vector<string> > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1], filePairsToProcess[i][2], filePairsToProcess[i][3]);
643                     
644                     //adjust for really large processors or really small files
645                     if (thisFilesReads < processors) { 
646                         m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n"); 
647                         for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); }
648                         //remove from file2Group if necassary
649                         map<int, string> cFile2Group;
650                         for (map<int, string>::iterator it = file2Group.begin(); it != file2Group.end(); it++) {
651                             if ((it->first) < i) { cFile2Group[it->first] = it->second; }
652                             else if ((it->first) == i) { } //do nothing, we removed files for i
653                             else { cFile2Group[(it->first-1)] = it->second; } //adjust files because i was removed
654                         }
655                         file2Group = cFile2Group;
656                     }else {
657                         filesToProcess.push_back(files);
658                         numReads += thisFilesReads;
659                     }
660                 }
661                 //all files are bad
662                 if (numReads == 0) {  m->control_pressed = true; }
663             }
664         }else if (ffastafile != "") {
665             vector< vector<string> > files = readFastaFiles(numReads, ffastafile, rfastafile);
666             //adjust for really large processors or really small files
667             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
668             if (numReads < processors) { 
669                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
670                 files.resize(numReads);
671                 processors = numReads; 
672             }
673             filesToProcess.push_back(files);
674         }else { m->control_pressed = true; } //should not get here
675         
676         return filesToProcess;
677     }
678         catch(exception& e) {
679                 m->errorOut(e, "MakeContigsCommand", "preProcessData");
680                 exit(1);
681         }
682 }
683 //**********************************************************************************************************************
684 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int index) {
685         try {
686                 int num = 0;
687                 vector<int> processIDS;
688         string group = "";
689         map<int, string>::iterator it = file2Group.find(index);
690         if (it != file2Group.end()) { group = it->second; }
691         
692 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
693                 int process = 0;
694                 
695                 //loop through and create all the processes you want
696                 while (process != processors-1) {
697                         pid_t pid = fork();
698                         
699                         if (pid > 0) {
700                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
701                                 process++;
702                         }else if (pid == 0){
703                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
704                 
705                                 if(allFiles){
706                                         ofstream temp;
707                     
708                                         for(int i=0;i<tempFASTAFileNames.size();i++){
709                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
710                                                         if (tempFASTAFileNames[i][j] != "") {
711                                                                 tempFASTAFileNames[i][j] += m->mothurGetpid(process) + ".temp";
712                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
713                                                         }
714                                                 }
715                                         }
716                                 }
717                 
718                                 num = driver(files[process], 
719                              outputFasta + m->mothurGetpid(process) + ".temp",
720                              outputScrapFasta + m->mothurGetpid(process) + ".temp",
721                              outputMisMatches + m->mothurGetpid(process) + ".temp",
722                              tempFASTAFileNames, process, group);
723                                 
724                                 //pass groupCounts to parent
725                 ofstream out;
726                 string tempFile = m->mothurGetpid(process) + ".num.temp";
727                 m->openOutputFile(tempFile, out);
728                 out << num << endl;
729                                 if (createFileGroup || createOligosGroup) {
730                                         out << groupCounts.size() << endl;
731                                         
732                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
733                                                 out << it->first << '\t' << it->second << endl;
734                                         }
735                     
736                     out << groupMap.size() << endl;
737                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
738                                                 out << it->first << '\t' << it->second << endl;
739                                         }
740                                 }
741                 out.close();
742                                 
743                                 exit(0);
744                         }else { 
745                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
746                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
747                                 exit(0);
748                         }
749                 }
750                 
751         ofstream temp;
752                 m->openOutputFile(outputFasta, temp);           temp.close();
753         m->openOutputFile(outputScrapFasta, temp);              temp.close();
754                 
755                 //do my part
756                 num = driver(files[processors-1], outputFasta, outputScrapFasta,  outputMisMatches, fastaFileNames, processors-1, group);
757                 
758                 //force parent to wait until all the processes are done
759                 for (int i=0;i<processIDS.size();i++) { 
760                         int temp = processIDS[i];
761                         wait(&temp);
762                 }
763         
764                 for (int i = 0; i < processIDS.size(); i++) {
765             ifstream in;
766             string tempFile = toString(processIDS[i]) + ".num.temp";
767             m->openInputFile(tempFile, in);
768             int tempNum;
769             in >> tempNum; num += tempNum; m->gobble(in);
770             
771                         if (createFileGroup || createOligosGroup) {
772                                 string group;
773                                 in >> tempNum; m->gobble(in);
774                                 
775                                 if (tempNum != 0) {
776                                         for (int j = 0; j < tempNum; j++) { 
777                         int groupNum;
778                                                 in >> group >> groupNum; m->gobble(in);
779                         
780                                                 map<string, int>::iterator it = groupCounts.find(group);
781                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
782                                                 else { groupCounts[it->first] += groupNum; }
783                                         }
784                                 }
785                 in >> tempNum; m->gobble(in);
786                 if (tempNum != 0) {
787                                         for (int j = 0; j < tempNum; j++) { 
788                         string group, seqName;
789                                                 in >> seqName >> group; m->gobble(in);
790                         
791                                                 map<string, string>::iterator it = groupMap.find(seqName);
792                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
793                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
794                                         }
795                                 }
796                         }
797             in.close(); m->mothurRemove(tempFile);
798         }
799     #else
800         
801         //////////////////////////////////////////////////////////////////////////////////////////////////////
802                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
803                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
804                 //////////////////////////////////////////////////////////////////////////////////////////////////////
805                 
806                 vector<contigsData*> pDataArray; 
807                 DWORD   dwThreadIdArray[processors-1];
808                 HANDLE  hThreadArray[processors-1]; 
809                 
810                 //Create processor worker threads.
811                 for( int h=0; h<processors-1; h++ ){
812                         string extension = "";
813                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
814             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
815                         
816             if(allFiles){
817                 ofstream temp;
818                 
819                 for(int i=0;i<tempFASTAFileNames.size();i++){
820                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
821                         if (tempFASTAFileNames[i][j] != "") {
822                             tempFASTAFileNames[i][j] += extension;
823                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
824                         }
825                     }
826                 }
827             }
828                                  
829                         contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, tempFASTAFileNames, oligosfile, reorient, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
830                         pDataArray.push_back(tempcontig);
831             
832                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
833                 }
834         
835         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
836
837         if(allFiles){
838             ofstream temp;
839             string extension = toString(processors-1) + ".temp";
840             
841             for(int i=0;i<tempFASTAFileNames.size();i++){
842                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
843                     if (tempFASTAFileNames[i][j] != "") {
844                         tempFASTAFileNames[i][j] += extension;
845                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
846                     }
847                 }
848             }
849         }
850
851                 //parent do my part
852                 ofstream temp;
853                 m->openOutputFile(outputFasta, temp);           temp.close();
854         m->openOutputFile(outputScrapFasta, temp);              temp.close();
855         
856         //do my part
857         processIDS.push_back(processors-1);
858                 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"),  (outputScrapFasta+ toString(processors-1) + ".temp"),  (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group);
859         
860                 //Wait until all threads have terminated.
861                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
862                 
863                 //Close all thread handles and free memory allocations.
864                 for(int i=0; i < pDataArray.size(); i++){
865                         num += pDataArray[i]->count;
866             if (!pDataArray[i]->done) {
867                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true; 
868             }
869             for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
870                 map<string, int>::iterator it2 = groupCounts.find(it->first);
871                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
872                 else { groupCounts[it->first] += it->second; }
873             }
874             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
875                 map<string, string>::iterator it2 = groupMap.find(it->first);
876                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
877                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
878             }
879             CloseHandle(hThreadArray[i]);
880                         delete pDataArray[i];
881         }
882                                 
883     #endif      
884         
885         for (int i = 0; i < processIDS.size(); i++) {
886                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
887                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
888                         
889                         m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
890                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
891             
892             m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
893                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
894             
895             if(allFiles){
896                                 for(int j=0;j<fastaFileNames.size();j++){
897                                         for(int k=0;k<fastaFileNames[j].size();k++){
898                                                 if (fastaFileNames[j][k] != "") {
899                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
900                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
901                                                 }
902                                         }
903                                 }
904                         }
905                 }
906                 
907                 return num;
908         }
909         catch(exception& e) {
910                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
911                 exit(1);
912         }
913 }
914 //**********************************************************************************************************************
915 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process, string group){
916     try {
917         
918         Alignment* alignment;
919         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
920                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
921         
922         int num = 0;
923         string thisffastafile = files[0];
924         string thisfqualfile = files[1];
925         string thisrfastafile = files[2];
926         string thisrqualfile = files[3];
927         string thisfindexfile = files[4];
928         string thisrindexfile = files[5];
929         
930         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); }
931         
932         ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex;
933         ofstream outFasta, outMisMatch, outScrapFasta;
934         m->openInputFile(thisffastafile, inFFasta);
935         m->openInputFile(thisrfastafile, inRFasta);
936         if (thisfqualfile != "") {
937             m->openInputFile(thisfqualfile, inFQual);
938             m->openInputFile(thisrqualfile, inRQual);
939         }
940         
941         if (thisfindexfile != "") { m->openInputFile(thisfindexfile, inFIndex);  }
942         if (thisrindexfile != "") { m->openInputFile(thisrindexfile, inRIndex);  }
943         
944         m->openOutputFile(outputFasta, outFasta);
945         m->openOutputFile(outputScrapFasta, outScrapFasta);
946         m->openOutputFile(outputMisMatches, outMisMatch);
947         outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  
948         
949         TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, oligos->getPairedPrimers(), oligos->getPairedBarcodes());
950         
951         TrimOligos* rtrimOligos = NULL;
952         if (reorient) {
953             rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos->getReorientedPairedPrimers(), oligos->getReorientedPairedBarcodes()); numBarcodes = oligos->getReorientedPairedBarcodes().size();
954         }
955         
956         while ((!inFFasta.eof()) && (!inRFasta.eof())) {
957             
958             if (m->control_pressed) { break; }
959             
960             int success = 1;
961             string trashCode = "";
962             int currentSeqsDiffs = 0;
963
964             //read seqs and quality info
965             Sequence fSeq(inFFasta); m->gobble(inFFasta);
966             Sequence rSeq(inRFasta); m->gobble(inRFasta);
967             QualityScores* fQual = NULL; QualityScores* rQual = NULL;
968             if (thisfqualfile != "") {
969                 fQual = new QualityScores(inFQual); m->gobble(inFQual);
970                 rQual = new QualityScores(inRQual); m->gobble(inRQual);
971             }
972             Sequence findexBarcode("findex", "NONE");  Sequence rindexBarcode("rindex", "NONE");
973             if (thisfindexfile != "") {
974                 Sequence temp(inFIndex); m->gobble(inFIndex);
975                 findexBarcode.setAligned(temp.getAligned());
976             }
977             
978             if (thisrindexfile != "") {
979                 Sequence temp(inRIndex); m->gobble(inRIndex);
980                 rindexBarcode.setAligned(temp.getAligned());
981             }
982             
983             int barcodeIndex = 0;
984             int primerIndex = 0;
985             Sequence savedFSeq(fSeq.getName(), fSeq.getAligned());  Sequence savedRSeq(rSeq.getName(), rSeq.getAligned());
986             Sequence savedFindex(findexBarcode.getName(), findexBarcode.getAligned()); Sequence savedRIndex(rindexBarcode.getName(), rindexBarcode.getAligned());
987             QualityScores* savedFQual = NULL; QualityScores* savedRQual = NULL;
988             if (thisfqualfile != "") {
989                 savedFQual = new QualityScores(fQual->getName(), fQual->getQualityScores());
990                 savedRQual = new QualityScores(rQual->getName(), rQual->getQualityScores());
991             }
992                         
993             if(numBarcodes != 0){
994                 if (thisfqualfile != "") {
995                     if ((thisfindexfile != "") || (thisrindexfile != "")) {
996                         success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
997                     }else {
998                         success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
999                     }
1000                 }else {
1001                     success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
1002                 }
1003                 if(success > bdiffs)            {       trashCode += 'b';       }
1004                 else{ currentSeqsDiffs += success;  }
1005             }
1006             
1007             if(numFPrimers != 0){
1008                 if (thisfqualfile != "") {
1009                     success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
1010                 }else {
1011                     success = trimOligos.stripForward(fSeq, rSeq, primerIndex);
1012                 }
1013                 if(success > pdiffs)            {       trashCode += 'f';       }
1014                 else{ currentSeqsDiffs += success;  }
1015             }
1016             
1017             if (currentSeqsDiffs > tdiffs)      {       trashCode += 't';   }
1018             
1019             if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
1020                 int thisSuccess = 0;
1021                 string thisTrashCode = "";
1022                 int thisCurrentSeqsDiffs = 0;
1023                 
1024                 int thisBarcodeIndex = 0;
1025                 int thisPrimerIndex = 0;
1026                 
1027                 if(numBarcodes != 0){
1028                     if (thisfqualfile != "") {
1029                         if ((thisfindexfile != "") || (thisrindexfile != "")) {
1030                             thisSuccess = rtrimOligos->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, thisBarcodeIndex);
1031                         }else {
1032                             thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisBarcodeIndex);
1033                         }
1034                     }else {
1035                         thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, thisBarcodeIndex);
1036                     }
1037                     if(thisSuccess > bdiffs)            {       thisTrashCode += 'b';   }
1038                     else{ thisCurrentSeqsDiffs += thisSuccess;  }
1039                 }
1040
1041                 if(numFPrimers != 0){
1042                     if (thisfqualfile != "") {
1043                         thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisPrimerIndex);
1044                     }else {
1045                         thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, thisPrimerIndex);
1046                     }
1047                     if(thisSuccess > pdiffs)            {       thisTrashCode += 'f';   }
1048                     else{ thisCurrentSeqsDiffs += thisSuccess;  }
1049                 }
1050                 
1051                 if (thisCurrentSeqsDiffs > tdiffs)      {       thisTrashCode += 't';   }
1052                 
1053                 if (thisTrashCode == "") {
1054                     trashCode = thisTrashCode;
1055                     success = thisSuccess;
1056                     currentSeqsDiffs = thisCurrentSeqsDiffs;
1057                     barcodeIndex = thisBarcodeIndex;
1058                     primerIndex = thisPrimerIndex;
1059                     savedFSeq.reverseComplement();
1060                     savedRSeq.reverseComplement();
1061                     fSeq.setAligned(savedFSeq.getAligned());
1062                     rSeq.setAligned(savedRSeq.getAligned());
1063                     if(thisfqualfile != ""){
1064                         savedFQual->flipQScores(); savedRQual->flipQScores();
1065                         fQual->setScores(savedFQual->getScores()); rQual->setScores(savedRQual->getScores());
1066                     }
1067                 }else { trashCode += "(" + thisTrashCode + ")";  }
1068             }
1069
1070             
1071             //flip the reverse reads
1072             rSeq.reverseComplement();
1073             if (thisfqualfile != "") { rQual->flipQScores(); }
1074
1075             //pairwise align
1076             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
1077             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
1078             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
1079             fSeq.setAligned(alignment->getSeqAAln());
1080             rSeq.setAligned(alignment->getSeqBAln());
1081             int length = fSeq.getAligned().length();
1082             
1083             //traverse alignments merging into one contiguous seq
1084             string contig = "";
1085             int numMismatches = 0;
1086             string seq1 = fSeq.getAligned();
1087             string seq2 = rSeq.getAligned();
1088             vector<int> scores1, scores2; 
1089             if (thisfqualfile != "") {
1090                 scores1 = fQual->getQualityScores();
1091                 scores2 = rQual->getQualityScores();
1092                 delete fQual; delete rQual;  delete savedFQual; delete savedRQual;
1093             }
1094             
1095             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
1096             int overlapStart = fSeq.getStartPos();
1097             int seq2Start = rSeq.getStartPos();
1098             
1099             //bigger of the 2 starting positions is the location of the overlapping start
1100             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
1101                 overlapStart = seq2Start; 
1102                 for (int i = 0; i < overlapStart; i++) { contig += seq1[i];  }
1103             }else { //seq1 starts later so take from 0 to overlapStart from seq2
1104                 for (int i = 0; i < overlapStart; i++) {  contig += seq2[i]; }
1105             }
1106             
1107             int seq1End = fSeq.getEndPos();
1108             int seq2End = rSeq.getEndPos();
1109             int overlapEnd = seq1End;
1110             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
1111             
1112             int oStart = contig.length();
1113             //cout << fSeq.getAligned()  << endl; cout << rSeq.getAligned() << endl;
1114             for (int i = overlapStart; i < overlapEnd; i++) {
1115                 //cout << seq1[i] << ' ' << seq2[i] << ' ' << scores1[ABaseMap[i]] << ' ' << scores2[BBaseMap[i]] << endl;
1116                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
1117                     contig += seq1[i];
1118                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
1119                     if (thisfqualfile != "") {
1120                         if (scores2[BBaseMap[i]] <= insert) { } //
1121                         else { contig += seq2[i];  }
1122                     }else { contig += seq2[i]; } //with no quality info, then we keep it?
1123                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
1124                     if (thisfqualfile != "") {
1125                         if (scores1[ABaseMap[i]] <= insert) { } //
1126                         else { contig += seq1[i];  }
1127                     }else { contig += seq1[i]; } //with no quality info, then we keep it?
1128                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
1129                     if (thisfqualfile != "") {
1130                         if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
1131                             char c = seq1[i];
1132                             if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
1133                             contig += c;
1134                         }else { //if no, base becomes n
1135                             contig += 'N';
1136                         }
1137                         numMismatches++;
1138                     }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
1139                 }else { //should never get here
1140                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
1141                 }
1142             }
1143             int oend = contig.length();
1144             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
1145                 for (int i = overlapEnd; i < length; i++) { contig += seq2[i];  }
1146             }else { //seq2 ends before seq1 so take from overlap to length from seq1
1147                 for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
1148             }
1149             //cout << contig << endl;
1150             //exit(1);
1151             if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart);  if (contig.length() == 0) { trashCode += "l"; } }
1152             
1153             if(trashCode.length() == 0){
1154                 bool ignore = false;
1155                 
1156                 if (m->debug) { m->mothurOut(fSeq.getName()); }
1157                 
1158                 if (createOligosGroup) {
1159                         string thisGroup = oligos->getGroupName(barcodeIndex, primerIndex);
1160                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
1161                         
1162                         int pos = thisGroup.find("ignore");
1163                         if (pos == string::npos) {
1164                             groupMap[fSeq.getName()] = thisGroup; 
1165                         
1166                             map<string, int>::iterator it = groupCounts.find(thisGroup);
1167                             if (it == groupCounts.end()) {      groupCounts[thisGroup] = 1; }
1168                             else { groupCounts[it->first] ++; }
1169                         }else { ignore = true; }
1170                 }else if (createFileGroup) {
1171                     int pos = group.find("ignore");
1172                     if (pos == string::npos) {
1173                         groupMap[fSeq.getName()] = group;
1174                         
1175                         map<string, int>::iterator it = groupCounts.find(group);
1176                         if (it == groupCounts.end()) {  groupCounts[group] = 1; }
1177                         else { groupCounts[it->first] ++; }
1178                     }else { ignore = true; }
1179                 }
1180                 if (m->debug) { m->mothurOut("\n"); }
1181             
1182                 if(allFiles && !ignore){
1183                     ofstream output;
1184                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
1185                     output << ">" << fSeq.getName() << endl << contig << endl;
1186                     output.close();
1187                 }
1188                 
1189                 //output
1190                 outFasta << ">" << fSeq.getName() << endl << contig << endl;
1191                 int numNs = 0;
1192                 for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; }  }
1193                 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
1194             }else {
1195                 //output
1196                 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
1197             }
1198             num++;
1199             
1200                         //report progress
1201                         if((num) % 1000 == 0){  m->mothurOutJustToScreen(toString(num)); m->mothurOutEndLine();         }
1202                 }
1203         
1204                 //report progress
1205                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1206         
1207         inFFasta.close();
1208         inRFasta.close();
1209         outFasta.close();
1210         outScrapFasta.close();
1211         outMisMatch.close();
1212         if (thisfqualfile != "") {
1213             inFQual.close();
1214             inRQual.close();
1215         }
1216         delete alignment;
1217         if (reorient) { delete rtrimOligos; }
1218         
1219         if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);  }
1220     
1221         return num;
1222     }
1223         catch(exception& e) {
1224                 m->errorOut(e, "MakeContigsCommand", "driver");
1225                 exit(1);
1226         }
1227 }
1228 //**********************************************************************************************************************
1229 vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq, string findex, string rindex){
1230     try {
1231         vector< vector<string> > files;
1232         //maps processors number to file pointer
1233         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual, tempfiles[4] = forwardIndex, [4] = forwardReverse
1234         map<int, vector<ofstream*> >::iterator it;
1235         
1236         //create files to write to
1237         for (int i = 0; i < processors; i++) {
1238             vector<ofstream*> temp;
1239             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1240             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1241             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1242             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1243             ofstream* outFI = new ofstream;     temp.push_back(outFI);  
1244             ofstream* outRI = new ofstream;     temp.push_back(outRI);  
1245             tempfiles[i] = temp;
1246             
1247             vector<string> names;
1248             string thisOutputDir = outputDir;
1249             if (outputDir == "") { thisOutputDir = m->hasPath(ffastq); }
1250             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "ffastatemp";
1251             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp";
1252             string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp";
1253             string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp";
1254             string findexfilename = ""; string rindexfilename = "";
1255             noneOk = false; //flag to oligos file read that its okay to allow for non paired barcodes
1256             if (findex != "") {  findexfilename = thisOutputDir + m->getRootName(m->getSimpleName(findex)) + toString(i) + "findextemp"; m->openOutputFile(findexfilename, *outFI);  noneOk = true; }
1257             if (rindex != "") {  rindexfilename = thisOutputDir + m->getRootName(m->getSimpleName(rindex)) + toString(i) + "rindextemp";  m->openOutputFile(rindexfilename, *outRI); noneOk = true; }
1258             names.push_back(ffastafilename); names.push_back(fqualfilename);
1259             names.push_back(rfastafilename); names.push_back(rqualfilename);
1260             names.push_back(findexfilename); names.push_back(rindexfilename);
1261             files.push_back(names);
1262             
1263             m->openOutputFile(ffastafilename, *outFF);
1264             m->openOutputFile(rfastafilename, *outRF);
1265             m->openOutputFile(fqualfilename, *outFQ);
1266             m->openOutputFile(rqualfilename, *outRQ);
1267         }
1268         
1269         if (m->control_pressed) {
1270             //close files, delete ofstreams
1271             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1272             //remove files
1273             for (int i = 0; i < files.size(); i++) {  
1274                 for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") {  m->mothurRemove(files[i][j]); }  }
1275             }
1276         }
1277         
1278         ifstream inForward;
1279         m->openInputFile(ffastq, inForward);
1280         
1281         ifstream inReverse;
1282         m->openInputFile(rfastq, inReverse);
1283         
1284         ifstream infIndex, inrIndex;
1285         bool findexIsGood = false;
1286         bool rindexIsGood = false;
1287         if (findex != "") { m->openInputFile(findex, infIndex); findexIsGood = true; }
1288         if (rindex != "") { m->openInputFile(rindex, inrIndex); rindexIsGood = true; }
1289         
1290         count = 0;
1291         map<string, fastqRead> uniques;
1292         map<string, fastqRead> iUniques;
1293         map<string, pairFastqRead> pairUniques;
1294         map<string, fastqRead>::iterator itUniques;
1295         while ((!inForward.eof()) || (!inReverse.eof()) || (findexIsGood) || (rindexIsGood)) {
1296             
1297             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1298             
1299             //get a read from forward and reverse fastq files
1300             bool ignoref, ignorer, ignorefi, ignoreri;
1301             fastqRead thisFread, thisRread, thisFIread, thisRIread;
1302             if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
1303             else { ignoref = true; }
1304             if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
1305             else { ignorer = true; }
1306             if (findexIsGood) { thisFIread = readFastq(infIndex, ignorefi); if (infIndex.eof()) { findexIsGood = false; } }
1307             else { ignorefi = true; }
1308             if (rindexIsGood) { thisRIread = readFastq(inrIndex, ignoreri);  if (inrIndex.eof()) { rindexIsGood = false; } }
1309             else { ignoreri = true; }
1310             
1311             bool allowOne = false;
1312             if ((findex == "") || (rindex == "")) { allowOne = true; }
1313             vector<pairFastqRead> frReads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1314             vector<pairFastqRead> friReads = getReads(ignorefi, ignoreri, thisFIread, thisRIread, iUniques, allowOne);
1315             
1316             //add in index info if provided
1317             vector<pairFastqRead> reads = frReads;
1318             if ((findex != "") || (rindex != "")) {  reads = mergeReads(frReads, friReads, pairUniques); }
1319             
1320             for (int i = 0; i < reads.size(); i++) {
1321                 fastqRead fread = reads[i].forward;
1322                 fastqRead rread = reads[i].reverse;
1323                 fastqRead firead = reads[i].findex;
1324                 fastqRead riread = reads[i].rindex;
1325                 
1326                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); if (findex != "") { m->mothurOut(toString(count) + '\t' + firead.name + '\n'); } if (rindex != "") { m->mothurOut(toString(count) + '\t' + riread.name + '\n'); } }
1327                
1328                 //if (checkReads(fread, rread, ffastq, rfastq)) {
1329                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1330                     
1331                     //if the reads are okay write to output files
1332                     int process = count % processors;
1333                     
1334                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1335                     *(tempfiles[process][1]) << ">" << fread.name << endl;
1336                     for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1337                     *(tempfiles[process][1]) << endl;
1338                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1339                     *(tempfiles[process][3]) << ">" << rread.name << endl;
1340                     for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1341                     *(tempfiles[process][3]) << endl;
1342                     if (findex != "") {  *(tempfiles[process][4]) << ">" << firead.name << endl << firead.sequence << endl; }
1343                     if (rindex != "") {  *(tempfiles[process][5]) << ">" << riread.name << endl << riread.sequence << endl; }
1344                 
1345                     count++;
1346                     
1347                     //report progress
1348                     if((count) % 10000 == 0){   m->mothurOutJustToScreen(toString(count)); m->mothurOutEndLine();               }
1349                 //}
1350             }
1351                 }
1352                 //report progress
1353                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1354         
1355         if (uniques.size() != 0) {
1356             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1357                 if (m->control_pressed) { break; }
1358                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1359             }
1360             for (map<string, pairFastqRead>:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) {
1361                 if (m->control_pressed) { break; }
1362                 m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n");
1363             }
1364             m->mothurOutEndLine();
1365         }
1366         
1367         //close files, delete ofstreams
1368         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1369         inForward.close();
1370         inReverse.close();
1371         if (findex != "") { infIndex.close(); }
1372         if (rindex != "") { inrIndex.close(); }
1373        
1374         return files;
1375     }
1376     catch(exception& e) {
1377         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1378         exit(1);
1379     }
1380 }
1381 //**********************************************************************************************************************
1382 vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& count, string ffasta, string rfasta){
1383     try {
1384         vector< vector<string> > files;
1385         //maps processors number to file pointer
1386         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1387         map<int, vector<ofstream*> >::iterator it;
1388         
1389         //create files to write to
1390         for (int i = 0; i < processors; i++) {
1391             vector<ofstream*> temp;
1392             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1393             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1394             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1395             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1396             tempfiles[i] = temp;
1397             
1398             vector<string> names;
1399             string thisOutputDir = outputDir;
1400             if (outputDir == "") { thisOutputDir = m->hasPath(ffasta); }
1401             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffasta)) + toString(i) + "ffastatemp";
1402             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfasta)) + toString(i) + "rfastatemp";
1403             string fqualfilename = "";
1404             if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp";  m->openOutputFile(fqualfilename, *outFQ); }
1405             string rqualfilename = "";
1406             if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); }
1407             string findexfilename = ""; string rindexfilename = "";
1408             names.push_back(ffastafilename); names.push_back(fqualfilename);
1409             names.push_back(rfastafilename); names.push_back(rqualfilename);
1410             names.push_back(findexfilename); names.push_back(rindexfilename);
1411             files.push_back(names);
1412             
1413             m->openOutputFile(ffastafilename, *outFF);
1414             m->openOutputFile(rfastafilename, *outRF);
1415         }
1416         
1417         if (m->control_pressed) {
1418             //close files, delete ofstreams
1419             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1420             //remove files
1421             for (int i = 0; i < files.size(); i++) {  
1422                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1423             }
1424         }
1425         
1426         ifstream inForwardFasta;
1427         m->openInputFile(ffasta, inForwardFasta);
1428         
1429         ifstream inReverseFasta;
1430         m->openInputFile(rfasta, inReverseFasta);
1431         
1432         ifstream inForwardQual, inReverseQual;
1433         if (fqualfile != "") { m->openInputFile(fqualfile, inForwardQual); m->openInputFile(rqualfile, inReverseQual); }
1434         
1435         count = 0;
1436         map<string, fastqRead> uniques;
1437         map<string, fastqRead>::iterator itUniques;
1438         while ((!inForwardFasta.eof()) || (!inReverseFasta.eof())) {
1439             
1440             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1441             
1442             //get a reads from forward and reverse fasta files
1443             bool ignoref, ignorer;
1444             fastqRead thisFread, thisRread;
1445             if (!inForwardFasta.eof()) {  
1446                 ignoref = false; 
1447                 Sequence temp(inForwardFasta); m->gobble(inForwardFasta);
1448                 thisFread.name = temp.getName();
1449                 thisFread.sequence = temp.getUnaligned();
1450             }else { ignoref = true; }
1451             if (!inReverseFasta.eof()) {  
1452                 ignorer = false; 
1453                 Sequence temp(inReverseFasta); m->gobble(inReverseFasta);
1454                 thisRread.name = temp.getName();
1455                 thisRread.sequence = temp.getUnaligned();  
1456             }else { ignorer = true; }
1457             
1458             //get qual reads if given
1459             if (fqualfile != "") {
1460                 if (!inForwardQual.eof() && !ignoref) {  
1461                     QualityScores temp(inForwardQual); m->gobble(inForwardQual);
1462                     //if forward files dont match ignore read
1463                     if (thisFread.name != temp.getName()) { ignoref = true; } 
1464                     else { thisFread.scores = temp.getQualityScores(); }
1465                 }else { ignoref = true; }
1466                 if (!inReverseQual.eof() && !ignorer) {  
1467                     QualityScores temp(inReverseQual); m->gobble(inReverseQual);
1468                     //if reverse files dont match ignore read
1469                     if (thisRread.name != temp.getName()) { ignorer = true; } 
1470                     else { thisRread.scores = temp.getQualityScores(); }
1471                 }else { ignorer = true; }
1472             }
1473             
1474             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1475             
1476             for (int i = 0; i < reads.size(); i++) {
1477                 fastqRead fread = reads[i].forward;
1478                 fastqRead rread = reads[i].reverse;
1479                 
1480                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1481                 
1482                // if (checkReads(fread, rread, ffasta, rfasta)) {
1483                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1484                     
1485                     //if the reads are okay write to output files
1486                     int process = count % processors;
1487                     
1488                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1489                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1490                     if (fqualfile != "") { //if you have quality info, print it
1491                         *(tempfiles[process][1]) << ">" << fread.name << endl;
1492                         for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1493                         *(tempfiles[process][1]) << endl;
1494                         *(tempfiles[process][3]) << ">" << rread.name << endl;
1495                         for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1496                         *(tempfiles[process][3]) << endl;
1497                     }
1498                     count++;
1499                     
1500                     //report progress
1501                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1502                 //}
1503             }
1504                 }
1505                 //report progress
1506                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1507         
1508         if (uniques.size() != 0) {
1509             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1510                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1511             }
1512             m->mothurOutEndLine();
1513         }
1514         
1515         //close files, delete ofstreams
1516         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1517         inReverseFasta.close(); 
1518         inForwardFasta.close(); 
1519         if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); }
1520         
1521         return files;
1522     }
1523     catch(exception& e) {
1524         m->errorOut(e, "MakeContigsCommand", "readFastaFiles");
1525         exit(1);
1526     }
1527 }
1528 //**********************************************************************************************************************
1529 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool allowOne){
1530     try {
1531         vector<pairFastqRead> reads;
1532         map<string, fastqRead>::iterator itUniques;
1533             
1534         if (!ignoref && !ignorer) {
1535             if (forward.name == reverse.name) { 
1536                 pairFastqRead temp(forward, reverse);
1537                 reads.push_back(temp);
1538             }else {
1539                 bool match = false;
1540                 //if no match are the names only different by 1 and 2?
1541                 string tempFRead = forward.name.substr(0, forward.name.length()-1);
1542                 string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
1543                 if (tempFRead == tempRRead) {
1544                     if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
1545                         forward.name = tempFRead;
1546                         reverse.name = tempRRead;
1547                         pairFastqRead temp(forward, reverse);
1548                         reads.push_back(temp);
1549                         match = true;
1550                     }
1551                 }
1552                 
1553                 if (!match) {
1554                     //look for forward pair
1555                     itUniques = uniques.find(forward.name);
1556                     if (itUniques != uniques.end()) {  //we have the pair for this read
1557                         pairFastqRead temp(forward, itUniques->second);
1558                         reads.push_back(temp);
1559                         uniques.erase(itUniques);
1560                     }else { //save this read for later
1561                         uniques[forward.name] = forward;
1562                     }
1563                     
1564                     //look for reverse pair
1565                     itUniques = uniques.find(reverse.name);
1566                     if (itUniques != uniques.end()) {  //we have the pair for this read
1567                         pairFastqRead temp(itUniques->second, reverse);
1568                         reads.push_back(temp);
1569                         uniques.erase(itUniques);
1570                     }else { //save this read for later
1571                         uniques[reverse.name] = reverse;
1572                     }
1573                 }
1574                                 
1575             }
1576         }else if (!ignoref && ignorer) { //ignore reverse keep forward
1577             if (allowOne) {
1578                 fastqRead dummy;
1579                 pairFastqRead temp(forward, dummy);
1580                 reads.push_back(temp);
1581             }else {
1582                 //look for forward pair
1583                 itUniques = uniques.find(forward.name);
1584                 if (itUniques != uniques.end()) {  //we have the pair for this read
1585                     pairFastqRead temp(forward, itUniques->second);
1586                     reads.push_back(temp);
1587                     uniques.erase(itUniques);
1588                 }else { //save this read for later
1589                     uniques[forward.name] = forward;
1590                 }
1591             }
1592         }else if (ignoref && !ignorer) { //ignore forward keep reverse
1593             if (allowOne) {
1594                 fastqRead dummy;
1595                 pairFastqRead temp(dummy, reverse);
1596                 reads.push_back(temp);
1597             }else {
1598                 //look for reverse pair
1599                 itUniques = uniques.find(reverse.name);
1600                 if (itUniques != uniques.end()) {  //we have the pair for this read
1601                     pairFastqRead temp(itUniques->second, reverse);
1602                     reads.push_back(temp);
1603                     uniques.erase(itUniques);
1604                 }else { //save this read for later
1605                     uniques[reverse.name] = reverse;
1606                 }
1607             }
1608         }//else ignore both and do nothing
1609         
1610         return reads;
1611     }
1612     catch(exception& e) {
1613         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1614         exit(1);
1615     }
1616 }
1617 //**********************************************************************************************************************
1618 //look through the reads from the forward and reverse files and try to find matching reads from index files.
1619 vector<pairFastqRead> MakeContigsCommand::mergeReads(vector<pairFastqRead> thisReads, vector<pairFastqRead> indexes, map<string, pairFastqRead>& uniques){
1620     try {
1621         vector<pairFastqRead> reads;
1622         map<string, pairFastqRead>::iterator itUniques;
1623         
1624         set<int> foundIndexes;
1625         for (int i = 0; i < thisReads.size(); i++) {
1626             bool found = false;
1627             for (int j = 0; j < indexes.size(); j++) {
1628                 
1629                 //incase only one index
1630                 string indexName = indexes[j].forward.name;
1631                 if (indexName == "") { indexName = indexes[j].reverse.name; }
1632                 
1633                 if (thisReads[i].forward.name == indexName){
1634                     thisReads[i].findex = indexes[j].forward;
1635                     thisReads[i].rindex = indexes[j].reverse;
1636                     reads.push_back(thisReads[i]);
1637                     found = true;
1638                     foundIndexes.insert(j);
1639                 }
1640             }
1641             
1642             if (!found) {
1643                 //look for forward pair
1644                 itUniques = uniques.find('i'+thisReads[i].forward.name);
1645                 if (itUniques != uniques.end()) {  //we have the pair for this read
1646                     thisReads[i].findex = itUniques->second.forward;
1647                     thisReads[i].rindex = itUniques->second.reverse;
1648                     reads.push_back(thisReads[i]);
1649                     uniques.erase(itUniques);
1650                 }else { //save this read for later
1651                     uniques['r'+thisReads[i].forward.name] = thisReads[i];
1652                 }
1653             }
1654         }
1655         
1656         if (foundIndexes.size() != indexes.size()) { //if we didnt match all the indexes look for them in uniques
1657             for (int j = 0; j < indexes.size(); j++) {
1658                 if (foundIndexes.count(j) == 0) { //we didnt find this one
1659                     //incase only one index
1660                     string indexName = indexes[j].forward.name;
1661                     if (indexName == "") { indexName = indexes[j].reverse.name; }
1662                     
1663                     //look for forward pair
1664                     itUniques = uniques.find('r'+indexName);
1665                     if (itUniques != uniques.end()) {  //we have the pair for this read
1666                         pairFastqRead temp(itUniques->second.forward, itUniques->second.reverse, indexes[j].forward, indexes[j].reverse);
1667                         reads.push_back(temp);
1668                         uniques.erase(itUniques);
1669                     }else { //save this read for later
1670                         uniques['i'+indexName] = indexes[j];
1671                     }
1672                 }
1673             }
1674         }
1675        
1676         
1677         return reads;
1678     }
1679     catch(exception& e) {
1680         m->errorOut(e, "MakeContigsCommand", "mergeReads");
1681         exit(1);
1682     }
1683 }
1684 //**********************************************************************************************************************
1685 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1686     try {
1687         fastqRead read;
1688         
1689         ignore = false;
1690         
1691         //read sequence name
1692         string line = m->getline(in); m->gobble(in);
1693         vector<string> pieces = m->splitWhiteSpace(line);
1694         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
1695         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
1696         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1697         else { name = name.substr(1); }
1698         
1699         //read sequence
1700         string sequence = m->getline(in); m->gobble(in);
1701         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1702         
1703         //read sequence name
1704         line = m->getline(in); m->gobble(in);
1705         pieces = m->splitWhiteSpace(line);
1706         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
1707         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1708         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1709         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1710         
1711         //read quality scores
1712         string quality = m->getline(in); m->gobble(in);
1713         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1714          
1715         //sanity check sequence length and number of quality scores match
1716         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1717         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1718         
1719         vector<int> qualScores = convertQual(quality);
1720         
1721         m->checkName(name);
1722         read.name = name;
1723         read.sequence = sequence;
1724         read.scores = qualScores;
1725         
1726         if (m->debug) { m->mothurOut("[DEBUG]: " + read.name + " " + read.sequence + " " + quality + "\n"); }
1727
1728         return read;
1729     }
1730     catch(exception& e) {
1731         m->errorOut(e, "MakeContigsCommand", "readFastq");
1732         exit(1);
1733     }
1734 }
1735 /**********************************************************************************************************************
1736 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
1737     try {
1738         bool good = true;
1739         
1740         //do sequence lengths match
1741         if (forward.sequence.length() != reverse.sequence.length()) {
1742             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfile + ", ignoring.\n");
1743             good = false; 
1744         }
1745         
1746         //do number of qual scores match 
1747         if (forward.scores.size() != reverse.scores.size()) {
1748             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfile + ", ignoring.\n");
1749             good = false; 
1750         }
1751
1752         return good;
1753     }
1754     catch(exception& e) {
1755         m->errorOut(e, "MakeContigsCommand", "checkReads");
1756         exit(1);
1757     }
1758 }*/
1759 //***************************************************************************************************************
1760 //lines can be 2, 3, or 4 columns
1761 // forward.fastq reverse.fastq -> 2 column
1762 // groupName forward.fastq reverse.fastq -> 3 column
1763 // forward.fastq reverse.fastq forward.index.fastq  reverse.index.fastq  -> 4 column
1764 // forward.fastq reverse.fastq none  reverse.index.fastq  -> 4 column
1765 // forward.fastq reverse.fastq forward.index.fastq  none  -> 4 column
1766 vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
1767         try {
1768         vector< vector<string> > files;
1769         string forward, reverse, findex, rindex;
1770         
1771         ifstream in;
1772         m->openInputFile(filename, in);
1773         
1774         while(!in.eof()) {
1775             
1776             if (m->control_pressed) { return files; }
1777             
1778             string line = m->getline(in);  m->gobble(in);
1779             vector<string> pieces = m->splitWhiteSpace(line);
1780             
1781             string group = "";
1782             if (pieces.size() == 2) {
1783                 forward = pieces[0];
1784                 reverse = pieces[1];
1785                 group = "";
1786                 findex = "";
1787                 rindex = "";
1788             }else if (pieces.size() == 3) {
1789                 group = pieces[0];
1790                 forward = pieces[1];
1791                 reverse = pieces[2];
1792                 findex = "";
1793                 rindex = "";
1794                 createFileGroup = true;
1795             }else if (pieces.size() == 4) {
1796                 forward = pieces[0];
1797                 reverse = pieces[1];
1798                 findex = pieces[2];
1799                 rindex = pieces[3];
1800                 if ((findex == "none") || (findex == "NONE")){ findex = ""; }
1801                 if ((rindex == "none") || (rindex == "NONE")){ rindex = ""; }
1802             }else {
1803                 m->mothurOut("[ERROR]: file lines can be 2, 3, or 4 columns. The forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one. \n"); m->control_pressed = true;
1804             }
1805             
1806             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); }
1807             
1808             if (inputDir != "") {
1809                 string path = m->hasPath(forward);
1810                 if (path == "") {  forward = inputDir + forward;  }
1811                 
1812                 path = m->hasPath(reverse);
1813                 if (path == "") {  reverse = inputDir + reverse;  }
1814                 
1815                 if (findex != "") {
1816                     path = m->hasPath(findex);
1817                     if (path == "") {  findex = inputDir + findex;  }
1818                 }
1819                 
1820                 if (rindex != "") {
1821                     path = m->hasPath(rindex);
1822                     if (path == "") {  rindex = inputDir + rindex;  }
1823                 }
1824             }
1825             
1826             //check to make sure both are able to be opened
1827             ifstream in2;
1828             int openForward = m->openInputFile(forward, in2, "noerror");
1829             
1830             //if you can't open it, try default location
1831             if (openForward == 1) {
1832                 if (m->getDefaultPath() != "") { //default path is set
1833                     string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
1834                     m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
1835                     ifstream in3;
1836                     openForward = m->openInputFile(tryPath, in3, "noerror");
1837                     in3.close();
1838                     forward = tryPath;
1839                 }
1840             }
1841             
1842             //if you can't open it, try output location
1843             if (openForward == 1) {
1844                 if (m->getOutputDir() != "") { //default path is set
1845                     string tryPath = m->getOutputDir() + m->getSimpleName(forward);
1846                     m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1847                     ifstream in4;
1848                     openForward = m->openInputFile(tryPath, in4, "noerror");
1849                     forward = tryPath;
1850                     in4.close();
1851                 }
1852             }
1853             
1854             if (openForward == 1) { //can't find it
1855                 m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n"); 
1856             }else{  in2.close();  }
1857             
1858             ifstream in3;
1859             int openReverse = m->openInputFile(reverse, in3, "noerror");
1860             
1861             //if you can't open it, try default location
1862             if (openReverse == 1) {
1863                 if (m->getDefaultPath() != "") { //default path is set
1864                     string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
1865                     m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
1866                     ifstream in3;
1867                     openReverse = m->openInputFile(tryPath, in3, "noerror");
1868                     in3.close();
1869                     reverse = tryPath;
1870                 }
1871             }
1872             
1873             //if you can't open it, try output location
1874             if (openReverse == 1) {
1875                 if (m->getOutputDir() != "") { //default path is set
1876                     string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
1877                     m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1878                     ifstream in4;
1879                     openReverse = m->openInputFile(tryPath, in4, "noerror");
1880                     reverse = tryPath;
1881                     in4.close();
1882                 }
1883             }
1884             
1885             if (openReverse == 1) { //can't find it
1886                 m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n"); 
1887             }else{  in3.close();  }
1888             
1889             int openFindex = 0;
1890             if (findex != "") {
1891                 ifstream in4;
1892                 openFindex = m->openInputFile(findex, in4, "noerror"); in4.close();
1893                 
1894                 //if you can't open it, try default location
1895                 if (openFindex == 1) {
1896                     if (m->getDefaultPath() != "") { //default path is set
1897                         string tryPath = m->getDefaultPath() + m->getSimpleName(findex);
1898                         m->mothurOut("Unable to open " + findex + ". Trying default " + tryPath); m->mothurOutEndLine();
1899                         ifstream in5;
1900                         openFindex = m->openInputFile(tryPath, in5, "noerror");
1901                         in5.close();
1902                         findex = tryPath;
1903                     }
1904                 }
1905                 
1906                 //if you can't open it, try output location
1907                 if (openFindex == 1) {
1908                     if (m->getOutputDir() != "") { //default path is set
1909                         string tryPath = m->getOutputDir() + m->getSimpleName(findex);
1910                         m->mothurOut("Unable to open " + findex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1911                         ifstream in6;
1912                         openFindex = m->openInputFile(tryPath, in6, "noerror");
1913                         findex = tryPath;
1914                         in6.close();
1915                     }
1916                 }
1917                 
1918                 if (openFindex == 1) { //can't find it
1919                     m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n");
1920                 }
1921             }
1922             
1923             int openRindex = 0;
1924             if (rindex != "") {
1925                 ifstream in7;
1926                 openRindex = m->openInputFile(rindex, in7, "noerror"); in7.close();
1927                 
1928                 //if you can't open it, try default location
1929                 if (openRindex == 1) {
1930                     if (m->getDefaultPath() != "") { //default path is set
1931                         string tryPath = m->getDefaultPath() + m->getSimpleName(rindex);
1932                         m->mothurOut("Unable to open " + rindex + ". Trying default " + tryPath); m->mothurOutEndLine();
1933                         ifstream in8;
1934                         openRindex = m->openInputFile(tryPath, in8, "noerror");
1935                         in8.close();
1936                         rindex = tryPath;
1937                     }
1938                 }
1939                 
1940                 //if you can't open it, try output location
1941                 if (openRindex == 1) {
1942                     if (m->getOutputDir() != "") { //default path is set
1943                         string tryPath = m->getOutputDir() + m->getSimpleName(rindex);
1944                         m->mothurOut("Unable to open " + rindex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1945                         ifstream in9;
1946                         openRindex = m->openInputFile(tryPath, in9, "noerror");
1947                         rindex = tryPath;
1948                         in9.close();
1949                     }
1950                 }
1951                 
1952                 if (openRindex == 1) { //can't find it
1953                     m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n");
1954                 }
1955             }
1956
1957             
1958             if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1)) { //good pair
1959                 file2Group[files.size()] = group;
1960                 vector<string> pair;
1961                 pair.push_back(forward);
1962                 pair.push_back(reverse);
1963                 pair.push_back(findex);
1964                 pair.push_back(rindex);
1965                 if (((findex != "") || (rindex != "")) && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->control_pressed = true;  }
1966                 files.push_back(pair);
1967             }
1968         }
1969         in.close();
1970         
1971         return files;
1972     }
1973     catch(exception& e) {
1974         m->errorOut(e, "MakeContigsCommand", "readFileNames");
1975         exit(1);
1976     }
1977 }
1978 //***************************************************************************************************************
1979 //illumina data requires paired forward and reverse data
1980 //BARCODE   atgcatgc   atgcatgc    groupName 
1981 //PRIMER   atgcatgc   atgcatgc    groupName  
1982 //PRIMER   atgcatgc   atgcatgc
1983 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname, map<string, string>& fastaFile2Group){
1984         try {
1985         if (m->debug) { m->mothurOut("[DEBUG]: oligosfile = " + oligosfile + "\n"); }
1986         
1987         bool allBlank = false;
1988         oligos->read(oligosfile, false);
1989         
1990         if (m->control_pressed) { return false; } //error in reading oligos
1991         
1992         if (oligos->hasPairedBarcodes()) {
1993             numFPrimers = oligos->getPairedPrimers().size();
1994             numBarcodes = oligos->getPairedBarcodes().size();
1995         }else {
1996             m->mothurOut("[ERROR]: make.contigs requires paired barcodes and primers. You can set one end to NONE if you are using an index file.\n"); m->control_pressed = true;
1997         }
1998     
1999         if (m->control_pressed) { return false; }
2000     
2001         numLinkers = oligos->getLinkers().size();
2002         numSpacers = oligos->getSpacers().size();
2003         numRPrimers = oligos->getReversePrimers().size();
2004         if (numLinkers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n"); }
2005         if (numSpacers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n"); }
2006        
2007         vector<string> groupNames = oligos->getGroupNames();
2008         if (groupNames.size() == 0) { allFiles = 0; allBlank = true;  }
2009         
2010         
2011         fastaFileNames.resize(oligos->getBarcodeNames().size());
2012                 for(int i=0;i<fastaFileNames.size();i++){
2013             for(int j=0;j<oligos->getPrimerNames().size();j++){  fastaFileNames[i].push_back(""); }
2014                 }
2015     
2016         if (allFiles) {
2017             set<string> uniqueNames; //used to cleanup outputFileNames
2018             map<int, oligosPair> barcodes = oligos->getPairedBarcodes();
2019             map<int, oligosPair> primers = oligos->getPairedPrimers();
2020             for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
2021                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
2022                     
2023                     string primerName = oligos->getPrimerName(itPrimer->first);
2024                     string barcodeName = oligos->getBarcodeName(itBar->first);
2025                     
2026                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
2027                     else if ((primerName == "") && (barcodeName == "")) { } //do nothing
2028                     else {
2029                         string comboGroupName = "";
2030                         string fastaFileName = "";
2031                         string qualFileName = "";
2032                         string nameFileName = "";
2033                         string countFileName = "";
2034                         
2035                         if(primerName == ""){
2036                             comboGroupName = barcodeName;
2037                         }else{
2038                             if(barcodeName == ""){
2039                                 comboGroupName = primerName;
2040                             }
2041                             else{
2042                                 comboGroupName = barcodeName + "." + primerName;
2043                             }
2044                         }
2045                         
2046                         
2047                         ofstream temp;
2048                         map<string, string> variables;
2049                         variables["[filename]"] = rootname;
2050                         variables["[tag]"] = comboGroupName;
2051                         fastaFileName = getOutputFileName("fasta", variables);
2052                         if (uniqueNames.count(fastaFileName) == 0) {
2053                             outputNames.push_back(fastaFileName);
2054                             outputTypes["fasta"].push_back(fastaFileName);
2055                             uniqueNames.insert(fastaFileName);
2056                             fastaFile2Group[fastaFileName] = comboGroupName;
2057                         }
2058                         
2059                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
2060                         m->openOutputFile(fastaFileName, temp);         temp.close();
2061                         cout << fastaFileName << endl;
2062                     }
2063                 }
2064             }
2065         }
2066
2067         if (allBlank) {
2068             m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
2069             allFiles = false;
2070             return false;
2071         }
2072
2073         return true;
2074
2075         }
2076         catch(exception& e) {
2077                 m->errorOut(e, "MakeContigsCommand", "getOligos");
2078                 exit(1);
2079         }
2080 }
2081 //**********************************************************************************************************************
2082 vector<int> MakeContigsCommand::convertQual(string qual) {
2083         try {
2084                 vector<int> qualScores;
2085         bool negativeScores = false;
2086                 
2087                 for (int i = 0; i < qual.length(); i++) {
2088             
2089             int temp = 0;
2090             temp = int(qual[i]);
2091             if (format == "illumina") {
2092                 temp -= 64; //char '@'
2093             }else if (format == "illumina1.8+") {
2094                     temp -= int('!'); //char '!'
2095             }else if (format == "solexa") {
2096                 temp = int(convertTable[temp]); //convert to sanger
2097                 temp -= int('!'); //char '!'
2098             }else {
2099                 temp -= int('!'); //char '!'
2100             }
2101             
2102             if (temp < -5) { negativeScores = true; }
2103                         qualScores.push_back(temp);
2104                 }
2105                 
2106         if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  }
2107         
2108                 return qualScores;
2109         }
2110         catch(exception& e) {
2111                 m->errorOut(e, "MakeContigsCommand", "convertQual");
2112                 exit(1);
2113         }
2114 }
2115
2116 //**********************************************************************************************************************
2117
2118
2119
2120