]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
added otulabels to heatmap.bin. made OTUlabels “smarter” so OTU01 and OTU001 are...
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
15         CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
16         CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
17         CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
18         CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
19         CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
20         CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
21         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
22         CommandParameter pfindex("findex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pfindex);
23         CommandParameter prindex("rindex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(prindex);
24                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
26         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
27
28         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
29         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
30         CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
31                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
32                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
33                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
34                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
35         CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
36         CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
37                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
38         CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
41                 
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MakeContigsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string MakeContigsCommand::getHelpString(){     
53         try {
54                 string helpString = "";
55                 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. \n";
56         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
57         helpString += "If a forward index or reverse index file is provided barcodes be trimmed, and a group file will be created. The oligos parameter is required if an index file is given.\n";
58                 helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, findex, rindex, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
59                 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
60         helpString += "The file parameter is 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one.  Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
61         helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process.  If you provide one, you must provide the other.\n";
62         helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process.  If you provide one, you must provide the other.\n";
63         helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters.  If you provide one, you must provide the other.\n";
64                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
65         helpString += "The findex and rindex parameters are used to provide a forward index and reverse index files to process.  \n";
66         helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
67         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
68                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70         //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
71                 //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
72                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
73                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
74         helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base.  For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5).  If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
75                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
76                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
77         helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
78         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
79         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
80
81         helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
82         helpString += "The make.contigs command should be in the following format: \n";
83                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
84                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
85                 return helpString;
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
89                 exit(1);
90         }
91 }
92 //**********************************************************************************************************************
93 string MakeContigsCommand::getOutputPattern(string type) {
94     try {
95         string pattern = "";
96         
97         if (type == "fasta") {  pattern = "[filename],[tag],contigs.fasta"; } 
98         else if (type == "group") {  pattern = "[filename],[tag],contigs.groups"; }
99         else if (type == "report") {  pattern = "[filename],[tag],contigs.report"; }
100         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
101         
102         return pattern;
103     }
104     catch(exception& e) {
105         m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
106         exit(1);
107     }
108 }
109 //**********************************************************************************************************************
110 MakeContigsCommand::MakeContigsCommand(){       
111         try {
112                 abort = true; calledHelp = true; 
113                 setParameters();
114                 vector<string> tempOutNames;
115                 outputTypes["fasta"] = tempOutNames;
116         outputTypes["group"] = tempOutNames;
117         outputTypes["report"] = tempOutNames;
118     }
119         catch(exception& e) {
120                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
121                 exit(1);
122         }
123 }
124 //**********************************************************************************************************************
125 MakeContigsCommand::MakeContigsCommand(string option)  {
126         try {
127                 abort = false; calledHelp = false;
128         createFileGroup = false; createOligosGroup = false;
129         
130                 //allow user to run help
131                 if(option == "help") { help(); abort = true; calledHelp = true; }
132                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
133                 
134                 else {
135                         vector<string> myArray = setParameters();
136                         
137                         OptionParser parser(option);
138                         map<string, string> parameters = parser.getParameters(); 
139                         
140                         ValidParameters validParameter("pairwise.seqs");
141                         map<string, string>::iterator it;
142                         
143                         //check to make sure all parameters are valid for command
144                         for (it = parameters.begin(); it != parameters.end(); it++) { 
145                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
146                         }
147                         
148                         //initialize outputTypes
149                         vector<string> tempOutNames;
150                         outputTypes["fasta"] = tempOutNames;
151             outputTypes["report"] = tempOutNames;
152             outputTypes["group"] = tempOutNames;
153                         
154             
155                         //if the user changes the input directory command factory will send this info to us in the output parameter 
156                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
157                         if (inputDir == "not found"){   inputDir = "";          }
158                         else { 
159                                 string path;
160                 it = parameters.find("ffastq");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
166                                 }
167                 
168                 it = parameters.find("rfastq");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
174                                 }
175                 
176                 it = parameters.find("ffasta");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["ffasta"] = inputDir + it->second;           }
182                                 }
183                 
184                 it = parameters.find("rfasta");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["rfasta"] = inputDir + it->second;           }
190                                 }
191                 
192                 it = parameters.find("fqfile");
193                                 //user has given a template file
194                                 if(it != parameters.end()){ 
195                                         path = m->hasPath(it->second);
196                                         //if the user has not given a path then, add inputdir. else leave path alone.
197                                         if (path == "") {       parameters["fqfile"] = inputDir + it->second;           }
198                                 }
199                 
200                 it = parameters.find("rqfile");
201                                 //user has given a template file
202                                 if(it != parameters.end()){ 
203                                         path = m->hasPath(it->second);
204                                         //if the user has not given a path then, add inputdir. else leave path alone.
205                                         if (path == "") {       parameters["rqfile"] = inputDir + it->second;           }
206                                 }
207                 
208                 it = parameters.find("file");
209                                 //user has given a template file
210                                 if(it != parameters.end()){ 
211                                         path = m->hasPath(it->second);
212                                         //if the user has not given a path then, add inputdir. else leave path alone.
213                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
214                                 }
215                 
216                 it = parameters.find("oligos");
217                                 //user has given a template file
218                                 if(it != parameters.end()){ 
219                                         path = m->hasPath(it->second);
220                                         //if the user has not given a path then, add inputdir. else leave path alone.
221                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
222                                 }
223                 
224                 it = parameters.find("findex");
225                                 //user has given a template file
226                                 if(it != parameters.end()){
227                                         path = m->hasPath(it->second);
228                                         //if the user has not given a path then, add inputdir. else leave path alone.
229                                         if (path == "") {       parameters["findex"] = inputDir + it->second;           }
230                                 }
231                 
232                 it = parameters.find("rindex");
233                                 //user has given a template file
234                                 if(it != parameters.end()){
235                                         path = m->hasPath(it->second);
236                                         //if the user has not given a path then, add inputdir. else leave path alone.
237                                         if (path == "") {       parameters["rindex"] = inputDir + it->second;           }
238                                 }
239             }
240             
241             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
242                         if (ffastqfile == "not open") {  abort = true; }        
243                         else if (ffastqfile == "not found") { ffastqfile = ""; }
244                         
245                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
246                         if (rfastqfile == "not open") {  abort = true; }        
247                         else if (rfastqfile == "not found") { rfastqfile = "";  }
248             
249             ffastafile = validParameter.validFile(parameters, "ffasta", true);
250                         if (ffastafile == "not open") {  abort = true; }        
251                         else if (ffastafile == "not found") { ffastafile = ""; }
252                         
253                         rfastafile = validParameter.validFile(parameters, "rfasta", true);
254                         if (rfastafile == "not open") {  abort = true; }        
255                         else if (rfastafile == "not found") { rfastafile = "";  }
256             
257             fqualfile = validParameter.validFile(parameters, "fqfile", true);
258                         if (fqualfile == "not open") {  abort = true; } 
259                         else if (fqualfile == "not found") { fqualfile = ""; }
260                         
261                         rqualfile = validParameter.validFile(parameters, "rqfile", true);
262                         if (rqualfile == "not open") {  abort = true; } 
263                         else if (rqualfile == "not found") { rqualfile = "";  }
264             
265             file = validParameter.validFile(parameters, "file", true);
266                         if (file == "not open") {  abort = true; }      
267                         else if (file == "not found") { file = "";  }
268             
269             //provide at least
270             if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
271             if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
272             if ((ffastqfile != "") && (rfastqfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffastq, you must provide a rfastq file.\n"); }
273             if ((ffastqfile == "") && (rfastqfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfastq, you must provide a ffastq file.\n"); }
274             if ((ffastafile != "") && (rfastafile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffasta, you must provide a rfasta file.\n"); }
275             if ((ffastafile == "") && (rfastafile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfasta, you must provide a ffasta file.\n"); }
276             if ((fqualfile != "") && (rqualfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the fqfile, you must provide a rqfile file.\n"); }
277             if ((fqualfile == "") && (rqualfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile, you must provide a fqfile file.\n"); }
278             if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) {
279                 abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n");
280             }
281             
282             oligosfile = validParameter.validFile(parameters, "oligos", true);
283                         if (oligosfile == "not found")      {   oligosfile = "";        }
284                         else if(oligosfile == "not open")   {   abort = true;       } 
285                         else {   m->setOligosFile(oligosfile);          }
286             
287             findexfile = validParameter.validFile(parameters, "findex", true);
288                         if (findexfile == "not found")      {   findexfile = "";        }
289                         else if(findexfile == "not open")   {   abort = true;       }
290             
291             rindexfile = validParameter.validFile(parameters, "rindex", true);
292                         if (rindexfile == "not found")      {   rindexfile = "";        }
293                         else if(rindexfile == "not open")   {   abort = true;       }
294         
295             if ((rindexfile != "") || (findexfile != "")) {
296                                 if (oligosfile == ""){
297                                         oligosfile = m->getOligosFile();
298                                         if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n");  }
299                                         else {
300                         m->mothurOut("You need to provide an oligos file if you are going to use an index file.\n"); abort = true;
301                                         }
302                                 }
303                 
304                 //can only use an index file with the fastq parameters not fasta and qual
305                 if ((ffastafile != "") || (rfastafile != "")) {
306                     m->mothurOut("[ERROR]: You can only use an index file with the fastq parameters or the file option.\n"); abort = true;
307                 }
308                         }
309             
310             //if the user changes the output directory command factory will send this info to us in the output parameter 
311                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
312                  outputDir = ""; 
313             }
314                         
315
316                         //check for optional parameter and set defaults
317                         // ...at some point should added some additional type checking...
318                         string temp;
319                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
320                         m->mothurConvert(temp, match);  
321                         
322                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
323                         m->mothurConvert(temp, misMatch);  
324             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
325                         
326                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
327                         m->mothurConvert(temp, gapOpen);  
328             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
329                         
330                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
331                         m->mothurConvert(temp, gapExtend); 
332             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
333                         
334             temp = validParameter.validFile(parameters, "insert", false);       if (temp == "not found"){       temp = "20";                    }
335                         m->mothurConvert(temp, insert); 
336             if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
337
338             temp = validParameter.validFile(parameters, "deltaq", false);       if (temp == "not found"){       temp = "6";                     }
339                         m->mothurConvert(temp, deltaq);
340             
341                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
342                         m->setProcessors(temp);
343                         m->mothurConvert(temp, processors);
344             
345             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
346                         m->mothurConvert(temp, bdiffs);
347                         
348                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
349                         m->mothurConvert(temp, pdiffs);
350             
351   //          temp = validParameter.validFile(parameters, "ldiffs", false);             if (temp == "not found") { temp = "0"; }
352 //                      m->mothurConvert(temp, ldiffs);
353             ldiffs = 0;
354             
355  //           temp = validParameter.validFile(parameters, "sdiffs", false);             if (temp == "not found") { temp = "0"; }
356  //           m->mothurConvert(temp, sdiffs);
357             sdiffs = 0;
358                         
359                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
360                         m->mothurConvert(temp, tdiffs);
361                         
362                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
363
364             temp = validParameter.validFile(parameters, "allfiles", false);             if (temp == "not found") { temp = "F"; }
365                         allFiles = m->isTrue(temp);
366             
367             
368             temp = validParameter.validFile(parameters, "trimoverlap", false);          if (temp == "not found") { temp = "F"; }
369                         trimOverlap = m->isTrue(temp);
370                         
371                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
372                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
373             
374             format = validParameter.validFile(parameters, "format", false);             if (format == "not found"){     format = "illumina1.8+";        }
375             
376             if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
377                                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
378                                 abort=true;
379                         }
380             
381             //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
382             for (int i = -64; i < 65; i++) { 
383                 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
384                 convertTable.push_back(temp);
385             }
386         }
387                 
388         }
389         catch(exception& e) {
390                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
391                 exit(1);
392         }
393 }
394 //**********************************************************************************************************************
395 int MakeContigsCommand::execute(){
396         try {
397                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
398         
399         //read ffastq and rfastq files creating fasta and qual files.
400         //this function will create a forward and reverse, fasta and qual files for each processor.
401         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual.  filesToProcess is for each filepair in the file parameter file.  for ffastq and rfastq this will be size 1.
402         unsigned long int numReads = 0;
403         int start = time(NULL);
404         longestBase = 1000;
405         m->mothurOut("Reading fastq data...\n"); 
406         vector < vector< vector<string> > > filesToProcess = preProcessData(numReads);
407         m->mothurOut("Done.\n");
408        
409         if (m->control_pressed) { return 0; }
410         
411         map<string, string> cvars;
412         string compOutputDir = outputDir;
413         if (outputDir == "") { compOutputDir = m->hasPath(file); }
414         cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
415         cvars["[tag]"] = "";
416         string compositeGroupFile = getOutputFileName("group",cvars);
417         cvars["[tag]"] = "trim";
418         string compositeFastaFile = getOutputFileName("fasta",cvars);
419         cvars["[tag]"] = "scrap";
420         string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
421         cvars["[tag]"] = "";
422         string compositeMisMatchFile = getOutputFileName("report",cvars);
423         
424         if (filesToProcess.size() > 1) { //clear files for append below
425             ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
426             m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
427             m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
428             m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
429             outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
430             outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
431             outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
432         }
433         
434         map<string, int> totalGroupCounts;
435         
436         for (int l = 0; l < filesToProcess.size(); l++) {
437             
438             m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
439             
440             groupCounts.clear();
441             groupMap.clear();
442             vector<vector<string> > fastaFileNames;
443             createOligosGroup = false;
444             string outputGroupFileName;
445             map<string, string> variables; 
446             string thisOutputDir = outputDir;
447             if (outputDir == "") {  thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
448             variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
449             variables["[tag]"] = "";
450             if(oligosfile != ""){  createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]);  }
451             if (createOligosGroup || createFileGroup) {
452                 outputGroupFileName = getOutputFileName("group",variables);
453             }
454             
455             //give group in file file precedence
456             if (createFileGroup) {  createOligosGroup = false; }
457             
458             variables["[tag]"] = "trim";
459             string outFastaFile = getOutputFileName("fasta",variables);
460             variables["[tag]"] = "scrap";
461             string outScrapFastaFile = getOutputFileName("fasta",variables);
462             variables["[tag]"] = "";
463             string outMisMatchFile = getOutputFileName("report",variables);
464                         
465             m->mothurOut("Making contigs...\n"); 
466             createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
467             
468             //remove temp fasta and qual files
469             for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); }  }
470             
471             if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); }  return 0; }
472             
473             if(allFiles){
474                 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
475                 map<string, string>::iterator it;
476                 set<string> namesToRemove;
477                 for(int i=0;i<fastaFileNames.size();i++){
478                     for(int j=0;j<fastaFileNames[0].size();j++){
479                         if (fastaFileNames[i][j] != "") {
480                             if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
481                                 if(m->isBlank(fastaFileNames[i][j])){
482                                     m->mothurRemove(fastaFileNames[i][j]);
483                                     namesToRemove.insert(fastaFileNames[i][j]);
484                                 }else{  
485                                     it = uniqueFastaNames.find(fastaFileNames[i][j]);
486                                     if (it == uniqueFastaNames.end()) { 
487                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
488                                     }   
489                                 }
490                             }
491                         }
492                     }
493                 }
494                 
495                 //remove names for outputFileNames, just cleans up the output
496                 vector<string> outputNames2;
497                 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
498                 outputNames = outputNames2;
499                 
500                 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
501                     ifstream in;
502                     m->openInputFile(it->first, in);
503                     
504                     ofstream out;
505                     string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
506                     thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
507                     m->openOutputFile(thisGroupName, out);
508                     
509                     while (!in.eof()){
510                         if (m->control_pressed) { break; }
511                         
512                         Sequence currSeq(in); m->gobble(in);
513                         out << currSeq.getName() << '\t' << it->second << endl;
514                     }
515                     out.close();
516                     in.close();
517                 }
518             }
519             
520             if (createFileGroup || createOligosGroup) {
521                 ofstream outGroup;
522                 m->openOutputFile(outputGroupFileName, outGroup);
523                 for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
524                     outGroup << itGroup->first << '\t' << itGroup->second << endl;
525                 }
526                 outGroup.close();
527             }
528             
529             if (filesToProcess.size() > 1) { //merge into large combo files
530                 if (createFileGroup || createOligosGroup) {
531                     if (l == 0) {
532                         ofstream outCGroup;
533                         m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
534                         outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
535                     }
536                     m->appendFiles(outputGroupFileName, compositeGroupFile);
537                     if (!allFiles) { m->mothurRemove(outputGroupFileName);  }
538                     else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
539                     
540                     for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
541                         map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
542                         if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
543                         else { itTemp->second += itGroups->second; } //existing group, update total
544                     }
545                 }
546                 if (l == 0) {  m->appendFiles(outMisMatchFile, compositeMisMatchFile);  }
547                 else {  m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile);  }
548                 m->appendFiles(outFastaFile, compositeFastaFile);
549                 m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
550                 if (!allFiles) {
551                     m->mothurRemove(outMisMatchFile);
552                     m->mothurRemove(outFastaFile);
553                     m->mothurRemove(outScrapFastaFile);
554                 }else {
555                     outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
556                     outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
557                     outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
558                 }
559             }else {
560                 totalGroupCounts = groupCounts;
561                 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
562                 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
563                 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
564                 if (createFileGroup || createOligosGroup) {
565                      outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); 
566                 }
567             }
568             m->mothurOut("Done.\n");
569         }
570         
571         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
572         
573         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
574         
575                 //output group counts
576                 m->mothurOutEndLine();
577                 int total = 0;
578                 if (totalGroupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
579                 for (map<string, int>::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) {
580             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
581                 }
582                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
583                 
584                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
585         
586         string currentFasta = "";
587                 itTypes = outputTypes.find("fasta");
588                 if (itTypes != outputTypes.end()) {
589                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
590                 }
591         
592         string currentGroup = "";
593                 itTypes = outputTypes.find("group");
594                 if (itTypes != outputTypes.end()) {
595                         if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
596                 }
597                 
598         //output files created by command
599                 m->mothurOutEndLine();
600                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
601                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
602                 m->mothurOutEndLine();
603
604         return 0;
605     }
606         catch(exception& e) {
607                 m->errorOut(e, "MakeContigsCommand", "execute");
608                 exit(1);
609         }
610 }
611 //**********************************************************************************************************************
612 vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned long int& numReads) {
613         try {
614         vector< vector< vector<string> > > filesToProcess;
615         
616         if (ffastqfile != "") { //reading one file
617             vector< vector<string> > files = readFastqFiles(numReads, ffastqfile, rfastqfile, findexfile, rindexfile);
618             //adjust for really large processors or really small files
619             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
620             if (numReads < processors) { 
621                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
622                 files.resize(numReads);
623                 processors = numReads; 
624             }
625             filesToProcess.push_back(files);
626         }else if (file != "") { //reading multiple files
627             //return only valid pairs
628             vector< vector<string> > filePairsToProcess = readFileNames(file);
629             
630             if (m->control_pressed) { return filesToProcess; }
631             
632             if (filePairsToProcess.size() != 0) {
633                 for (int i = 0; i < filePairsToProcess.size(); i++) {
634                     
635                     if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } }
636                     
637                     unsigned long int thisFilesReads;
638                     vector< vector<string> > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1], filePairsToProcess[i][2], filePairsToProcess[i][3]);
639                     
640                     //adjust for really large processors or really small files
641                     if (thisFilesReads < processors) { 
642                         m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n"); 
643                         for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); }
644                         //remove from file2Group if necassary
645                         map<int, string> cFile2Group;
646                         for (map<int, string>::iterator it = file2Group.begin(); it != file2Group.end(); it++) {
647                             if ((it->first) < i) { cFile2Group[it->first] = it->second; }
648                             else if ((it->first) == i) { } //do nothing, we removed files for i
649                             else { cFile2Group[(it->first-1)] = it->second; } //adjust files because i was removed
650                         }
651                         file2Group = cFile2Group;
652                     }else {
653                         filesToProcess.push_back(files);
654                         numReads += thisFilesReads;
655                     }
656                 }
657                 //all files are bad
658                 if (numReads == 0) {  m->control_pressed = true; }
659             }
660         }else if (ffastafile != "") {
661             vector< vector<string> > files = readFastaFiles(numReads, ffastafile, rfastafile);
662             //adjust for really large processors or really small files
663             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
664             if (numReads < processors) { 
665                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
666                 files.resize(numReads);
667                 processors = numReads; 
668             }
669             filesToProcess.push_back(files);
670         }else { m->control_pressed = true; } //should not get here
671         
672         return filesToProcess;
673     }
674         catch(exception& e) {
675                 m->errorOut(e, "MakeContigsCommand", "preProcessData");
676                 exit(1);
677         }
678 }
679 //**********************************************************************************************************************
680 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int index) {
681         try {
682                 int num = 0;
683                 vector<int> processIDS;
684         string group = "";
685         map<int, string>::iterator it = file2Group.find(index);
686         if (it != file2Group.end()) { group = it->second; }
687         
688 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
689                 int process = 0;
690                 
691                 //loop through and create all the processes you want
692                 while (process != processors-1) {
693                         int pid = fork();
694                         
695                         if (pid > 0) {
696                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
697                                 process++;
698                         }else if (pid == 0){
699                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
700                 
701                                 if(allFiles){
702                                         ofstream temp;
703                     
704                                         for(int i=0;i<tempFASTAFileNames.size();i++){
705                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
706                                                         if (tempFASTAFileNames[i][j] != "") {
707                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
708                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
709                                                         }
710                                                 }
711                                         }
712                                 }
713                 
714                                 num = driver(files[process], 
715                              outputFasta + toString(getpid()) + ".temp", 
716                              outputScrapFasta + toString(getpid()) + ".temp", 
717                              outputMisMatches + toString(getpid()) + ".temp",
718                              tempFASTAFileNames, process, group);
719                                 
720                                 //pass groupCounts to parent
721                 ofstream out;
722                 string tempFile = toString(getpid()) + ".num.temp";
723                 m->openOutputFile(tempFile, out);
724                 out << num << endl;
725                                 if (createFileGroup || createOligosGroup) {
726                                         out << groupCounts.size() << endl;
727                                         
728                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
729                                                 out << it->first << '\t' << it->second << endl;
730                                         }
731                     
732                     out << groupMap.size() << endl;
733                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
734                                                 out << it->first << '\t' << it->second << endl;
735                                         }
736                                 }
737                 out.close();
738                                 
739                                 exit(0);
740                         }else { 
741                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
742                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
743                                 exit(0);
744                         }
745                 }
746                 
747         ofstream temp;
748                 m->openOutputFile(outputFasta, temp);           temp.close();
749         m->openOutputFile(outputScrapFasta, temp);              temp.close();
750                 
751                 //do my part
752                 num = driver(files[processors-1], outputFasta, outputScrapFasta,  outputMisMatches, fastaFileNames, processors-1, group);
753                 
754                 //force parent to wait until all the processes are done
755                 for (int i=0;i<processIDS.size();i++) { 
756                         int temp = processIDS[i];
757                         wait(&temp);
758                 }
759         
760                 for (int i = 0; i < processIDS.size(); i++) {
761             ifstream in;
762             string tempFile = toString(processIDS[i]) + ".num.temp";
763             m->openInputFile(tempFile, in);
764             int tempNum;
765             in >> tempNum; num += tempNum; m->gobble(in);
766             
767                         if (createFileGroup || createOligosGroup) {
768                                 string group;
769                                 in >> tempNum; m->gobble(in);
770                                 
771                                 if (tempNum != 0) {
772                                         for (int j = 0; j < tempNum; j++) { 
773                         int groupNum;
774                                                 in >> group >> groupNum; m->gobble(in);
775                         
776                                                 map<string, int>::iterator it = groupCounts.find(group);
777                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
778                                                 else { groupCounts[it->first] += groupNum; }
779                                         }
780                                 }
781                 in >> tempNum; m->gobble(in);
782                 if (tempNum != 0) {
783                                         for (int j = 0; j < tempNum; j++) { 
784                         string group, seqName;
785                                                 in >> seqName >> group; m->gobble(in);
786                         
787                                                 map<string, string>::iterator it = groupMap.find(seqName);
788                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
789                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
790                                         }
791                                 }
792                         }
793             in.close(); m->mothurRemove(tempFile);
794         }
795     #else
796         
797         //////////////////////////////////////////////////////////////////////////////////////////////////////
798                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
799                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
800                 //////////////////////////////////////////////////////////////////////////////////////////////////////
801                 
802                 vector<contigsData*> pDataArray; 
803                 DWORD   dwThreadIdArray[processors-1];
804                 HANDLE  hThreadArray[processors-1]; 
805                 
806                 //Create processor worker threads.
807                 for( int h=0; h<processors-1; h++ ){
808                         string extension = "";
809                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
810             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
811                         
812             if(allFiles){
813                 ofstream temp;
814                 
815                 for(int i=0;i<tempFASTAFileNames.size();i++){
816                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
817                         if (tempFASTAFileNames[i][j] != "") {
818                             tempFASTAFileNames[i][j] += extension;
819                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
820                         }
821                     }
822                 }
823             }
824                                  
825                         contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
826                         pDataArray.push_back(tempcontig);
827             
828                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
829                 }
830         
831         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
832
833         if(allFiles){
834             ofstream temp;
835             string extension = toString(processors-1) + ".temp";
836             
837             for(int i=0;i<tempFASTAFileNames.size();i++){
838                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
839                     if (tempFASTAFileNames[i][j] != "") {
840                         tempFASTAFileNames[i][j] += extension;
841                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
842                     }
843                 }
844             }
845         }
846
847                 //parent do my part
848                 ofstream temp;
849                 m->openOutputFile(outputFasta, temp);           temp.close();
850         m->openOutputFile(outputScrapFasta, temp);              temp.close();
851         
852         //do my part
853         processIDS.push_back(processors-1);
854                 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"),  (outputScrapFasta+ toString(processors-1) + ".temp"),  (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group);
855         
856                 //Wait until all threads have terminated.
857                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
858                 
859                 //Close all thread handles and free memory allocations.
860                 for(int i=0; i < pDataArray.size(); i++){
861                         num += pDataArray[i]->count;
862             if (!pDataArray[i]->done) {
863                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true; 
864             }
865             for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
866                 map<string, int>::iterator it2 = groupCounts.find(it->first);
867                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
868                 else { groupCounts[it->first] += it->second; }
869             }
870             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
871                 map<string, string>::iterator it2 = groupMap.find(it->first);
872                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
873                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
874             }
875             CloseHandle(hThreadArray[i]);
876                         delete pDataArray[i];
877         }
878                                 
879     #endif      
880         
881         for (int i = 0; i < processIDS.size(); i++) {
882                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
883                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
884                         
885                         m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
886                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
887             
888             m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
889                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
890             
891             if(allFiles){
892                                 for(int j=0;j<fastaFileNames.size();j++){
893                                         for(int k=0;k<fastaFileNames[j].size();k++){
894                                                 if (fastaFileNames[j][k] != "") {
895                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
896                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
897                                                 }
898                                         }
899                                 }
900                         }
901                 }
902                 
903                 return num;
904         }
905         catch(exception& e) {
906                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
907                 exit(1);
908         }
909 }
910 //**********************************************************************************************************************
911 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process, string group){
912     try {
913         
914         Alignment* alignment;
915         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
916                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
917         
918         int num = 0;
919         string thisffastafile = files[0];
920         string thisfqualfile = files[1];
921         string thisrfastafile = files[2];
922         string thisrqualfile = files[3];
923         string thisfindexfile = files[4];
924         string thisrindexfile = files[5];
925         
926         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); }
927         
928         ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex;
929         ofstream outFasta, outMisMatch, outScrapFasta;
930         m->openInputFile(thisffastafile, inFFasta);
931         m->openInputFile(thisrfastafile, inRFasta);
932         if (thisfqualfile != "") {
933             m->openInputFile(thisfqualfile, inFQual);
934             m->openInputFile(thisrqualfile, inRQual);
935         }
936         
937         if (thisfindexfile != "") { m->openInputFile(thisfindexfile, inFIndex);  }
938         if (thisrindexfile != "") { m->openInputFile(thisrindexfile, inRIndex);  }
939         
940         m->openOutputFile(outputFasta, outFasta);
941         m->openOutputFile(outputScrapFasta, outScrapFasta);
942         m->openOutputFile(outputMisMatches, outMisMatch);
943         outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  
944         
945         TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
946         
947         while ((!inFFasta.eof()) && (!inRFasta.eof())) {
948             
949             if (m->control_pressed) { break; }
950             
951             int success = 1;
952             string trashCode = "";
953             int currentSeqsDiffs = 0;
954
955             //read seqs and quality info
956             Sequence fSeq(inFFasta); m->gobble(inFFasta);
957             Sequence rSeq(inRFasta); m->gobble(inRFasta);
958             QualityScores* fQual = NULL; QualityScores* rQual = NULL;
959             if (thisfqualfile != "") {
960                 fQual = new QualityScores(inFQual); m->gobble(inFQual);
961                 rQual = new QualityScores(inRQual); m->gobble(inRQual);
962             }
963             Sequence findexBarcode("findex", "NONE");  Sequence rindexBarcode("rindex", "NONE");
964             if (thisfindexfile != "") {
965                 Sequence temp(inFIndex); m->gobble(inFIndex);
966                 findexBarcode.setAligned(temp.getAligned());
967             }
968             
969             if (thisrindexfile != "") {
970                 Sequence temp(inRIndex); m->gobble(inRIndex);
971                 rindexBarcode.setAligned(temp.getAligned());
972             }
973             
974             int barcodeIndex = 0;
975             int primerIndex = 0;
976                         
977             if(barcodes.size() != 0){
978                 if (thisfqualfile != "") {
979                     if ((thisfindexfile != "") || (thisrindexfile != "")) {
980                         success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
981                     }else {
982                         success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
983                     }
984                 }else {
985                     success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
986                 }
987                 if(success > bdiffs)            {       trashCode += 'b';       }
988                 else{ currentSeqsDiffs += success;  }
989             }
990             
991             if(primers.size() != 0){
992                 if (thisfqualfile != "") {
993                     success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
994                 }else {
995                     success = trimOligos.stripForward(fSeq, rSeq, primerIndex);
996                 }
997                 if(success > pdiffs)            {       trashCode += 'f';       }
998                 else{ currentSeqsDiffs += success;  }
999             }
1000             
1001             if (currentSeqsDiffs > tdiffs)      {       trashCode += 't';   }
1002             
1003             //flip the reverse reads
1004             rSeq.reverseComplement();
1005             if (thisfqualfile != "") { rQual->flipQScores(); }
1006
1007             //pairwise align
1008             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
1009             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
1010             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
1011             fSeq.setAligned(alignment->getSeqAAln());
1012             rSeq.setAligned(alignment->getSeqBAln());
1013             int length = fSeq.getAligned().length();
1014             
1015             //traverse alignments merging into one contiguous seq
1016             string contig = "";
1017             int numMismatches = 0;
1018             string seq1 = fSeq.getAligned();
1019             string seq2 = rSeq.getAligned();
1020             vector<int> scores1, scores2; 
1021             if (thisfqualfile != "") {
1022                 scores1 = fQual->getQualityScores();
1023                 scores2 = rQual->getQualityScores();
1024                 delete fQual; delete rQual;
1025             }
1026             
1027             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
1028             int overlapStart = fSeq.getStartPos();
1029             int seq2Start = rSeq.getStartPos();
1030             
1031             //bigger of the 2 starting positions is the location of the overlapping start
1032             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
1033                 overlapStart = seq2Start; 
1034                 for (int i = 0; i < overlapStart; i++) { contig += seq1[i];  }
1035             }else { //seq1 starts later so take from 0 to overlapStart from seq2
1036                 for (int i = 0; i < overlapStart; i++) {  contig += seq2[i]; }
1037             }
1038             
1039             int seq1End = fSeq.getEndPos();
1040             int seq2End = rSeq.getEndPos();
1041             int overlapEnd = seq1End;
1042             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
1043             
1044             int oStart = contig.length();
1045             //cout << fSeq.getAligned()  << endl; cout << rSeq.getAligned() << endl;
1046             for (int i = overlapStart; i < overlapEnd; i++) {
1047                 //cout << seq1[i] << ' ' << seq2[i] << ' ' << scores1[ABaseMap[i]] << ' ' << scores2[BBaseMap[i]] << endl;
1048                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
1049                     contig += seq1[i];
1050                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
1051                     if (thisfqualfile != "") {
1052                         if (scores2[BBaseMap[i]] <= insert) { } //
1053                         else { contig += seq2[i];  }
1054                     }else { contig += seq2[i]; } //with no quality info, then we keep it?
1055                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
1056                     if (thisfqualfile != "") {
1057                         if (scores1[ABaseMap[i]] <= insert) { } //
1058                         else { contig += seq1[i];  }
1059                     }else { contig += seq1[i]; } //with no quality info, then we keep it?
1060                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
1061                     if (thisfqualfile != "") {
1062                         if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
1063                             char c = seq1[i];
1064                             if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
1065                             contig += c;
1066                         }else { //if no, base becomes n
1067                             contig += 'N';
1068                         }
1069                         numMismatches++;
1070                     }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
1071                 }else { //should never get here
1072                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
1073                 }
1074             }
1075             int oend = contig.length();
1076             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
1077                 for (int i = overlapEnd; i < length; i++) { contig += seq2[i];  }
1078             }else { //seq2 ends before seq1 so take from overlap to length from seq1
1079                 for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
1080             }
1081             //cout << contig << endl;
1082             //exit(1);
1083             if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart);  if (contig.length() == 0) { trashCode += "l"; } }
1084             
1085             if(trashCode.length() == 0){
1086                 bool ignore = false;
1087                 
1088                 if (m->debug) { m->mothurOut(fSeq.getName()); }
1089                 
1090                 if (createOligosGroup) {
1091                     if(barcodes.size() != 0){
1092                         string thisGroup = barcodeNameVector[barcodeIndex];
1093                         if (primers.size() != 0) { 
1094                             if (primerNameVector[primerIndex] != "") { 
1095                                 if(thisGroup != "") {
1096                                     thisGroup += "." + primerNameVector[primerIndex]; 
1097                                 }else {
1098                                     thisGroup = primerNameVector[primerIndex]; 
1099                                 }
1100                             } 
1101                         }
1102                         
1103                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
1104                         
1105                         int pos = thisGroup.find("ignore");
1106                         if (pos == string::npos) {
1107                             groupMap[fSeq.getName()] = thisGroup; 
1108                         
1109                             map<string, int>::iterator it = groupCounts.find(thisGroup);
1110                             if (it == groupCounts.end()) {      groupCounts[thisGroup] = 1; }
1111                             else { groupCounts[it->first] ++; }
1112                         }else { ignore = true; }
1113                         
1114                     }
1115                 }else if (createFileGroup) {
1116                     int pos = group.find("ignore");
1117                     if (pos == string::npos) {
1118                         groupMap[fSeq.getName()] = group;
1119                         
1120                         map<string, int>::iterator it = groupCounts.find(group);
1121                         if (it == groupCounts.end()) {  groupCounts[group] = 1; }
1122                         else { groupCounts[it->first] ++; }
1123                     }else { ignore = true; }
1124                 }
1125                 if (m->debug) { m->mothurOut("\n"); }
1126                 
1127                 if(allFiles && !ignore){
1128                     ofstream output;
1129                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
1130                     output << ">" << fSeq.getName() << endl << contig << endl;
1131                     output.close();
1132                 }
1133                 
1134                 //output
1135                 outFasta << ">" << fSeq.getName() << endl << contig << endl;
1136                 int numNs = 0;
1137                 for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; }  }
1138                 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
1139             }else {
1140                 //output
1141                 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
1142             }
1143             num++;
1144             
1145                         //report progress
1146                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1147                 }
1148         
1149                 //report progress
1150                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1151         
1152         inFFasta.close();
1153         inRFasta.close();
1154         outFasta.close();
1155         outScrapFasta.close();
1156         outMisMatch.close();
1157         if (thisfqualfile != "") {
1158             inFQual.close();
1159             inRQual.close();
1160         }
1161         delete alignment;
1162         
1163         if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);  }
1164     
1165         return num;
1166     }
1167         catch(exception& e) {
1168                 m->errorOut(e, "MakeContigsCommand", "driver");
1169                 exit(1);
1170         }
1171 }
1172 //**********************************************************************************************************************
1173 vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq, string findex, string rindex){
1174     try {
1175         vector< vector<string> > files;
1176         //maps processors number to file pointer
1177         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual, tempfiles[4] = forwardIndex, [4] = forwardReverse
1178         map<int, vector<ofstream*> >::iterator it;
1179         
1180         //create files to write to
1181         for (int i = 0; i < processors; i++) {
1182             vector<ofstream*> temp;
1183             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1184             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1185             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1186             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1187             ofstream* outFI = new ofstream;     temp.push_back(outFI);  
1188             ofstream* outRI = new ofstream;     temp.push_back(outRI);  
1189             tempfiles[i] = temp;
1190             
1191             vector<string> names;
1192             string thisOutputDir = outputDir;
1193             if (outputDir == "") { thisOutputDir = m->hasPath(ffastq); }
1194             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "ffastatemp";
1195             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp";
1196             string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp";
1197             string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp";
1198             string findexfilename = ""; string rindexfilename = "";
1199             noneOk = false; //flag to oligos file read that its okay to allow for non paired barcodes
1200             if (findex != "") {  findexfilename = thisOutputDir + m->getRootName(m->getSimpleName(findex)) + toString(i) + "findextemp"; m->openOutputFile(findexfilename, *outFI);  noneOk = true; }
1201             if (rindex != "") {  rindexfilename = thisOutputDir + m->getRootName(m->getSimpleName(rindex)) + toString(i) + "rindextemp";  m->openOutputFile(rindexfilename, *outRI); noneOk = true; }
1202             names.push_back(ffastafilename); names.push_back(fqualfilename);
1203             names.push_back(rfastafilename); names.push_back(rqualfilename);
1204             names.push_back(findexfilename); names.push_back(rindexfilename);
1205             files.push_back(names);
1206             
1207             m->openOutputFile(ffastafilename, *outFF);
1208             m->openOutputFile(rfastafilename, *outRF);
1209             m->openOutputFile(fqualfilename, *outFQ);
1210             m->openOutputFile(rqualfilename, *outRQ);
1211         }
1212         
1213         if (m->control_pressed) {
1214             //close files, delete ofstreams
1215             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1216             //remove files
1217             for (int i = 0; i < files.size(); i++) {  
1218                 for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") {  m->mothurRemove(files[i][j]); }  }
1219             }
1220         }
1221         
1222         ifstream inForward;
1223         m->openInputFile(ffastq, inForward);
1224         
1225         ifstream inReverse;
1226         m->openInputFile(rfastq, inReverse);
1227         
1228         ifstream infIndex, inrIndex;
1229         bool findexIsGood = false;
1230         bool rindexIsGood = false;
1231         if (findex != "") { m->openInputFile(findex, infIndex); findexIsGood = true; }
1232         if (rindex != "") { m->openInputFile(rindex, inrIndex); rindexIsGood = true; }
1233         
1234         count = 0;
1235         map<string, fastqRead> uniques;
1236         map<string, fastqRead> iUniques;
1237         map<string, pairFastqRead> pairUniques;
1238         map<string, fastqRead>::iterator itUniques;
1239         while ((!inForward.eof()) || (!inReverse.eof()) || (findexIsGood) || (rindexIsGood)) {
1240             
1241             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1242             
1243             //get a read from forward and reverse fastq files
1244             bool ignoref, ignorer, ignorefi, ignoreri;
1245             fastqRead thisFread, thisRread, thisFIread, thisRIread;
1246             if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
1247             else { ignoref = true; }
1248             if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
1249             else { ignorer = true; }
1250             if (findexIsGood) { thisFIread = readFastq(infIndex, ignorefi); if (infIndex.eof()) { findexIsGood = false; } }
1251             else { ignorefi = true; }
1252             if (rindexIsGood) { thisRIread = readFastq(inrIndex, ignoreri);  if (inrIndex.eof()) { rindexIsGood = false; } }
1253             else { ignoreri = true; }
1254             
1255             bool allowOne = false;
1256             if ((findex == "") || (rindex == "")) { allowOne = true; }
1257             vector<pairFastqRead> frReads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1258             vector<pairFastqRead> friReads = getReads(ignorefi, ignoreri, thisFIread, thisRIread, iUniques, allowOne);
1259             
1260             //add in index info if provided
1261             vector<pairFastqRead> reads = frReads;
1262             if ((findex != "") || (rindex != "")) {  reads = mergeReads(frReads, friReads, pairUniques); }
1263             
1264             for (int i = 0; i < reads.size(); i++) {
1265                 fastqRead fread = reads[i].forward;
1266                 fastqRead rread = reads[i].reverse;
1267                 fastqRead firead = reads[i].findex;
1268                 fastqRead riread = reads[i].rindex;
1269                 
1270                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); if (findex != "") { m->mothurOut(toString(count) + '\t' + firead.name + '\n'); } if (rindex != "") { m->mothurOut(toString(count) + '\t' + riread.name + '\n'); } }
1271                
1272                 //if (checkReads(fread, rread, ffastq, rfastq)) {
1273                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1274                     
1275                     //if the reads are okay write to output files
1276                     int process = count % processors;
1277                     
1278                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1279                     *(tempfiles[process][1]) << ">" << fread.name << endl;
1280                     for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1281                     *(tempfiles[process][1]) << endl;
1282                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1283                     *(tempfiles[process][3]) << ">" << rread.name << endl;
1284                     for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1285                     *(tempfiles[process][3]) << endl;
1286                     if (findex != "") {  *(tempfiles[process][4]) << ">" << firead.name << endl << firead.sequence << endl; }
1287                     if (rindex != "") {  *(tempfiles[process][5]) << ">" << riread.name << endl << riread.sequence << endl; }
1288                 
1289                     count++;
1290                     
1291                     //report progress
1292                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1293                 //}
1294             }
1295                 }
1296                 //report progress
1297                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1298         
1299         if (uniques.size() != 0) {
1300             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1301                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1302             }
1303             for (map<string, pairFastqRead>:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) {
1304                 m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n");
1305             }
1306             m->mothurOutEndLine();
1307         }
1308         
1309         //close files, delete ofstreams
1310         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1311         inForward.close();
1312         inReverse.close();
1313         if (findex != "") { infIndex.close(); }
1314         if (rindex != "") { inrIndex.close(); }
1315        
1316         return files;
1317     }
1318     catch(exception& e) {
1319         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1320         exit(1);
1321     }
1322 }
1323 //**********************************************************************************************************************
1324 vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& count, string ffasta, string rfasta){
1325     try {
1326         vector< vector<string> > files;
1327         //maps processors number to file pointer
1328         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1329         map<int, vector<ofstream*> >::iterator it;
1330         
1331         //create files to write to
1332         for (int i = 0; i < processors; i++) {
1333             vector<ofstream*> temp;
1334             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1335             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1336             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1337             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1338             tempfiles[i] = temp;
1339             
1340             vector<string> names;
1341             string thisOutputDir = outputDir;
1342             if (outputDir == "") { thisOutputDir = m->hasPath(ffasta); }
1343             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffasta)) + toString(i) + "ffastatemp";
1344             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfasta)) + toString(i) + "rfastatemp";
1345             string fqualfilename = "";
1346             if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp";  m->openOutputFile(fqualfilename, *outFQ); }
1347             string rqualfilename = "";
1348             if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); }
1349             string findexfilename = ""; string rindexfilename = "";
1350             names.push_back(ffastafilename); names.push_back(fqualfilename);
1351             names.push_back(rfastafilename); names.push_back(rqualfilename);
1352             names.push_back(findexfilename); names.push_back(rindexfilename);
1353             files.push_back(names);
1354             
1355             m->openOutputFile(ffastafilename, *outFF);
1356             m->openOutputFile(rfastafilename, *outRF);
1357         }
1358         
1359         if (m->control_pressed) {
1360             //close files, delete ofstreams
1361             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1362             //remove files
1363             for (int i = 0; i < files.size(); i++) {  
1364                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1365             }
1366         }
1367         
1368         ifstream inForwardFasta;
1369         m->openInputFile(ffasta, inForwardFasta);
1370         
1371         ifstream inReverseFasta;
1372         m->openInputFile(rfasta, inReverseFasta);
1373         
1374         ifstream inForwardQual, inReverseQual;
1375         if (fqualfile != "") { m->openInputFile(fqualfile, inForwardQual); m->openInputFile(rqualfile, inReverseQual); }
1376         
1377         count = 0;
1378         map<string, fastqRead> uniques;
1379         map<string, fastqRead>::iterator itUniques;
1380         while ((!inForwardFasta.eof()) || (!inReverseFasta.eof())) {
1381             
1382             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1383             
1384             //get a reads from forward and reverse fasta files
1385             bool ignoref, ignorer;
1386             fastqRead thisFread, thisRread;
1387             if (!inForwardFasta.eof()) {  
1388                 ignoref = false; 
1389                 Sequence temp(inForwardFasta); m->gobble(inForwardFasta);
1390                 thisFread.name = temp.getName();
1391                 thisFread.sequence = temp.getUnaligned();
1392             }else { ignoref = true; }
1393             if (!inReverseFasta.eof()) {  
1394                 ignorer = false; 
1395                 Sequence temp(inReverseFasta); m->gobble(inReverseFasta);
1396                 thisRread.name = temp.getName();
1397                 thisRread.sequence = temp.getUnaligned();  
1398             }else { ignorer = true; }
1399             
1400             //get qual reads if given
1401             if (fqualfile != "") {
1402                 if (!inForwardQual.eof() && !ignoref) {  
1403                     QualityScores temp(inForwardQual); m->gobble(inForwardQual);
1404                     //if forward files dont match ignore read
1405                     if (thisFread.name != temp.getName()) { ignoref = true; } 
1406                     else { thisFread.scores = temp.getQualityScores(); }
1407                 }else { ignoref = true; }
1408                 if (!inReverseQual.eof() && !ignorer) {  
1409                     QualityScores temp(inReverseQual); m->gobble(inReverseQual);
1410                     //if reverse files dont match ignore read
1411                     if (thisRread.name != temp.getName()) { ignorer = true; } 
1412                     else { thisRread.scores = temp.getQualityScores(); }
1413                 }else { ignorer = true; }
1414             }
1415             
1416             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1417             
1418             for (int i = 0; i < reads.size(); i++) {
1419                 fastqRead fread = reads[i].forward;
1420                 fastqRead rread = reads[i].reverse;
1421                 
1422                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1423                 
1424                // if (checkReads(fread, rread, ffasta, rfasta)) {
1425                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1426                     
1427                     //if the reads are okay write to output files
1428                     int process = count % processors;
1429                     
1430                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1431                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1432                     if (fqualfile != "") { //if you have quality info, print it
1433                         *(tempfiles[process][1]) << ">" << fread.name << endl;
1434                         for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1435                         *(tempfiles[process][1]) << endl;
1436                         *(tempfiles[process][3]) << ">" << rread.name << endl;
1437                         for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1438                         *(tempfiles[process][3]) << endl;
1439                     }
1440                     count++;
1441                     
1442                     //report progress
1443                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1444                 //}
1445             }
1446                 }
1447                 //report progress
1448                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1449         
1450         if (uniques.size() != 0) {
1451             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1452                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1453             }
1454             m->mothurOutEndLine();
1455         }
1456         
1457         //close files, delete ofstreams
1458         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1459         inReverseFasta.close(); 
1460         inForwardFasta.close(); 
1461         if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); }
1462         
1463         return files;
1464     }
1465     catch(exception& e) {
1466         m->errorOut(e, "MakeContigsCommand", "readFastaFiles");
1467         exit(1);
1468     }
1469 }
1470 //**********************************************************************************************************************
1471 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool allowOne){
1472     try {
1473         vector<pairFastqRead> reads;
1474         map<string, fastqRead>::iterator itUniques;
1475             
1476         if (!ignoref && !ignorer) {
1477             if (forward.name == reverse.name) { 
1478                 pairFastqRead temp(forward, reverse);
1479                 reads.push_back(temp);
1480             }else {
1481                 bool match = false;
1482                 //if no match are the names only different by 1 and 2?
1483                 string tempFRead = forward.name.substr(0, forward.name.length()-1);
1484                 string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
1485                 if (tempFRead == tempRRead) {
1486                     if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
1487                         forward.name = tempFRead;
1488                         reverse.name = tempRRead;
1489                         pairFastqRead temp(forward, reverse);
1490                         reads.push_back(temp);
1491                         match = true;
1492                     }
1493                 }
1494                 
1495                 if (!match) {
1496                     //look for forward pair
1497                     itUniques = uniques.find(forward.name);
1498                     if (itUniques != uniques.end()) {  //we have the pair for this read
1499                         pairFastqRead temp(forward, itUniques->second);
1500                         reads.push_back(temp);
1501                         uniques.erase(itUniques);
1502                     }else { //save this read for later
1503                         uniques[forward.name] = forward;
1504                     }
1505                     
1506                     //look for reverse pair
1507                     itUniques = uniques.find(reverse.name);
1508                     if (itUniques != uniques.end()) {  //we have the pair for this read
1509                         pairFastqRead temp(itUniques->second, reverse);
1510                         reads.push_back(temp);
1511                         uniques.erase(itUniques);
1512                     }else { //save this read for later
1513                         uniques[reverse.name] = reverse;
1514                     }
1515                 }
1516                                 
1517             }
1518         }else if (!ignoref && ignorer) { //ignore reverse keep forward
1519             if (allowOne) {
1520                 fastqRead dummy;
1521                 pairFastqRead temp(forward, dummy);
1522                 reads.push_back(temp);
1523             }else {
1524                 //look for forward pair
1525                 itUniques = uniques.find(forward.name);
1526                 if (itUniques != uniques.end()) {  //we have the pair for this read
1527                     pairFastqRead temp(forward, itUniques->second);
1528                     reads.push_back(temp);
1529                     uniques.erase(itUniques);
1530                 }else { //save this read for later
1531                     uniques[forward.name] = forward;
1532                 }
1533             }
1534         }else if (ignoref && !ignorer) { //ignore forward keep reverse
1535             if (allowOne) {
1536                 fastqRead dummy;
1537                 pairFastqRead temp(dummy, reverse);
1538                 reads.push_back(temp);
1539             }else {
1540                 //look for reverse pair
1541                 itUniques = uniques.find(reverse.name);
1542                 if (itUniques != uniques.end()) {  //we have the pair for this read
1543                     pairFastqRead temp(itUniques->second, reverse);
1544                     reads.push_back(temp);
1545                     uniques.erase(itUniques);
1546                 }else { //save this read for later
1547                     uniques[reverse.name] = reverse;
1548                 }
1549             }
1550         }//else ignore both and do nothing
1551         
1552         return reads;
1553     }
1554     catch(exception& e) {
1555         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1556         exit(1);
1557     }
1558 }
1559 //**********************************************************************************************************************
1560 //look through the reads from the forward and reverse files and try to find matching reads from index files.
1561 vector<pairFastqRead> MakeContigsCommand::mergeReads(vector<pairFastqRead> thisReads, vector<pairFastqRead> indexes, map<string, pairFastqRead>& uniques){
1562     try {
1563         vector<pairFastqRead> reads;
1564         map<string, pairFastqRead>::iterator itUniques;
1565         
1566         set<int> foundIndexes;
1567         for (int i = 0; i < thisReads.size(); i++) {
1568             bool found = false;
1569             for (int j = 0; j < indexes.size(); j++) {
1570                 
1571                 //incase only one index
1572                 string indexName = indexes[j].forward.name;
1573                 if (indexName == "") { indexName = indexes[j].reverse.name; }
1574                 
1575                 if (thisReads[i].forward.name == indexName){
1576                     thisReads[i].findex = indexes[j].forward;
1577                     thisReads[i].rindex = indexes[j].reverse;
1578                     reads.push_back(thisReads[i]);
1579                     found = true;
1580                     foundIndexes.insert(j);
1581                 }
1582             }
1583             
1584             if (!found) {
1585                 //look for forward pair
1586                 itUniques = uniques.find('i'+thisReads[i].forward.name);
1587                 if (itUniques != uniques.end()) {  //we have the pair for this read
1588                     thisReads[i].findex = itUniques->second.forward;
1589                     thisReads[i].rindex = itUniques->second.reverse;
1590                     reads.push_back(thisReads[i]);
1591                     uniques.erase(itUniques);
1592                 }else { //save this read for later
1593                     uniques['r'+thisReads[i].forward.name] = thisReads[i];
1594                 }
1595             }
1596         }
1597         
1598         if (foundIndexes.size() != indexes.size()) { //if we didnt match all the indexes look for them in uniques
1599             for (int j = 0; j < indexes.size(); j++) {
1600                 if (foundIndexes.count(j) == 0) { //we didnt find this one
1601                     //incase only one index
1602                     string indexName = indexes[j].forward.name;
1603                     if (indexName == "") { indexName = indexes[j].reverse.name; }
1604                     
1605                     //look for forward pair
1606                     itUniques = uniques.find('r'+indexName);
1607                     if (itUniques != uniques.end()) {  //we have the pair for this read
1608                         pairFastqRead temp(itUniques->second.forward, itUniques->second.reverse, indexes[j].forward, indexes[j].reverse);
1609                         reads.push_back(temp);
1610                         uniques.erase(itUniques);
1611                     }else { //save this read for later
1612                         uniques['i'+indexName] = indexes[j];
1613                     }
1614                 }
1615             }
1616         }
1617        
1618         
1619         return reads;
1620     }
1621     catch(exception& e) {
1622         m->errorOut(e, "MakeContigsCommand", "mergeReads");
1623         exit(1);
1624     }
1625 }
1626 //**********************************************************************************************************************
1627 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1628     try {
1629         fastqRead read;
1630         
1631         ignore = false;
1632         
1633         //read sequence name
1634         string line = m->getline(in); m->gobble(in);
1635         vector<string> pieces = m->splitWhiteSpace(line);
1636         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
1637         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
1638         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1639         else { name = name.substr(1); }
1640         
1641         //read sequence
1642         string sequence = m->getline(in); m->gobble(in);
1643         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1644         
1645         //read sequence name
1646         line = m->getline(in); m->gobble(in);
1647         pieces = m->splitWhiteSpace(line);
1648         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
1649         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1650         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1651         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1652         
1653         //read quality scores
1654         string quality = m->getline(in); m->gobble(in);
1655         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1656          
1657         //sanity check sequence length and number of quality scores match
1658         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1659         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1660         
1661         vector<int> qualScores = convertQual(quality);
1662         
1663         read.name = name;
1664         read.sequence = sequence;
1665         read.scores = qualScores;
1666         
1667         if (m->debug) { m->mothurOut("[DEBUG]: " + read.name + " " + read.sequence + " " + quality + "\n"); }
1668
1669         return read;
1670     }
1671     catch(exception& e) {
1672         m->errorOut(e, "MakeContigsCommand", "readFastq");
1673         exit(1);
1674     }
1675 }
1676 /**********************************************************************************************************************
1677 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
1678     try {
1679         bool good = true;
1680         
1681         //do sequence lengths match
1682         if (forward.sequence.length() != reverse.sequence.length()) {
1683             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfile + ", ignoring.\n");
1684             good = false; 
1685         }
1686         
1687         //do number of qual scores match 
1688         if (forward.scores.size() != reverse.scores.size()) {
1689             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfile + ", ignoring.\n");
1690             good = false; 
1691         }
1692
1693         return good;
1694     }
1695     catch(exception& e) {
1696         m->errorOut(e, "MakeContigsCommand", "checkReads");
1697         exit(1);
1698     }
1699 }*/
1700 //***************************************************************************************************************
1701 //lines can be 2, 3, or 4 columns
1702 // forward.fastq reverse.fastq -> 2 column
1703 // groupName forward.fastq reverse.fastq -> 3 column
1704 // forward.fastq reverse.fastq forward.index.fastq  reverse.index.fastq  -> 4 column
1705 // forward.fastq reverse.fastq none  reverse.index.fastq  -> 4 column
1706 // forward.fastq reverse.fastq forward.index.fastq  none  -> 4 column
1707 vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
1708         try {
1709         vector< vector<string> > files;
1710         string forward, reverse, findex, rindex;
1711         
1712         ifstream in;
1713         m->openInputFile(filename, in);
1714         
1715         while(!in.eof()) {
1716             
1717             if (m->control_pressed) { return files; }
1718             
1719             string line = m->getline(in);  m->gobble(in);
1720             vector<string> pieces = m->splitWhiteSpace(line);
1721             
1722             string group = "";
1723             if (pieces.size() == 2) {
1724                 forward = pieces[0];
1725                 reverse = pieces[1];
1726                 group = "";
1727                 findex = "";
1728                 rindex = "";
1729             }else if (pieces.size() == 3) {
1730                 group = pieces[0];
1731                 forward = pieces[1];
1732                 reverse = pieces[2];
1733                 findex = "";
1734                 rindex = "";
1735                 createFileGroup = true;
1736             }else if (pieces.size() == 4) {
1737                 forward = pieces[0];
1738                 reverse = pieces[1];
1739                 findex = pieces[2];
1740                 rindex = pieces[3];
1741                 if ((findex == "none") || (findex == "NONE")){ findex = ""; }
1742                 if ((rindex == "none") || (rindex == "NONE")){ rindex = ""; }
1743             }else {
1744                 m->mothurOut("[ERROR]: file lines can be 2, 3, or 4 columns. The forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one. \n"); m->control_pressed = true;
1745             }
1746             
1747             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); }
1748             
1749             //check to make sure both are able to be opened
1750             ifstream in2;
1751             int openForward = m->openInputFile(forward, in2, "noerror");
1752             
1753             //if you can't open it, try default location
1754             if (openForward == 1) {
1755                 if (m->getDefaultPath() != "") { //default path is set
1756                     string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
1757                     m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
1758                     ifstream in3;
1759                     openForward = m->openInputFile(tryPath, in3, "noerror");
1760                     in3.close();
1761                     forward = tryPath;
1762                 }
1763             }
1764             
1765             //if you can't open it, try output location
1766             if (openForward == 1) {
1767                 if (m->getOutputDir() != "") { //default path is set
1768                     string tryPath = m->getOutputDir() + m->getSimpleName(forward);
1769                     m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1770                     ifstream in4;
1771                     openForward = m->openInputFile(tryPath, in4, "noerror");
1772                     forward = tryPath;
1773                     in4.close();
1774                 }
1775             }
1776             
1777             if (openForward == 1) { //can't find it
1778                 m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n"); 
1779             }else{  in2.close();  }
1780             
1781             ifstream in3;
1782             int openReverse = m->openInputFile(reverse, in3, "noerror");
1783             
1784             //if you can't open it, try default location
1785             if (openReverse == 1) {
1786                 if (m->getDefaultPath() != "") { //default path is set
1787                     string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
1788                     m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
1789                     ifstream in3;
1790                     openReverse = m->openInputFile(tryPath, in3, "noerror");
1791                     in3.close();
1792                     reverse = tryPath;
1793                 }
1794             }
1795             
1796             //if you can't open it, try output location
1797             if (openReverse == 1) {
1798                 if (m->getOutputDir() != "") { //default path is set
1799                     string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
1800                     m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1801                     ifstream in4;
1802                     openReverse = m->openInputFile(tryPath, in4, "noerror");
1803                     reverse = tryPath;
1804                     in4.close();
1805                 }
1806             }
1807             
1808             if (openReverse == 1) { //can't find it
1809                 m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n"); 
1810             }else{  in3.close();  }
1811             
1812             int openFindex = 0;
1813             if (findex != "") {
1814                 ifstream in4;
1815                 openFindex = m->openInputFile(findex, in4, "noerror"); in4.close();
1816                 
1817                 //if you can't open it, try default location
1818                 if (openFindex == 1) {
1819                     if (m->getDefaultPath() != "") { //default path is set
1820                         string tryPath = m->getDefaultPath() + m->getSimpleName(findex);
1821                         m->mothurOut("Unable to open " + findex + ". Trying default " + tryPath); m->mothurOutEndLine();
1822                         ifstream in5;
1823                         openFindex = m->openInputFile(tryPath, in5, "noerror");
1824                         in5.close();
1825                         findex = tryPath;
1826                     }
1827                 }
1828                 
1829                 //if you can't open it, try output location
1830                 if (openFindex == 1) {
1831                     if (m->getOutputDir() != "") { //default path is set
1832                         string tryPath = m->getOutputDir() + m->getSimpleName(findex);
1833                         m->mothurOut("Unable to open " + findex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1834                         ifstream in6;
1835                         openFindex = m->openInputFile(tryPath, in6, "noerror");
1836                         findex = tryPath;
1837                         in6.close();
1838                     }
1839                 }
1840                 
1841                 if (openFindex == 1) { //can't find it
1842                     m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n");
1843                 }
1844             }
1845             
1846             int openRindex = 0;
1847             if (rindex != "") {
1848                 ifstream in7;
1849                 openRindex = m->openInputFile(rindex, in7, "noerror"); in7.close();
1850                 
1851                 //if you can't open it, try default location
1852                 if (openRindex == 1) {
1853                     if (m->getDefaultPath() != "") { //default path is set
1854                         string tryPath = m->getDefaultPath() + m->getSimpleName(rindex);
1855                         m->mothurOut("Unable to open " + rindex + ". Trying default " + tryPath); m->mothurOutEndLine();
1856                         ifstream in8;
1857                         openRindex = m->openInputFile(tryPath, in8, "noerror");
1858                         in8.close();
1859                         rindex = tryPath;
1860                     }
1861                 }
1862                 
1863                 //if you can't open it, try output location
1864                 if (openRindex == 1) {
1865                     if (m->getOutputDir() != "") { //default path is set
1866                         string tryPath = m->getOutputDir() + m->getSimpleName(rindex);
1867                         m->mothurOut("Unable to open " + rindex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1868                         ifstream in9;
1869                         openRindex = m->openInputFile(tryPath, in9, "noerror");
1870                         rindex = tryPath;
1871                         in9.close();
1872                     }
1873                 }
1874                 
1875                 if (openRindex == 1) { //can't find it
1876                     m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n");
1877                 }
1878             }
1879
1880             
1881             if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1)) { //good pair
1882                 file2Group[files.size()] = group;
1883                 vector<string> pair;
1884                 pair.push_back(forward);
1885                 pair.push_back(reverse);
1886                 pair.push_back(findex);
1887                 pair.push_back(rindex);
1888                 if (((findex != "") || (rindex != "")) && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->control_pressed = true;  }
1889                 files.push_back(pair);
1890             }
1891         }
1892         in.close();
1893         
1894         return files;
1895     }
1896     catch(exception& e) {
1897         m->errorOut(e, "MakeContigsCommand", "readFileNames");
1898         exit(1);
1899     }
1900 }
1901 //***************************************************************************************************************
1902 //illumina data requires paired forward and reverse data
1903 //BARCODE   atgcatgc   atgcatgc    groupName 
1904 //PRIMER   atgcatgc   atgcatgc    groupName  
1905 //PRIMER   atgcatgc   atgcatgc  
1906 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
1907         try {
1908                 ifstream in;
1909                 m->openInputFile(oligosfile, in);
1910                 
1911                 ofstream test;
1912                 
1913                 string type, foligo, roligo, group;
1914         
1915                 int indexPrimer = 0;
1916                 int indexBarcode = 0;
1917         set<string> uniquePrimers;
1918         set<string> uniqueBarcodes;
1919                 
1920                 while(!in.eof()){
1921             
1922                         in >> type; 
1923     
1924                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1925             
1926                         if(type[0] == '#'){
1927                                 while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
1928                                 m->gobble(in);
1929                         }
1930                         else{
1931                                 m->gobble(in);
1932                                 //make type case insensitive
1933                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1934                                 
1935                                 in >> foligo;
1936                 
1937                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1938                                 
1939                                 for(int i=0;i<foligo.length();i++){
1940                                         foligo[i] = toupper(foligo[i]);
1941                                         if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
1942                                 }
1943                                 
1944                                 if(type == "PRIMER"){
1945                                         m->gobble(in);
1946                                         
1947                     in >> roligo;
1948                     
1949                     for(int i=0;i<roligo.length();i++){
1950                         roligo[i] = toupper(roligo[i]);
1951                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1952                     }
1953                     //roligo = reverseOligo(roligo);
1954                     
1955                     if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
1956                     
1957                     group = "";
1958                     
1959                                         // get rest of line in case there is a primer name
1960                                         while (!in.eof())       {       
1961                                                 char c = in.get(); 
1962                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1963                                                 else if (c == 32 || c == 9){;} //space or tab
1964                                                 else {  group += c;  }
1965                                         } 
1966                     
1967                     oligosPair newPrimer(foligo, roligo);
1968                     
1969                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1970                     
1971                                         //check for repeat barcodes
1972                     string tempPair = foligo+roligo;
1973                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1974                     else { uniquePrimers.insert(tempPair); }
1975                                         
1976                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1977                     
1978                                         primers[indexPrimer]=newPrimer; indexPrimer++;          
1979                                         primerNameVector.push_back(group);
1980                                 }else if(type == "BARCODE"){
1981                                         m->gobble(in);
1982                                         
1983                     in >> roligo;
1984                     
1985                     for(int i=0;i<roligo.length();i++){
1986                         roligo[i] = toupper(roligo[i]);
1987                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1988                     }
1989                     //roligo = reverseOligo(roligo);
1990                     
1991                     oligosPair newPair(foligo, roligo);
1992                     
1993                     if ((foligo == "NONE") || (roligo == "NONE")) { if (!noneOk) { m->mothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } }
1994                     
1995                     group = "";
1996                     while (!in.eof())   {       
1997                                                 char c = in.get(); 
1998                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1999                                                 else if (c == 32 || c == 9){;} //space or tab
2000                                                 else {  group += c;  }
2001                                         } 
2002                                         
2003                     if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
2004                         
2005                     //check for repeat barcodes
2006                     string tempPair = foligo+roligo;
2007                     if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
2008                     else { uniqueBarcodes.insert(tempPair); }
2009                         
2010                     barcodes[indexBarcode]=newPair; indexBarcode++;
2011                                         barcodeNameVector.push_back(group);
2012                                 }else if(type == "LINKER"){
2013                                         linker.push_back(foligo);
2014                     m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
2015                                 }else if(type == "SPACER"){
2016                                         spacer.push_back(foligo);
2017                     m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
2018                                 }
2019                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
2020                         }
2021                         m->gobble(in);
2022                 }       
2023                 in.close();
2024                 
2025                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
2026                 
2027                 //add in potential combos
2028                 if(barcodeNameVector.size() == 0){
2029             oligosPair temp("", "");
2030                         barcodes[0] = temp;
2031                         barcodeNameVector.push_back("");                        
2032                 }
2033                 
2034                 if(primerNameVector.size() == 0){
2035             oligosPair temp("", "");
2036                         primers[0] = temp;
2037                         primerNameVector.push_back("");                 
2038                 }
2039                 
2040                 fastaFileNames.resize(barcodeNameVector.size());
2041                 for(int i=0;i<fastaFileNames.size();i++){
2042                         fastaFileNames[i].assign(primerNameVector.size(), "");
2043                 }
2044                 
2045                 if(allFiles){
2046                         set<string> uniqueNames; //used to cleanup outputFileNames
2047                         for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
2048                                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
2049                                         
2050                                         string primerName = primerNameVector[itPrimer->first];
2051                                         string barcodeName = barcodeNameVector[itBar->first];
2052                     
2053                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
2054                                         else {
2055                         string comboGroupName = "";
2056                         string fastaFileName = "";
2057                         string qualFileName = "";
2058                         string nameFileName = "";
2059                         string countFileName = "";
2060                         
2061                         if(primerName == ""){
2062                             comboGroupName = barcodeNameVector[itBar->first];
2063                         }
2064                         else{
2065                             if(barcodeName == ""){
2066                                 comboGroupName = primerNameVector[itPrimer->first];
2067                             }
2068                             else{
2069                                 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
2070                             }
2071                         }
2072                         
2073                         
2074                         ofstream temp;
2075                         fastaFileName = rootname + comboGroupName + ".fasta";
2076                         if (uniqueNames.count(fastaFileName) == 0) {
2077                             outputNames.push_back(fastaFileName);
2078                             outputTypes["fasta"].push_back(fastaFileName);
2079                             uniqueNames.insert(fastaFileName);
2080                         }
2081                         
2082                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
2083                         m->openOutputFile(fastaFileName, temp);         temp.close();
2084                     }
2085                                 }
2086                         }
2087                 }
2088                 
2089                 bool allBlank = true;
2090                 for (int i = 0; i < barcodeNameVector.size(); i++) {
2091                         if (barcodeNameVector[i] != "") {
2092                                 allBlank = false;
2093                                 break;
2094                         }
2095                 }
2096                 for (int i = 0; i < primerNameVector.size(); i++) {
2097                         if (primerNameVector[i] != "") {
2098                                 allBlank = false;
2099                                 break;
2100                         }
2101                 }
2102         
2103                 if (allBlank) {
2104                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
2105                         allFiles = false;
2106                         return false;
2107                 }
2108                 
2109                 return true;
2110                 
2111         }
2112         catch(exception& e) {
2113                 m->errorOut(e, "MakeContigsCommand", "getOligos");
2114                 exit(1);
2115         }
2116 }
2117 //********************************************************************/
2118 string MakeContigsCommand::reverseOligo(string oligo){
2119         try {
2120         string reverse = "";
2121         
2122         for(int i=oligo.length()-1;i>=0;i--){
2123             
2124             if(oligo[i] == 'A')         {       reverse += 'T'; }
2125             else if(oligo[i] == 'T'){   reverse += 'A'; }
2126             else if(oligo[i] == 'U'){   reverse += 'A'; }
2127             
2128             else if(oligo[i] == 'G'){   reverse += 'C'; }
2129             else if(oligo[i] == 'C'){   reverse += 'G'; }
2130             
2131             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2132             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2133             
2134             else if(oligo[i] == 'M'){   reverse += 'K'; }
2135             else if(oligo[i] == 'K'){   reverse += 'M'; }
2136             
2137             else if(oligo[i] == 'W'){   reverse += 'W'; }
2138             else if(oligo[i] == 'S'){   reverse += 'S'; }
2139             
2140             else if(oligo[i] == 'B'){   reverse += 'V'; }
2141             else if(oligo[i] == 'V'){   reverse += 'B'; }
2142             
2143             else if(oligo[i] == 'D'){   reverse += 'H'; }
2144             else if(oligo[i] == 'H'){   reverse += 'D'; }
2145             
2146             else                                                {       reverse += 'N'; }
2147         }
2148         
2149         
2150         return reverse;
2151     }
2152         catch(exception& e) {
2153                 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
2154                 exit(1);
2155         }
2156 }
2157 //**********************************************************************************************************************
2158 vector<int> MakeContigsCommand::convertQual(string qual) {
2159         try {
2160                 vector<int> qualScores;
2161         bool negativeScores = false;
2162                 
2163                 for (int i = 0; i < qual.length(); i++) { 
2164             
2165             int temp = 0;
2166             temp = int(qual[i]);
2167             if (format == "illumina") {
2168                 temp -= 64; //char '@'
2169             }else if (format == "illumina1.8+") {
2170                     temp -= int('!'); //char '!'
2171             }else if (format == "solexa") {
2172                 temp = int(convertTable[temp]); //convert to sanger
2173                 temp -= int('!'); //char '!'
2174             }else {
2175                 temp -= int('!'); //char '!'
2176             }
2177             
2178             if (temp < -5) { negativeScores = true; }
2179                         qualScores.push_back(temp);
2180                 }
2181                 
2182         if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  }
2183         
2184                 return qualScores;
2185         }
2186         catch(exception& e) {
2187                 m->errorOut(e, "MakeContigsCommand", "convertQual");
2188                 exit(1);
2189         }
2190 }
2191
2192 //**********************************************************************************************************************
2193
2194
2195
2196