]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
changes while testing
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
15         CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
16         CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
17         CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
18         CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
19         CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
20         CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
21         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
22         CommandParameter pfindex("findex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pfindex);
23         CommandParameter prindex("rindex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(prindex);
24                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
26         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
27
28         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
29         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
30         CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
31                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
32                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
33                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
34                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
35         CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
36         CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
37                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
38         CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
41                 
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MakeContigsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string MakeContigsCommand::getHelpString(){     
53         try {
54                 string helpString = "";
55                 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. \n";
56         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
57         helpString += "If a forward index or reverse index file is provided barcodes be trimmed, and a group file will be created. The oligos parameter is required if an index file is given.\n";
58                 helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, findex, rindex, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
59                 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
60         helpString += "The file parameter is 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one.  Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
61         helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process.  If you provide one, you must provide the other.\n";
62         helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process.  If you provide one, you must provide the other.\n";
63         helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters.  If you provide one, you must provide the other.\n";
64                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
65         helpString += "The findex and rindex parameters are used to provide a forward index and reverse index files to process.  \n";
66         helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
67         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
68                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70         //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
71                 //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
72                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
73                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
74         helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base.  For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5).  If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
75                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
76                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
77         helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
78         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
79         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
80
81         helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
82         helpString += "The make.contigs command should be in the following format: \n";
83                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
84                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
85                 return helpString;
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
89                 exit(1);
90         }
91 }
92 //**********************************************************************************************************************
93 string MakeContigsCommand::getOutputPattern(string type) {
94     try {
95         string pattern = "";
96         
97         if (type == "fasta") {  pattern = "[filename],[tag],contigs.fasta"; } 
98         else if (type == "group") {  pattern = "[filename],[tag],contigs.groups"; }
99         else if (type == "report") {  pattern = "[filename],[tag],contigs.report"; }
100         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
101         
102         return pattern;
103     }
104     catch(exception& e) {
105         m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
106         exit(1);
107     }
108 }
109 //**********************************************************************************************************************
110 MakeContigsCommand::MakeContigsCommand(){       
111         try {
112                 abort = true; calledHelp = true; 
113                 setParameters();
114                 vector<string> tempOutNames;
115                 outputTypes["fasta"] = tempOutNames;
116         outputTypes["group"] = tempOutNames;
117         outputTypes["report"] = tempOutNames;
118     }
119         catch(exception& e) {
120                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
121                 exit(1);
122         }
123 }
124 //**********************************************************************************************************************
125 MakeContigsCommand::MakeContigsCommand(string option)  {
126         try {
127                 abort = false; calledHelp = false;
128         createFileGroup = false; createOligosGroup = false;
129         
130                 //allow user to run help
131                 if(option == "help") { help(); abort = true; calledHelp = true; }
132                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
133                 
134                 else {
135                         vector<string> myArray = setParameters();
136                         
137                         OptionParser parser(option);
138                         map<string, string> parameters = parser.getParameters(); 
139                         
140                         ValidParameters validParameter("pairwise.seqs");
141                         map<string, string>::iterator it;
142                         
143                         //check to make sure all parameters are valid for command
144                         for (it = parameters.begin(); it != parameters.end(); it++) { 
145                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
146                         }
147                         
148                         //initialize outputTypes
149                         vector<string> tempOutNames;
150                         outputTypes["fasta"] = tempOutNames;
151             outputTypes["report"] = tempOutNames;
152             outputTypes["group"] = tempOutNames;
153                         
154             
155                         //if the user changes the input directory command factory will send this info to us in the output parameter 
156                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
157                         if (inputDir == "not found"){   inputDir = "";          }
158                         else { 
159                                 string path;
160                 it = parameters.find("ffastq");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
166                                 }
167                 
168                 it = parameters.find("rfastq");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
174                                 }
175                 
176                 it = parameters.find("ffasta");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["ffasta"] = inputDir + it->second;           }
182                                 }
183                 
184                 it = parameters.find("rfasta");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["rfasta"] = inputDir + it->second;           }
190                                 }
191                 
192                 it = parameters.find("fqfile");
193                                 //user has given a template file
194                                 if(it != parameters.end()){ 
195                                         path = m->hasPath(it->second);
196                                         //if the user has not given a path then, add inputdir. else leave path alone.
197                                         if (path == "") {       parameters["fqfile"] = inputDir + it->second;           }
198                                 }
199                 
200                 it = parameters.find("rqfile");
201                                 //user has given a template file
202                                 if(it != parameters.end()){ 
203                                         path = m->hasPath(it->second);
204                                         //if the user has not given a path then, add inputdir. else leave path alone.
205                                         if (path == "") {       parameters["rqfile"] = inputDir + it->second;           }
206                                 }
207                 
208                 it = parameters.find("file");
209                                 //user has given a template file
210                                 if(it != parameters.end()){ 
211                                         path = m->hasPath(it->second);
212                                         //if the user has not given a path then, add inputdir. else leave path alone.
213                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
214                                 }
215                 
216                 it = parameters.find("oligos");
217                                 //user has given a template file
218                                 if(it != parameters.end()){ 
219                                         path = m->hasPath(it->second);
220                                         //if the user has not given a path then, add inputdir. else leave path alone.
221                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
222                                 }
223                 
224                 it = parameters.find("findex");
225                                 //user has given a template file
226                                 if(it != parameters.end()){
227                                         path = m->hasPath(it->second);
228                                         //if the user has not given a path then, add inputdir. else leave path alone.
229                                         if (path == "") {       parameters["findex"] = inputDir + it->second;           }
230                                 }
231                 
232                 it = parameters.find("rindex");
233                                 //user has given a template file
234                                 if(it != parameters.end()){
235                                         path = m->hasPath(it->second);
236                                         //if the user has not given a path then, add inputdir. else leave path alone.
237                                         if (path == "") {       parameters["rindex"] = inputDir + it->second;           }
238                                 }
239             }
240             
241             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
242                         if (ffastqfile == "not open") {  abort = true; }        
243                         else if (ffastqfile == "not found") { ffastqfile = ""; }
244                         
245                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
246                         if (rfastqfile == "not open") {  abort = true; }        
247                         else if (rfastqfile == "not found") { rfastqfile = "";  }
248             
249             ffastafile = validParameter.validFile(parameters, "ffasta", true);
250                         if (ffastafile == "not open") {  abort = true; }        
251                         else if (ffastafile == "not found") { ffastafile = ""; }
252                         
253                         rfastafile = validParameter.validFile(parameters, "rfasta", true);
254                         if (rfastafile == "not open") {  abort = true; }        
255                         else if (rfastafile == "not found") { rfastafile = "";  }
256             
257             fqualfile = validParameter.validFile(parameters, "fqfile", true);
258                         if (fqualfile == "not open") {  abort = true; } 
259                         else if (fqualfile == "not found") { fqualfile = ""; }
260                         
261                         rqualfile = validParameter.validFile(parameters, "rqfile", true);
262                         if (rqualfile == "not open") {  abort = true; } 
263                         else if (rqualfile == "not found") { rqualfile = "";  }
264             
265             file = validParameter.validFile(parameters, "file", true);
266                         if (file == "not open") {  abort = true; }      
267                         else if (file == "not found") { file = "";  }
268             
269             //provide at least
270             if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
271             if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
272             if ((ffastqfile != "") && (rfastqfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffastq, you must provide a rfastq file.\n"); }
273             if ((ffastqfile == "") && (rfastqfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfastq, you must provide a ffastq file.\n"); }
274             if ((ffastafile != "") && (rfastafile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffasta, you must provide a rfasta file.\n"); }
275             if ((ffastafile == "") && (rfastafile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfasta, you must provide a ffasta file.\n"); }
276             if ((fqualfile != "") && (rqualfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the fqfile, you must provide a rqfile file.\n"); }
277             if ((fqualfile == "") && (rqualfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile, you must provide a fqfile file.\n"); }
278             if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) {
279                 abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n");
280             }
281             
282             oligosfile = validParameter.validFile(parameters, "oligos", true);
283                         if (oligosfile == "not found")      {   oligosfile = "";        }
284                         else if(oligosfile == "not open")   {   abort = true;       } 
285                         else {   m->setOligosFile(oligosfile);          }
286             
287             findexfile = validParameter.validFile(parameters, "findex", true);
288                         if (findexfile == "not found")      {   findexfile = "";        }
289                         else if(findexfile == "not open")   {   abort = true;       }
290             
291             rindexfile = validParameter.validFile(parameters, "rindex", true);
292                         if (rindexfile == "not found")      {   rindexfile = "";        }
293                         else if(rindexfile == "not open")   {   abort = true;       }
294         
295             if ((rindexfile != "") || (findexfile != "")) {
296                                 if (oligosfile == ""){
297                                         oligosfile = m->getOligosFile();
298                                         if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n");  }
299                                         else {
300                         m->mothurOut("You need to provide an oligos file if you are going to use an index file.\n"); abort = true;
301                                         }
302                                 }
303                 
304                 //can only use an index file with the fastq parameters not fasta and qual
305                 if ((ffastafile != "") || (rfastafile != "")) {
306                     m->mothurOut("[ERROR]: You can only use an index file with the fastq parameters or the file option.\n"); abort = true;
307                 }
308                         }
309             
310             //if the user changes the output directory command factory will send this info to us in the output parameter 
311                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
312                  outputDir = ""; 
313             }
314                         
315
316                         //check for optional parameter and set defaults
317                         // ...at some point should added some additional type checking...
318                         string temp;
319                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
320                         m->mothurConvert(temp, match);  
321                         
322                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
323                         m->mothurConvert(temp, misMatch);  
324             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
325                         
326                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
327                         m->mothurConvert(temp, gapOpen);  
328             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
329                         
330                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
331                         m->mothurConvert(temp, gapExtend); 
332             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
333                         
334             temp = validParameter.validFile(parameters, "insert", false);       if (temp == "not found"){       temp = "20";                    }
335                         m->mothurConvert(temp, insert); 
336             if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
337
338             temp = validParameter.validFile(parameters, "deltaq", false);       if (temp == "not found"){       temp = "6";                     }
339                         m->mothurConvert(temp, deltaq);
340             
341                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
342                         m->setProcessors(temp);
343                         m->mothurConvert(temp, processors);
344             
345             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
346                         m->mothurConvert(temp, bdiffs);
347                         
348                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
349                         m->mothurConvert(temp, pdiffs);
350             
351   //          temp = validParameter.validFile(parameters, "ldiffs", false);             if (temp == "not found") { temp = "0"; }
352 //                      m->mothurConvert(temp, ldiffs);
353             ldiffs = 0;
354             
355  //           temp = validParameter.validFile(parameters, "sdiffs", false);             if (temp == "not found") { temp = "0"; }
356  //           m->mothurConvert(temp, sdiffs);
357             sdiffs = 0;
358                         
359                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
360                         m->mothurConvert(temp, tdiffs);
361                         
362                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
363
364             temp = validParameter.validFile(parameters, "allfiles", false);             if (temp == "not found") { temp = "F"; }
365                         allFiles = m->isTrue(temp);
366             
367             
368             temp = validParameter.validFile(parameters, "trimoverlap", false);          if (temp == "not found") { temp = "F"; }
369                         trimOverlap = m->isTrue(temp);
370                         
371                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
372                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
373             
374             format = validParameter.validFile(parameters, "format", false);             if (format == "not found"){     format = "illumina1.8+";        }
375             
376             if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
377                                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
378                                 abort=true;
379                         }
380             
381             //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
382             for (int i = -64; i < 65; i++) { 
383                 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
384                 convertTable.push_back(temp);
385             }
386         }
387                 
388         }
389         catch(exception& e) {
390                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
391                 exit(1);
392         }
393 }
394 //**********************************************************************************************************************
395 int MakeContigsCommand::execute(){
396         try {
397                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
398         
399         //read ffastq and rfastq files creating fasta and qual files.
400         //this function will create a forward and reverse, fasta and qual files for each processor.
401         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual.  filesToProcess is for each filepair in the file parameter file.  for ffastq and rfastq this will be size 1.
402         unsigned long int numReads = 0;
403         int start = time(NULL);
404         longestBase = 1000;
405         m->mothurOut("Reading fastq data...\n"); 
406         vector < vector< vector<string> > > filesToProcess = preProcessData(numReads);
407         m->mothurOut("Done.\n");
408        
409         if (m->control_pressed) { return 0; }
410         
411         map<string, string> cvars;
412         string compOutputDir = outputDir;
413         if (outputDir == "") { compOutputDir = m->hasPath(file); }
414         cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
415         cvars["[tag]"] = "";
416         string compositeGroupFile = getOutputFileName("group",cvars);
417         cvars["[tag]"] = "trim";
418         string compositeFastaFile = getOutputFileName("fasta",cvars);
419         cvars["[tag]"] = "scrap";
420         string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
421         cvars["[tag]"] = "";
422         string compositeMisMatchFile = getOutputFileName("report",cvars);
423         
424         if (filesToProcess.size() > 1) { //clear files for append below
425             ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
426             m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
427             m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
428             m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
429             outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
430             outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
431             outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
432         }
433         
434         map<string, int> totalGroupCounts;
435         
436         for (int l = 0; l < filesToProcess.size(); l++) {
437             
438             m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
439             
440             groupCounts.clear();
441             groupMap.clear();
442             vector<vector<string> > fastaFileNames;
443             createOligosGroup = false;
444             string outputGroupFileName;
445             map<string, string> variables; 
446             string thisOutputDir = outputDir;
447             if (outputDir == "") {  thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
448             variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
449             variables["[tag]"] = "";
450             if(oligosfile != ""){  createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]);  }
451             if (createOligosGroup || createFileGroup) {
452                 outputGroupFileName = getOutputFileName("group",variables);
453             }
454             
455             //give group in file file precedence
456             if (createFileGroup) {  createOligosGroup = false; }
457             
458             variables["[tag]"] = "trim";
459             string outFastaFile = getOutputFileName("fasta",variables);
460             variables["[tag]"] = "scrap";
461             string outScrapFastaFile = getOutputFileName("fasta",variables);
462             variables["[tag]"] = "";
463             string outMisMatchFile = getOutputFileName("report",variables);
464                         
465             m->mothurOut("Making contigs...\n"); 
466             createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
467             
468             //remove temp fasta and qual files
469             for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); }  }
470             
471             if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); }  return 0; }
472             
473             if(allFiles){
474                 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
475                 map<string, string>::iterator it;
476                 set<string> namesToRemove;
477                 for(int i=0;i<fastaFileNames.size();i++){
478                     for(int j=0;j<fastaFileNames[0].size();j++){
479                         if (fastaFileNames[i][j] != "") {
480                             if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
481                                 if(m->isBlank(fastaFileNames[i][j])){
482                                     m->mothurRemove(fastaFileNames[i][j]);
483                                     namesToRemove.insert(fastaFileNames[i][j]);
484                                 }else{  
485                                     it = uniqueFastaNames.find(fastaFileNames[i][j]);
486                                     if (it == uniqueFastaNames.end()) { 
487                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
488                                     }   
489                                 }
490                             }
491                         }
492                     }
493                 }
494                 
495                 //remove names for outputFileNames, just cleans up the output
496                 vector<string> outputNames2;
497                 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
498                 outputNames = outputNames2;
499                 
500                 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
501                     ifstream in;
502                     m->openInputFile(it->first, in);
503                     
504                     ofstream out;
505                     string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
506                     thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
507                     m->openOutputFile(thisGroupName, out);
508                     
509                     while (!in.eof()){
510                         if (m->control_pressed) { break; }
511                         
512                         Sequence currSeq(in); m->gobble(in);
513                         out << currSeq.getName() << '\t' << it->second << endl;
514                     }
515                     out.close();
516                     in.close();
517                 }
518             }
519             
520             if (createFileGroup || createOligosGroup) {
521                 ofstream outGroup;
522                 m->openOutputFile(outputGroupFileName, outGroup);
523                 for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
524                     outGroup << itGroup->first << '\t' << itGroup->second << endl;
525                 }
526                 outGroup.close();
527             }
528             
529             if (filesToProcess.size() > 1) { //merge into large combo files
530                 if (createFileGroup || createOligosGroup) {
531                     if (l == 0) {
532                         ofstream outCGroup;
533                         m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
534                         outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
535                     }
536                     m->appendFiles(outputGroupFileName, compositeGroupFile);
537                     if (!allFiles) { m->mothurRemove(outputGroupFileName);  }
538                     else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
539                     
540                     for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
541                         map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
542                         if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
543                         else { itTemp->second += itGroups->second; } //existing group, update total
544                     }
545                 }
546                 if (l == 0) {  m->appendFiles(outMisMatchFile, compositeMisMatchFile);  }
547                 else {  m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile);  }
548                 m->appendFiles(outFastaFile, compositeFastaFile);
549                 m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
550                 if (!allFiles) {
551                     m->mothurRemove(outMisMatchFile);
552                     m->mothurRemove(outFastaFile);
553                     m->mothurRemove(outScrapFastaFile);
554                 }else {
555                     outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
556                     outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
557                     outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
558                 }
559             }else {
560                 totalGroupCounts = groupCounts;
561                 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
562                 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
563                 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
564                 if (createFileGroup || createOligosGroup) {
565                      outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); 
566                 }
567             }
568             m->mothurOut("Done.\n");
569         }
570         
571         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
572         
573         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
574         
575                 //output group counts
576                 m->mothurOutEndLine();
577                 int total = 0;
578                 if (totalGroupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
579                 for (map<string, int>::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) {
580             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
581                 }
582                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
583                 
584                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
585         
586         string currentFasta = "";
587                 itTypes = outputTypes.find("fasta");
588                 if (itTypes != outputTypes.end()) {
589                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
590                 }
591         
592         string currentGroup = "";
593                 itTypes = outputTypes.find("group");
594                 if (itTypes != outputTypes.end()) {
595                         if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
596                 }
597                 
598         //output files created by command
599                 m->mothurOutEndLine();
600                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
601                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
602                 m->mothurOutEndLine();
603
604         return 0;
605     }
606         catch(exception& e) {
607                 m->errorOut(e, "MakeContigsCommand", "execute");
608                 exit(1);
609         }
610 }
611 //**********************************************************************************************************************
612 vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned long int& numReads) {
613         try {
614         vector< vector< vector<string> > > filesToProcess;
615         
616         if (ffastqfile != "") { //reading one file
617             vector< vector<string> > files = readFastqFiles(numReads, ffastqfile, rfastqfile, findexfile, rindexfile);
618             //adjust for really large processors or really small files
619             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
620             if (numReads < processors) { 
621                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
622                 files.resize(numReads);
623                 processors = numReads; 
624             }
625             filesToProcess.push_back(files);
626         }else if (file != "") { //reading multiple files
627             //return only valid pairs
628             vector< vector<string> > filePairsToProcess = readFileNames(file);
629             
630             if (m->control_pressed) { return filesToProcess; }
631             
632             if (filePairsToProcess.size() != 0) {
633                 for (int i = 0; i < filePairsToProcess.size(); i++) {
634                     
635                     if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } }
636                     
637                     unsigned long int thisFilesReads;
638                     vector< vector<string> > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1], filePairsToProcess[i][2], filePairsToProcess[i][3]);
639                     
640                     //adjust for really large processors or really small files
641                     if (thisFilesReads < processors) { 
642                         m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n"); 
643                         for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); }
644                         //remove from file2Group if necassary
645                         map<int, string> cFile2Group;
646                         for (map<int, string>::iterator it = file2Group.begin(); it != file2Group.end(); it++) {
647                             if ((it->first) < i) { cFile2Group[it->first] = it->second; }
648                             else if ((it->first) == i) { } //do nothing, we removed files for i
649                             else { cFile2Group[(it->first-1)] = it->second; } //adjust files because i was removed
650                         }
651                         file2Group = cFile2Group;
652                     }else {
653                         filesToProcess.push_back(files);
654                         numReads += thisFilesReads;
655                     }
656                 }
657                 //all files are bad
658                 if (numReads == 0) {  m->control_pressed = true; }
659             }
660         }else if (ffastafile != "") {
661             vector< vector<string> > files = readFastaFiles(numReads, ffastafile, rfastafile);
662             //adjust for really large processors or really small files
663             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
664             if (numReads < processors) { 
665                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
666                 files.resize(numReads);
667                 processors = numReads; 
668             }
669             filesToProcess.push_back(files);
670         }else { m->control_pressed = true; } //should not get here
671         
672         return filesToProcess;
673     }
674         catch(exception& e) {
675                 m->errorOut(e, "MakeContigsCommand", "preProcessData");
676                 exit(1);
677         }
678 }
679 //**********************************************************************************************************************
680 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int index) {
681         try {
682                 int num = 0;
683                 vector<int> processIDS;
684         string group = "";
685         map<int, string>::iterator it = file2Group.find(index);
686         if (it != file2Group.end()) { group = it->second; }
687         
688 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
689                 int process = 0;
690                 
691                 //loop through and create all the processes you want
692                 while (process != processors-1) {
693                         int pid = fork();
694                         
695                         if (pid > 0) {
696                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
697                                 process++;
698                         }else if (pid == 0){
699                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
700                 
701                                 if(allFiles){
702                                         ofstream temp;
703                     
704                                         for(int i=0;i<tempFASTAFileNames.size();i++){
705                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
706                                                         if (tempFASTAFileNames[i][j] != "") {
707                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
708                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
709                                                         }
710                                                 }
711                                         }
712                                 }
713                 
714                                 num = driver(files[process], 
715                              outputFasta + toString(getpid()) + ".temp", 
716                              outputScrapFasta + toString(getpid()) + ".temp", 
717                              outputMisMatches + toString(getpid()) + ".temp",
718                              tempFASTAFileNames, process, group);
719                                 
720                                 //pass groupCounts to parent
721                 ofstream out;
722                 string tempFile = toString(getpid()) + ".num.temp";
723                 m->openOutputFile(tempFile, out);
724                 out << num << endl;
725                                 if (createFileGroup || createOligosGroup) {
726                                         out << groupCounts.size() << endl;
727                                         
728                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
729                                                 out << it->first << '\t' << it->second << endl;
730                                         }
731                     
732                     out << groupMap.size() << endl;
733                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
734                                                 out << it->first << '\t' << it->second << endl;
735                                         }
736                                 }
737                 out.close();
738                                 
739                                 exit(0);
740                         }else { 
741                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
742                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
743                                 exit(0);
744                         }
745                 }
746                 
747         ofstream temp;
748                 m->openOutputFile(outputFasta, temp);           temp.close();
749         m->openOutputFile(outputScrapFasta, temp);              temp.close();
750                 
751                 //do my part
752                 num = driver(files[processors-1], outputFasta, outputScrapFasta,  outputMisMatches, fastaFileNames, processors-1, group);
753                 
754                 //force parent to wait until all the processes are done
755                 for (int i=0;i<processIDS.size();i++) { 
756                         int temp = processIDS[i];
757                         wait(&temp);
758                 }
759         
760                 for (int i = 0; i < processIDS.size(); i++) {
761             ifstream in;
762             string tempFile = toString(processIDS[i]) + ".num.temp";
763             m->openInputFile(tempFile, in);
764             int tempNum;
765             in >> tempNum; num += tempNum; m->gobble(in);
766             
767                         if (createFileGroup || createOligosGroup) {
768                                 string group;
769                                 in >> tempNum; m->gobble(in);
770                                 
771                                 if (tempNum != 0) {
772                                         for (int j = 0; j < tempNum; j++) { 
773                         int groupNum;
774                                                 in >> group >> groupNum; m->gobble(in);
775                         
776                                                 map<string, int>::iterator it = groupCounts.find(group);
777                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
778                                                 else { groupCounts[it->first] += groupNum; }
779                                         }
780                                 }
781                 in >> tempNum; m->gobble(in);
782                 if (tempNum != 0) {
783                                         for (int j = 0; j < tempNum; j++) { 
784                         string group, seqName;
785                                                 in >> seqName >> group; m->gobble(in);
786                         
787                                                 map<string, string>::iterator it = groupMap.find(seqName);
788                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
789                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
790                                         }
791                                 }
792                         }
793             in.close(); m->mothurRemove(tempFile);
794         }
795     #else
796         
797         //////////////////////////////////////////////////////////////////////////////////////////////////////
798                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
799                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
800                 //////////////////////////////////////////////////////////////////////////////////////////////////////
801                 
802                 vector<contigsData*> pDataArray; 
803                 DWORD   dwThreadIdArray[processors-1];
804                 HANDLE  hThreadArray[processors-1]; 
805                 
806                 //Create processor worker threads.
807                 for( int h=0; h<processors-1; h++ ){
808                         string extension = "";
809                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
810             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
811                         
812             if(allFiles){
813                 ofstream temp;
814                 
815                 for(int i=0;i<tempFASTAFileNames.size();i++){
816                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
817                         if (tempFASTAFileNames[i][j] != "") {
818                             tempFASTAFileNames[i][j] += extension;
819                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
820                         }
821                     }
822                 }
823             }
824                                  
825                         contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
826                         pDataArray.push_back(tempcontig);
827             
828                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
829                 }
830         
831         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
832
833         if(allFiles){
834             ofstream temp;
835             string extension = toString(processors-1) + ".temp";
836             
837             for(int i=0;i<tempFASTAFileNames.size();i++){
838                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
839                     if (tempFASTAFileNames[i][j] != "") {
840                         tempFASTAFileNames[i][j] += extension;
841                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
842                     }
843                 }
844             }
845         }
846
847                 //parent do my part
848                 ofstream temp;
849                 m->openOutputFile(outputFasta, temp);           temp.close();
850         m->openOutputFile(outputScrapFasta, temp);              temp.close();
851         
852         //do my part
853         processIDS.push_back(processors-1);
854                 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"),  (outputScrapFasta+ toString(processors-1) + ".temp"),  (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group);
855         
856                 //Wait until all threads have terminated.
857                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
858                 
859                 //Close all thread handles and free memory allocations.
860                 for(int i=0; i < pDataArray.size(); i++){
861                         num += pDataArray[i]->count;
862             if (!pDataArray[i]->done) {
863                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true; 
864             }
865             for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
866                 map<string, int>::iterator it2 = groupCounts.find(it->first);
867                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
868                 else { groupCounts[it->first] += it->second; }
869             }
870             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
871                 map<string, string>::iterator it2 = groupMap.find(it->first);
872                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
873                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
874             }
875             CloseHandle(hThreadArray[i]);
876                         delete pDataArray[i];
877         }
878                                 
879     #endif      
880         
881         for (int i = 0; i < processIDS.size(); i++) {
882                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
883                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
884                         
885                         m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
886                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
887             
888             m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
889                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
890             
891             if(allFiles){
892                                 for(int j=0;j<fastaFileNames.size();j++){
893                                         for(int k=0;k<fastaFileNames[j].size();k++){
894                                                 if (fastaFileNames[j][k] != "") {
895                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
896                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
897                                                 }
898                                         }
899                                 }
900                         }
901                 }
902                 
903                 return num;
904         }
905         catch(exception& e) {
906                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
907                 exit(1);
908         }
909 }
910 //**********************************************************************************************************************
911 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process, string group){
912     try {
913         
914         Alignment* alignment;
915         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
916                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
917         
918         int num = 0;
919         string thisffastafile = files[0];
920         string thisfqualfile = files[1];
921         string thisrfastafile = files[2];
922         string thisrqualfile = files[3];
923         string thisfindexfile = files[4];
924         string thisrindexfile = files[5];
925         
926         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); }
927         
928         ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex;
929         ofstream outFasta, outMisMatch, outScrapFasta;
930         m->openInputFile(thisffastafile, inFFasta);
931         m->openInputFile(thisrfastafile, inRFasta);
932         if (thisfqualfile != "") {
933             m->openInputFile(thisfqualfile, inFQual);
934             m->openInputFile(thisrqualfile, inRQual);
935         }
936         
937         if (thisfindexfile != "") { m->openInputFile(thisfindexfile, inFIndex);  }
938         if (thisrindexfile != "") { m->openInputFile(thisrindexfile, inRIndex);  }
939         
940         m->openOutputFile(outputFasta, outFasta);
941         m->openOutputFile(outputScrapFasta, outScrapFasta);
942         m->openOutputFile(outputMisMatches, outMisMatch);
943         outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  
944         
945         TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
946         
947         while ((!inFFasta.eof()) && (!inRFasta.eof())) {
948             
949             if (m->control_pressed) { break; }
950             
951             int success = 1;
952             string trashCode = "";
953             int currentSeqsDiffs = 0;
954
955             //read seqs and quality info
956             Sequence fSeq(inFFasta); m->gobble(inFFasta);
957             Sequence rSeq(inRFasta); m->gobble(inRFasta);
958             QualityScores* fQual = NULL; QualityScores* rQual = NULL;
959             if (thisfqualfile != "") {
960                 fQual = new QualityScores(inFQual); m->gobble(inFQual);
961                 rQual = new QualityScores(inRQual); m->gobble(inRQual);
962             }
963             Sequence findexBarcode("findex", "NONE");  Sequence rindexBarcode("rindex", "NONE");
964             if (thisfindexfile != "") {
965                 Sequence temp(inFIndex); m->gobble(inFIndex);
966                 findexBarcode.setAligned(temp.getAligned());
967             }
968             
969             if (thisrindexfile != "") {
970                 Sequence temp(inRIndex); m->gobble(inRIndex);
971                 rindexBarcode.setAligned(temp.getAligned());
972             }
973             
974             int barcodeIndex = 0;
975             int primerIndex = 0;
976                         
977             if(barcodes.size() != 0){
978                 if (thisfqualfile != "") {
979                     if ((thisfindexfile != "") || (thisrindexfile != "")) {
980                         success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
981                     }else {
982                         success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
983                     }
984                 }else {
985                     success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
986                 }
987                 if(success > bdiffs)            {       trashCode += 'b';       }
988                 else{ currentSeqsDiffs += success;  }
989             }
990             
991             if(primers.size() != 0){
992                 if (thisfqualfile != "") {
993                     success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
994                 }else {
995                     success = trimOligos.stripForward(fSeq, rSeq, primerIndex);
996                 }
997                 if(success > pdiffs)            {       trashCode += 'f';       }
998                 else{ currentSeqsDiffs += success;  }
999             }
1000             
1001             if (currentSeqsDiffs > tdiffs)      {       trashCode += 't';   }
1002             
1003             //flip the reverse reads
1004             rSeq.reverseComplement();
1005             if (thisfqualfile != "") { rQual->flipQScores(); }
1006
1007             //pairwise align
1008             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
1009             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
1010             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
1011             fSeq.setAligned(alignment->getSeqAAln());
1012             rSeq.setAligned(alignment->getSeqBAln());
1013             int length = fSeq.getAligned().length();
1014             
1015             //traverse alignments merging into one contiguous seq
1016             string contig = "";
1017             int numMismatches = 0;
1018             string seq1 = fSeq.getAligned();
1019             string seq2 = rSeq.getAligned();
1020             vector<int> scores1, scores2; 
1021             if (thisfqualfile != "") {
1022                 scores1 = fQual->getQualityScores();
1023                 scores2 = rQual->getQualityScores();
1024                 delete fQual; delete rQual;
1025             }
1026             
1027             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
1028             int overlapStart = fSeq.getStartPos();
1029             int seq2Start = rSeq.getStartPos();
1030             
1031             //bigger of the 2 starting positions is the location of the overlapping start
1032             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
1033                 overlapStart = seq2Start; 
1034                 for (int i = 0; i < overlapStart; i++) { contig += seq1[i];  }
1035             }else { //seq1 starts later so take from 0 to overlapStart from seq2
1036                 for (int i = 0; i < overlapStart; i++) {  contig += seq2[i]; }
1037             }
1038             
1039             int seq1End = fSeq.getEndPos();
1040             int seq2End = rSeq.getEndPos();
1041             int overlapEnd = seq1End;
1042             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
1043             
1044             int oStart = contig.length();
1045             for (int i = overlapStart; i < overlapEnd; i++) {
1046                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
1047                     contig += seq1[i];
1048                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
1049                     if (thisfqualfile != "") {
1050                         if (scores2[BBaseMap[i]] <= insert) { } //
1051                         else { contig += seq2[i];  }
1052                     }else { contig += seq2[i]; } //with no quality info, then we keep it?
1053                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
1054                     if (thisfqualfile != "") {
1055                         if (scores1[ABaseMap[i]] <= insert) { } //
1056                         else { contig += seq1[i];  }
1057                     }else { contig += seq1[i]; } //with no quality info, then we keep it?
1058                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
1059                     if (thisfqualfile != "") {
1060                         if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
1061                             char c = seq1[i];
1062                             if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
1063                             contig += c;
1064                         }else { //if no, base becomes n
1065                             contig += 'N';
1066                         }
1067                         numMismatches++;
1068                     }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
1069                 }else { //should never get here
1070                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
1071                 }
1072             }
1073             int oend = contig.length();
1074             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
1075                 for (int i = overlapEnd; i < length; i++) { contig += seq2[i];  }
1076             }else { //seq2 ends before seq1 so take from overlap to length from seq1
1077                 for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
1078             }
1079             
1080             if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart);  if (contig.length() == 0) { trashCode += "l"; } }
1081             
1082             if(trashCode.length() == 0){
1083                 bool ignore = false;
1084                 
1085                 if (m->debug) { m->mothurOut(fSeq.getName()); }
1086                 
1087                 if (createOligosGroup) {
1088                     if(barcodes.size() != 0){
1089                         string thisGroup = barcodeNameVector[barcodeIndex];
1090                         if (primers.size() != 0) { 
1091                             if (primerNameVector[primerIndex] != "") { 
1092                                 if(thisGroup != "") {
1093                                     thisGroup += "." + primerNameVector[primerIndex]; 
1094                                 }else {
1095                                     thisGroup = primerNameVector[primerIndex]; 
1096                                 }
1097                             } 
1098                         }
1099                         
1100                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
1101                         
1102                         int pos = thisGroup.find("ignore");
1103                         if (pos == string::npos) {
1104                             groupMap[fSeq.getName()] = thisGroup; 
1105                         
1106                             map<string, int>::iterator it = groupCounts.find(thisGroup);
1107                             if (it == groupCounts.end()) {      groupCounts[thisGroup] = 1; }
1108                             else { groupCounts[it->first] ++; }
1109                         }else { ignore = true; }
1110                         
1111                     }
1112                 }else if (createFileGroup) {
1113                     int pos = group.find("ignore");
1114                     if (pos == string::npos) {
1115                         groupMap[fSeq.getName()] = group;
1116                         
1117                         map<string, int>::iterator it = groupCounts.find(group);
1118                         if (it == groupCounts.end()) {  groupCounts[group] = 1; }
1119                         else { groupCounts[it->first] ++; }
1120                     }else { ignore = true; }
1121                 }
1122                 if (m->debug) { m->mothurOut("\n"); }
1123                 
1124                 if(allFiles && !ignore){
1125                     ofstream output;
1126                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
1127                     output << ">" << fSeq.getName() << endl << contig << endl;
1128                     output.close();
1129                 }
1130                 
1131                 //output
1132                 outFasta << ">" << fSeq.getName() << endl << contig << endl;
1133                 int numNs = 0;
1134                 for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; }  }
1135                 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
1136             }else {
1137                 //output
1138                 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
1139             }
1140             num++;
1141             
1142                         //report progress
1143                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1144                 }
1145         
1146                 //report progress
1147                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1148         
1149         inFFasta.close();
1150         inRFasta.close();
1151         outFasta.close();
1152         outScrapFasta.close();
1153         outMisMatch.close();
1154         if (thisfqualfile != "") {
1155             inFQual.close();
1156             inRQual.close();
1157         }
1158         delete alignment;
1159         
1160         if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);  }
1161     
1162         return num;
1163     }
1164         catch(exception& e) {
1165                 m->errorOut(e, "MakeContigsCommand", "driver");
1166                 exit(1);
1167         }
1168 }
1169 //**********************************************************************************************************************
1170 vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq, string findex, string rindex){
1171     try {
1172         vector< vector<string> > files;
1173         //maps processors number to file pointer
1174         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual, tempfiles[4] = forwardIndex, [4] = forwardReverse
1175         map<int, vector<ofstream*> >::iterator it;
1176         
1177         //create files to write to
1178         for (int i = 0; i < processors; i++) {
1179             vector<ofstream*> temp;
1180             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1181             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1182             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1183             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1184             ofstream* outFI = new ofstream;     temp.push_back(outFI);  
1185             ofstream* outRI = new ofstream;     temp.push_back(outRI);  
1186             tempfiles[i] = temp;
1187             
1188             vector<string> names;
1189             string thisOutputDir = outputDir;
1190             if (outputDir == "") { thisOutputDir = m->hasPath(ffastq); }
1191             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "ffastatemp";
1192             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp";
1193             string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp";
1194             string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp";
1195             string findexfilename = ""; string rindexfilename = "";
1196             noneOk = false; //flag to oligos file read that its okay to allow for non paired barcodes
1197             if (findex != "") {  findexfilename = thisOutputDir + m->getRootName(m->getSimpleName(findex)) + toString(i) + "findextemp"; m->openOutputFile(findexfilename, *outFI);  noneOk = true; }
1198             if (rindex != "") {  rindexfilename = thisOutputDir + m->getRootName(m->getSimpleName(rindex)) + toString(i) + "rindextemp";  m->openOutputFile(rindexfilename, *outRI); noneOk = true; }
1199             names.push_back(ffastafilename); names.push_back(fqualfilename);
1200             names.push_back(rfastafilename); names.push_back(rqualfilename);
1201             names.push_back(findexfilename); names.push_back(rindexfilename);
1202             files.push_back(names);
1203             
1204             m->openOutputFile(ffastafilename, *outFF);
1205             m->openOutputFile(rfastafilename, *outRF);
1206             m->openOutputFile(fqualfilename, *outFQ);
1207             m->openOutputFile(rqualfilename, *outRQ);
1208         }
1209         
1210         if (m->control_pressed) {
1211             //close files, delete ofstreams
1212             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1213             //remove files
1214             for (int i = 0; i < files.size(); i++) {  
1215                 for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") {  m->mothurRemove(files[i][j]); }  }
1216             }
1217         }
1218         
1219         ifstream inForward;
1220         m->openInputFile(ffastq, inForward);
1221         
1222         ifstream inReverse;
1223         m->openInputFile(rfastq, inReverse);
1224         
1225         ifstream infIndex, inrIndex;
1226         bool findexIsGood = false;
1227         bool rindexIsGood = false;
1228         if (findex != "") { m->openInputFile(findex, infIndex); findexIsGood = true; }
1229         if (rindex != "") { m->openInputFile(rindex, inrIndex); rindexIsGood = true; }
1230         
1231         count = 0;
1232         map<string, fastqRead> uniques;
1233         map<string, fastqRead> iUniques;
1234         map<string, pairFastqRead> pairUniques;
1235         map<string, fastqRead>::iterator itUniques;
1236         while ((!inForward.eof()) || (!inReverse.eof()) || (findexIsGood) || (rindexIsGood)) {
1237             
1238             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1239             
1240             //get a read from forward and reverse fastq files
1241             bool ignoref, ignorer, ignorefi, ignoreri;
1242             fastqRead thisFread, thisRread, thisFIread, thisRIread;
1243             if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
1244             else { ignoref = true; }
1245             if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
1246             else { ignorer = true; }
1247             if (findexIsGood) { thisFIread = readFastq(infIndex, ignorefi); if (infIndex.eof()) { findexIsGood = false; } }
1248             else { ignorefi = true; }
1249             if (rindexIsGood) { thisRIread = readFastq(inrIndex, ignoreri);  if (inrIndex.eof()) { rindexIsGood = false; } }
1250             else { ignoreri = true; }
1251             
1252             bool allowOne = false;
1253             if ((findex == "") || (rindex == "")) { allowOne = true; }
1254             vector<pairFastqRead> frReads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1255             vector<pairFastqRead> friReads = getReads(ignorefi, ignoreri, thisFIread, thisRIread, iUniques, allowOne);
1256             
1257             //add in index info if provided
1258             vector<pairFastqRead> reads = frReads;
1259             if ((findex != "") || (rindex != "")) {  reads = mergeReads(frReads, friReads, pairUniques); }
1260             
1261             for (int i = 0; i < reads.size(); i++) {
1262                 fastqRead fread = reads[i].forward;
1263                 fastqRead rread = reads[i].reverse;
1264                 fastqRead firead = reads[i].findex;
1265                 fastqRead riread = reads[i].rindex;
1266                 
1267                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); if (findex != "") { m->mothurOut(toString(count) + '\t' + firead.name + '\n'); } if (rindex != "") { m->mothurOut(toString(count) + '\t' + riread.name + '\n'); } }
1268                
1269                 //if (checkReads(fread, rread, ffastq, rfastq)) {
1270                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1271                     
1272                     //if the reads are okay write to output files
1273                     int process = count % processors;
1274                     
1275                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1276                     *(tempfiles[process][1]) << ">" << fread.name << endl;
1277                     for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1278                     *(tempfiles[process][1]) << endl;
1279                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1280                     *(tempfiles[process][3]) << ">" << rread.name << endl;
1281                     for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1282                     *(tempfiles[process][3]) << endl;
1283                     if (findex != "") {  *(tempfiles[process][4]) << ">" << firead.name << endl << firead.sequence << endl; }
1284                     if (rindex != "") {  *(tempfiles[process][5]) << ">" << riread.name << endl << riread.sequence << endl; }
1285                 
1286                     count++;
1287                     
1288                     //report progress
1289                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1290                 //}
1291             }
1292                 }
1293                 //report progress
1294                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1295         
1296         if (uniques.size() != 0) {
1297             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1298                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1299             }
1300             for (map<string, pairFastqRead>:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) {
1301                 m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n");
1302             }
1303             m->mothurOutEndLine();
1304         }
1305         
1306         //close files, delete ofstreams
1307         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1308         inForward.close();
1309         inReverse.close();
1310         if (findex != "") { infIndex.close(); }
1311         if (rindex != "") { inrIndex.close(); }
1312        
1313         return files;
1314     }
1315     catch(exception& e) {
1316         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1317         exit(1);
1318     }
1319 }
1320 //**********************************************************************************************************************
1321 vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& count, string ffasta, string rfasta){
1322     try {
1323         vector< vector<string> > files;
1324         //maps processors number to file pointer
1325         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1326         map<int, vector<ofstream*> >::iterator it;
1327         
1328         //create files to write to
1329         for (int i = 0; i < processors; i++) {
1330             vector<ofstream*> temp;
1331             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1332             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1333             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1334             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1335             tempfiles[i] = temp;
1336             
1337             vector<string> names;
1338             string thisOutputDir = outputDir;
1339             if (outputDir == "") { thisOutputDir = m->hasPath(ffasta); }
1340             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffasta)) + toString(i) + "ffastatemp";
1341             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfasta)) + toString(i) + "rfastatemp";
1342             string fqualfilename = "";
1343             if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp";  m->openOutputFile(fqualfilename, *outFQ); }
1344             string rqualfilename = "";
1345             if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); }
1346             string findexfilename = ""; string rindexfilename = "";
1347             names.push_back(ffastafilename); names.push_back(fqualfilename);
1348             names.push_back(rfastafilename); names.push_back(rqualfilename);
1349             names.push_back(findexfilename); names.push_back(rindexfilename);
1350             files.push_back(names);
1351             
1352             m->openOutputFile(ffastafilename, *outFF);
1353             m->openOutputFile(rfastafilename, *outRF);
1354         }
1355         
1356         if (m->control_pressed) {
1357             //close files, delete ofstreams
1358             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1359             //remove files
1360             for (int i = 0; i < files.size(); i++) {  
1361                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1362             }
1363         }
1364         
1365         ifstream inForwardFasta;
1366         m->openInputFile(ffasta, inForwardFasta);
1367         
1368         ifstream inReverseFasta;
1369         m->openInputFile(rfasta, inReverseFasta);
1370         
1371         ifstream inForwardQual, inReverseQual;
1372         if (fqualfile != "") { m->openInputFile(fqualfile, inForwardQual); m->openInputFile(rqualfile, inReverseQual); }
1373         
1374         count = 0;
1375         map<string, fastqRead> uniques;
1376         map<string, fastqRead>::iterator itUniques;
1377         while ((!inForwardFasta.eof()) || (!inReverseFasta.eof())) {
1378             
1379             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1380             
1381             //get a reads from forward and reverse fasta files
1382             bool ignoref, ignorer;
1383             fastqRead thisFread, thisRread;
1384             if (!inForwardFasta.eof()) {  
1385                 ignoref = false; 
1386                 Sequence temp(inForwardFasta); m->gobble(inForwardFasta);
1387                 thisFread.name = temp.getName();
1388                 thisFread.sequence = temp.getUnaligned();
1389             }else { ignoref = true; }
1390             if (!inReverseFasta.eof()) {  
1391                 ignorer = false; 
1392                 Sequence temp(inReverseFasta); m->gobble(inReverseFasta);
1393                 thisRread.name = temp.getName();
1394                 thisRread.sequence = temp.getUnaligned();  
1395             }else { ignorer = true; }
1396             
1397             //get qual reads if given
1398             if (fqualfile != "") {
1399                 if (!inForwardQual.eof() && !ignoref) {  
1400                     QualityScores temp(inForwardQual); m->gobble(inForwardQual);
1401                     //if forward files dont match ignore read
1402                     if (thisFread.name != temp.getName()) { ignoref = true; } 
1403                     else { thisFread.scores = temp.getQualityScores(); }
1404                 }else { ignoref = true; }
1405                 if (!inReverseQual.eof() && !ignorer) {  
1406                     QualityScores temp(inReverseQual); m->gobble(inReverseQual);
1407                     //if reverse files dont match ignore read
1408                     if (thisRread.name != temp.getName()) { ignorer = true; } 
1409                     else { thisRread.scores = temp.getQualityScores(); }
1410                 }else { ignorer = true; }
1411             }
1412             
1413             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1414             
1415             for (int i = 0; i < reads.size(); i++) {
1416                 fastqRead fread = reads[i].forward;
1417                 fastqRead rread = reads[i].reverse;
1418                 
1419                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1420                 
1421                // if (checkReads(fread, rread, ffasta, rfasta)) {
1422                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1423                     
1424                     //if the reads are okay write to output files
1425                     int process = count % processors;
1426                     
1427                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1428                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1429                     if (fqualfile != "") { //if you have quality info, print it
1430                         *(tempfiles[process][1]) << ">" << fread.name << endl;
1431                         for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1432                         *(tempfiles[process][1]) << endl;
1433                         *(tempfiles[process][3]) << ">" << rread.name << endl;
1434                         for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1435                         *(tempfiles[process][3]) << endl;
1436                     }
1437                     count++;
1438                     
1439                     //report progress
1440                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1441                 //}
1442             }
1443                 }
1444                 //report progress
1445                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1446         
1447         if (uniques.size() != 0) {
1448             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1449                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1450             }
1451             m->mothurOutEndLine();
1452         }
1453         
1454         //close files, delete ofstreams
1455         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1456         inReverseFasta.close(); 
1457         inForwardFasta.close(); 
1458         if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); }
1459         
1460         return files;
1461     }
1462     catch(exception& e) {
1463         m->errorOut(e, "MakeContigsCommand", "readFastaFiles");
1464         exit(1);
1465     }
1466 }
1467 //**********************************************************************************************************************
1468 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool allowOne){
1469     try {
1470         vector<pairFastqRead> reads;
1471         map<string, fastqRead>::iterator itUniques;
1472             
1473         if (!ignoref && !ignorer) {
1474             if (forward.name == reverse.name) { 
1475                 pairFastqRead temp(forward, reverse);
1476                 reads.push_back(temp);
1477             }else {
1478                 bool match = false;
1479                 //if no match are the names only different by 1 and 2?
1480                 string tempFRead = forward.name.substr(0, forward.name.length()-1);
1481                 string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
1482                 if (tempFRead == tempRRead) {
1483                     if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
1484                         forward.name = tempFRead;
1485                         reverse.name = tempRRead;
1486                         pairFastqRead temp(forward, reverse);
1487                         reads.push_back(temp);
1488                         match = true;
1489                     }
1490                 }
1491                 
1492                 if (!match) {
1493                     //look for forward pair
1494                     itUniques = uniques.find(forward.name);
1495                     if (itUniques != uniques.end()) {  //we have the pair for this read
1496                         pairFastqRead temp(forward, itUniques->second);
1497                         reads.push_back(temp);
1498                         uniques.erase(itUniques);
1499                     }else { //save this read for later
1500                         uniques[forward.name] = forward;
1501                     }
1502                     
1503                     //look for reverse pair
1504                     itUniques = uniques.find(reverse.name);
1505                     if (itUniques != uniques.end()) {  //we have the pair for this read
1506                         pairFastqRead temp(itUniques->second, reverse);
1507                         reads.push_back(temp);
1508                         uniques.erase(itUniques);
1509                     }else { //save this read for later
1510                         uniques[reverse.name] = reverse;
1511                     }
1512                 }
1513                                 
1514             }
1515         }else if (!ignoref && ignorer) { //ignore reverse keep forward
1516             if (allowOne) {
1517                 fastqRead dummy;
1518                 pairFastqRead temp(forward, dummy);
1519                 reads.push_back(temp);
1520             }else {
1521                 //look for forward pair
1522                 itUniques = uniques.find(forward.name);
1523                 if (itUniques != uniques.end()) {  //we have the pair for this read
1524                     pairFastqRead temp(forward, itUniques->second);
1525                     reads.push_back(temp);
1526                     uniques.erase(itUniques);
1527                 }else { //save this read for later
1528                     uniques[forward.name] = forward;
1529                 }
1530             }
1531         }else if (ignoref && !ignorer) { //ignore forward keep reverse
1532             if (allowOne) {
1533                 fastqRead dummy;
1534                 pairFastqRead temp(dummy, reverse);
1535                 reads.push_back(temp);
1536             }else {
1537                 //look for reverse pair
1538                 itUniques = uniques.find(reverse.name);
1539                 if (itUniques != uniques.end()) {  //we have the pair for this read
1540                     pairFastqRead temp(itUniques->second, reverse);
1541                     reads.push_back(temp);
1542                     uniques.erase(itUniques);
1543                 }else { //save this read for later
1544                     uniques[reverse.name] = reverse;
1545                 }
1546             }
1547         }//else ignore both and do nothing
1548         
1549         return reads;
1550     }
1551     catch(exception& e) {
1552         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1553         exit(1);
1554     }
1555 }
1556 //**********************************************************************************************************************
1557 //look through the reads from the forward and reverse files and try to find matching reads from index files.
1558 vector<pairFastqRead> MakeContigsCommand::mergeReads(vector<pairFastqRead> thisReads, vector<pairFastqRead> indexes, map<string, pairFastqRead>& uniques){
1559     try {
1560         vector<pairFastqRead> reads;
1561         map<string, pairFastqRead>::iterator itUniques;
1562         
1563         set<int> foundIndexes;
1564         for (int i = 0; i < thisReads.size(); i++) {
1565             bool found = false;
1566             for (int j = 0; j < indexes.size(); j++) {
1567                 
1568                 //incase only one index
1569                 string indexName = indexes[j].forward.name;
1570                 if (indexName == "") { indexName = indexes[j].reverse.name; }
1571                 
1572                 if (thisReads[i].forward.name == indexName){
1573                     thisReads[i].findex = indexes[j].forward;
1574                     thisReads[i].rindex = indexes[j].reverse;
1575                     reads.push_back(thisReads[i]);
1576                     found = true;
1577                     foundIndexes.insert(j);
1578                 }
1579             }
1580             
1581             if (!found) {
1582                 //look for forward pair
1583                 itUniques = uniques.find('i'+thisReads[i].forward.name);
1584                 if (itUniques != uniques.end()) {  //we have the pair for this read
1585                     thisReads[i].findex = itUniques->second.forward;
1586                     thisReads[i].rindex = itUniques->second.reverse;
1587                     reads.push_back(thisReads[i]);
1588                     uniques.erase(itUniques);
1589                 }else { //save this read for later
1590                     uniques['r'+thisReads[i].forward.name] = thisReads[i];
1591                 }
1592             }
1593         }
1594         
1595         if (foundIndexes.size() != indexes.size()) { //if we didnt match all the indexes look for them in uniques
1596             for (int j = 0; j < indexes.size(); j++) {
1597                 if (foundIndexes.count(j) == 0) { //we didnt find this one
1598                     //incase only one index
1599                     string indexName = indexes[j].forward.name;
1600                     if (indexName == "") { indexName = indexes[j].reverse.name; }
1601                     
1602                     //look for forward pair
1603                     itUniques = uniques.find('r'+indexName);
1604                     if (itUniques != uniques.end()) {  //we have the pair for this read
1605                         pairFastqRead temp(itUniques->second.forward, itUniques->second.reverse, indexes[j].forward, indexes[j].reverse);
1606                         reads.push_back(temp);
1607                         uniques.erase(itUniques);
1608                     }else { //save this read for later
1609                         uniques['i'+indexName] = indexes[j];
1610                     }
1611                 }
1612             }
1613         }
1614        
1615         
1616         return reads;
1617     }
1618     catch(exception& e) {
1619         m->errorOut(e, "MakeContigsCommand", "mergeReads");
1620         exit(1);
1621     }
1622 }
1623 //**********************************************************************************************************************
1624 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1625     try {
1626         fastqRead read;
1627         
1628         ignore = false;
1629         
1630         //read sequence name
1631         string line = m->getline(in); m->gobble(in);
1632         vector<string> pieces = m->splitWhiteSpace(line);
1633         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
1634         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
1635         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1636         else { name = name.substr(1); }
1637         
1638         //read sequence
1639         string sequence = m->getline(in); m->gobble(in);
1640         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1641         
1642         //read sequence name
1643         line = m->getline(in); m->gobble(in);
1644         pieces = m->splitWhiteSpace(line);
1645         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
1646         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1647         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1648         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1649         
1650         //read quality scores
1651         string quality = m->getline(in); m->gobble(in);
1652         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1653          
1654         //sanity check sequence length and number of quality scores match
1655         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1656         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1657         
1658         vector<int> qualScores = convertQual(quality);
1659         
1660         read.name = name;
1661         read.sequence = sequence;
1662         read.scores = qualScores;
1663         
1664         if (m->debug) { m->mothurOut("[DEBUG]: " + read.name + " " + read.sequence + " " + quality + "\n"); }
1665
1666         return read;
1667     }
1668     catch(exception& e) {
1669         m->errorOut(e, "MakeContigsCommand", "readFastq");
1670         exit(1);
1671     }
1672 }
1673 /**********************************************************************************************************************
1674 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
1675     try {
1676         bool good = true;
1677         
1678         //do sequence lengths match
1679         if (forward.sequence.length() != reverse.sequence.length()) {
1680             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfile + ", ignoring.\n");
1681             good = false; 
1682         }
1683         
1684         //do number of qual scores match 
1685         if (forward.scores.size() != reverse.scores.size()) {
1686             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfile + ", ignoring.\n");
1687             good = false; 
1688         }
1689
1690         return good;
1691     }
1692     catch(exception& e) {
1693         m->errorOut(e, "MakeContigsCommand", "checkReads");
1694         exit(1);
1695     }
1696 }*/
1697 //***************************************************************************************************************
1698 //lines can be 2, 3, or 4 columns
1699 // forward.fastq reverse.fastq -> 2 column
1700 // groupName forward.fastq reverse.fastq -> 3 column
1701 // forward.fastq reverse.fastq forward.index.fastq  reverse.index.fastq  -> 4 column
1702 // forward.fastq reverse.fastq none  reverse.index.fastq  -> 4 column
1703 // forward.fastq reverse.fastq forward.index.fastq  none  -> 4 column
1704 vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
1705         try {
1706         vector< vector<string> > files;
1707         string forward, reverse, findex, rindex;
1708         
1709         ifstream in;
1710         m->openInputFile(filename, in);
1711         
1712         while(!in.eof()) {
1713             
1714             if (m->control_pressed) { return files; }
1715             
1716             string line = m->getline(in);  m->gobble(in);
1717             vector<string> pieces = m->splitWhiteSpace(line);
1718             
1719             string group = "";
1720             if (pieces.size() == 2) {
1721                 forward = pieces[0];
1722                 reverse = pieces[1];
1723                 group = "";
1724                 findex = "";
1725                 rindex = "";
1726             }else if (pieces.size() == 3) {
1727                 group = pieces[0];
1728                 forward = pieces[1];
1729                 reverse = pieces[2];
1730                 findex = "";
1731                 rindex = "";
1732                 createFileGroup = true;
1733             }else if (pieces.size() == 4) {
1734                 forward = pieces[0];
1735                 reverse = pieces[1];
1736                 findex = pieces[2];
1737                 rindex = pieces[3];
1738                 if ((findex == "none") || (findex == "NONE")){ findex = ""; }
1739                 if ((rindex == "none") || (rindex == "NONE")){ rindex = ""; }
1740             }else {
1741                 m->mothurOut("[ERROR]: file lines can be 2, 3, or 4 columns. The forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one. \n"); m->control_pressed = true;
1742             }
1743             
1744             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); }
1745             
1746             //check to make sure both are able to be opened
1747             ifstream in2;
1748             int openForward = m->openInputFile(forward, in2, "noerror");
1749             
1750             //if you can't open it, try default location
1751             if (openForward == 1) {
1752                 if (m->getDefaultPath() != "") { //default path is set
1753                     string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
1754                     m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
1755                     ifstream in3;
1756                     openForward = m->openInputFile(tryPath, in3, "noerror");
1757                     in3.close();
1758                     forward = tryPath;
1759                 }
1760             }
1761             
1762             //if you can't open it, try output location
1763             if (openForward == 1) {
1764                 if (m->getOutputDir() != "") { //default path is set
1765                     string tryPath = m->getOutputDir() + m->getSimpleName(forward);
1766                     m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1767                     ifstream in4;
1768                     openForward = m->openInputFile(tryPath, in4, "noerror");
1769                     forward = tryPath;
1770                     in4.close();
1771                 }
1772             }
1773             
1774             if (openForward == 1) { //can't find it
1775                 m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n"); 
1776             }else{  in2.close();  }
1777             
1778             ifstream in3;
1779             int openReverse = m->openInputFile(reverse, in3, "noerror");
1780             
1781             //if you can't open it, try default location
1782             if (openReverse == 1) {
1783                 if (m->getDefaultPath() != "") { //default path is set
1784                     string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
1785                     m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
1786                     ifstream in3;
1787                     openReverse = m->openInputFile(tryPath, in3, "noerror");
1788                     in3.close();
1789                     reverse = tryPath;
1790                 }
1791             }
1792             
1793             //if you can't open it, try output location
1794             if (openReverse == 1) {
1795                 if (m->getOutputDir() != "") { //default path is set
1796                     string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
1797                     m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1798                     ifstream in4;
1799                     openReverse = m->openInputFile(tryPath, in4, "noerror");
1800                     reverse = tryPath;
1801                     in4.close();
1802                 }
1803             }
1804             
1805             if (openReverse == 1) { //can't find it
1806                 m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n"); 
1807             }else{  in3.close();  }
1808             
1809             int openFindex = 0;
1810             if (findex != "") {
1811                 ifstream in4;
1812                 openFindex = m->openInputFile(findex, in4, "noerror"); in4.close();
1813                 
1814                 //if you can't open it, try default location
1815                 if (openFindex == 1) {
1816                     if (m->getDefaultPath() != "") { //default path is set
1817                         string tryPath = m->getDefaultPath() + m->getSimpleName(findex);
1818                         m->mothurOut("Unable to open " + findex + ". Trying default " + tryPath); m->mothurOutEndLine();
1819                         ifstream in5;
1820                         openFindex = m->openInputFile(tryPath, in5, "noerror");
1821                         in5.close();
1822                         findex = tryPath;
1823                     }
1824                 }
1825                 
1826                 //if you can't open it, try output location
1827                 if (openFindex == 1) {
1828                     if (m->getOutputDir() != "") { //default path is set
1829                         string tryPath = m->getOutputDir() + m->getSimpleName(findex);
1830                         m->mothurOut("Unable to open " + findex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1831                         ifstream in6;
1832                         openFindex = m->openInputFile(tryPath, in6, "noerror");
1833                         findex = tryPath;
1834                         in6.close();
1835                     }
1836                 }
1837                 
1838                 if (openFindex == 1) { //can't find it
1839                     m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n");
1840                 }
1841             }
1842             
1843             int openRindex = 0;
1844             if (rindex != "") {
1845                 ifstream in7;
1846                 openRindex = m->openInputFile(rindex, in7, "noerror"); in7.close();
1847                 
1848                 //if you can't open it, try default location
1849                 if (openRindex == 1) {
1850                     if (m->getDefaultPath() != "") { //default path is set
1851                         string tryPath = m->getDefaultPath() + m->getSimpleName(rindex);
1852                         m->mothurOut("Unable to open " + rindex + ". Trying default " + tryPath); m->mothurOutEndLine();
1853                         ifstream in8;
1854                         openRindex = m->openInputFile(tryPath, in8, "noerror");
1855                         in8.close();
1856                         rindex = tryPath;
1857                     }
1858                 }
1859                 
1860                 //if you can't open it, try output location
1861                 if (openRindex == 1) {
1862                     if (m->getOutputDir() != "") { //default path is set
1863                         string tryPath = m->getOutputDir() + m->getSimpleName(rindex);
1864                         m->mothurOut("Unable to open " + rindex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1865                         ifstream in9;
1866                         openRindex = m->openInputFile(tryPath, in9, "noerror");
1867                         rindex = tryPath;
1868                         in9.close();
1869                     }
1870                 }
1871                 
1872                 if (openRindex == 1) { //can't find it
1873                     m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n");
1874                 }
1875             }
1876
1877             
1878             if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1)) { //good pair
1879                 file2Group[files.size()] = group;
1880                 vector<string> pair;
1881                 pair.push_back(forward);
1882                 pair.push_back(reverse);
1883                 pair.push_back(findex);
1884                 pair.push_back(rindex);
1885                 if (((findex != "") || (rindex != "")) && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->control_pressed = true;  }
1886                 files.push_back(pair);
1887             }
1888         }
1889         in.close();
1890         
1891         return files;
1892     }
1893     catch(exception& e) {
1894         m->errorOut(e, "MakeContigsCommand", "readFileNames");
1895         exit(1);
1896     }
1897 }
1898 //***************************************************************************************************************
1899 //illumina data requires paired forward and reverse data
1900 //BARCODE   atgcatgc   atgcatgc    groupName 
1901 //PRIMER   atgcatgc   atgcatgc    groupName  
1902 //PRIMER   atgcatgc   atgcatgc  
1903 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
1904         try {
1905                 ifstream in;
1906                 m->openInputFile(oligosfile, in);
1907                 
1908                 ofstream test;
1909                 
1910                 string type, foligo, roligo, group;
1911         
1912                 int indexPrimer = 0;
1913                 int indexBarcode = 0;
1914         set<string> uniquePrimers;
1915         set<string> uniqueBarcodes;
1916                 
1917                 while(!in.eof()){
1918             
1919                         in >> type; 
1920     
1921                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1922             
1923                         if(type[0] == '#'){
1924                                 while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
1925                                 m->gobble(in);
1926                         }
1927                         else{
1928                                 m->gobble(in);
1929                                 //make type case insensitive
1930                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1931                                 
1932                                 in >> foligo;
1933                 
1934                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1935                                 
1936                                 for(int i=0;i<foligo.length();i++){
1937                                         foligo[i] = toupper(foligo[i]);
1938                                         if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
1939                                 }
1940                                 
1941                                 if(type == "PRIMER"){
1942                                         m->gobble(in);
1943                                         
1944                     in >> roligo;
1945                     
1946                     for(int i=0;i<roligo.length();i++){
1947                         roligo[i] = toupper(roligo[i]);
1948                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1949                     }
1950                     //roligo = reverseOligo(roligo);
1951                     
1952                     if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
1953                     
1954                     group = "";
1955                     
1956                                         // get rest of line in case there is a primer name
1957                                         while (!in.eof())       {       
1958                                                 char c = in.get(); 
1959                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1960                                                 else if (c == 32 || c == 9){;} //space or tab
1961                                                 else {  group += c;  }
1962                                         } 
1963                     
1964                     oligosPair newPrimer(foligo, roligo);
1965                     
1966                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1967                     
1968                                         //check for repeat barcodes
1969                     string tempPair = foligo+roligo;
1970                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1971                     else { uniquePrimers.insert(tempPair); }
1972                                         
1973                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1974                     
1975                                         primers[indexPrimer]=newPrimer; indexPrimer++;          
1976                                         primerNameVector.push_back(group);
1977                                 }else if(type == "BARCODE"){
1978                                         m->gobble(in);
1979                                         
1980                     in >> roligo;
1981                     
1982                     for(int i=0;i<roligo.length();i++){
1983                         roligo[i] = toupper(roligo[i]);
1984                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1985                     }
1986                     //roligo = reverseOligo(roligo);
1987                     
1988                     oligosPair newPair(foligo, roligo);
1989                     
1990                     if ((foligo == "NONE") || (roligo == "NONE")) { if (!noneOk) { m->mothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } }
1991                     
1992                     group = "";
1993                     while (!in.eof())   {       
1994                                                 char c = in.get(); 
1995                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1996                                                 else if (c == 32 || c == 9){;} //space or tab
1997                                                 else {  group += c;  }
1998                                         } 
1999                                         
2000                     if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
2001                         
2002                     //check for repeat barcodes
2003                     string tempPair = foligo+roligo;
2004                     if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
2005                     else { uniqueBarcodes.insert(tempPair); }
2006                         
2007                     barcodes[indexBarcode]=newPair; indexBarcode++;
2008                                         barcodeNameVector.push_back(group);
2009                                 }else if(type == "LINKER"){
2010                                         linker.push_back(foligo);
2011                     m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
2012                                 }else if(type == "SPACER"){
2013                                         spacer.push_back(foligo);
2014                     m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
2015                                 }
2016                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
2017                         }
2018                         m->gobble(in);
2019                 }       
2020                 in.close();
2021                 
2022                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
2023                 
2024                 //add in potential combos
2025                 if(barcodeNameVector.size() == 0){
2026             oligosPair temp("", "");
2027                         barcodes[0] = temp;
2028                         barcodeNameVector.push_back("");                        
2029                 }
2030                 
2031                 if(primerNameVector.size() == 0){
2032             oligosPair temp("", "");
2033                         primers[0] = temp;
2034                         primerNameVector.push_back("");                 
2035                 }
2036                 
2037                 fastaFileNames.resize(barcodeNameVector.size());
2038                 for(int i=0;i<fastaFileNames.size();i++){
2039                         fastaFileNames[i].assign(primerNameVector.size(), "");
2040                 }
2041                 
2042                 if(allFiles){
2043                         set<string> uniqueNames; //used to cleanup outputFileNames
2044                         for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
2045                                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
2046                                         
2047                                         string primerName = primerNameVector[itPrimer->first];
2048                                         string barcodeName = barcodeNameVector[itBar->first];
2049                     
2050                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
2051                                         else {
2052                         string comboGroupName = "";
2053                         string fastaFileName = "";
2054                         string qualFileName = "";
2055                         string nameFileName = "";
2056                         string countFileName = "";
2057                         
2058                         if(primerName == ""){
2059                             comboGroupName = barcodeNameVector[itBar->first];
2060                         }
2061                         else{
2062                             if(barcodeName == ""){
2063                                 comboGroupName = primerNameVector[itPrimer->first];
2064                             }
2065                             else{
2066                                 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
2067                             }
2068                         }
2069                         
2070                         
2071                         ofstream temp;
2072                         fastaFileName = rootname + comboGroupName + ".fasta";
2073                         if (uniqueNames.count(fastaFileName) == 0) {
2074                             outputNames.push_back(fastaFileName);
2075                             outputTypes["fasta"].push_back(fastaFileName);
2076                             uniqueNames.insert(fastaFileName);
2077                         }
2078                         
2079                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
2080                         m->openOutputFile(fastaFileName, temp);         temp.close();
2081                     }
2082                                 }
2083                         }
2084                 }
2085                 
2086                 bool allBlank = true;
2087                 for (int i = 0; i < barcodeNameVector.size(); i++) {
2088                         if (barcodeNameVector[i] != "") {
2089                                 allBlank = false;
2090                                 break;
2091                         }
2092                 }
2093                 for (int i = 0; i < primerNameVector.size(); i++) {
2094                         if (primerNameVector[i] != "") {
2095                                 allBlank = false;
2096                                 break;
2097                         }
2098                 }
2099         
2100                 if (allBlank) {
2101                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
2102                         allFiles = false;
2103                         return false;
2104                 }
2105                 
2106                 return true;
2107                 
2108         }
2109         catch(exception& e) {
2110                 m->errorOut(e, "MakeContigsCommand", "getOligos");
2111                 exit(1);
2112         }
2113 }
2114 //********************************************************************/
2115 string MakeContigsCommand::reverseOligo(string oligo){
2116         try {
2117         string reverse = "";
2118         
2119         for(int i=oligo.length()-1;i>=0;i--){
2120             
2121             if(oligo[i] == 'A')         {       reverse += 'T'; }
2122             else if(oligo[i] == 'T'){   reverse += 'A'; }
2123             else if(oligo[i] == 'U'){   reverse += 'A'; }
2124             
2125             else if(oligo[i] == 'G'){   reverse += 'C'; }
2126             else if(oligo[i] == 'C'){   reverse += 'G'; }
2127             
2128             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2129             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2130             
2131             else if(oligo[i] == 'M'){   reverse += 'K'; }
2132             else if(oligo[i] == 'K'){   reverse += 'M'; }
2133             
2134             else if(oligo[i] == 'W'){   reverse += 'W'; }
2135             else if(oligo[i] == 'S'){   reverse += 'S'; }
2136             
2137             else if(oligo[i] == 'B'){   reverse += 'V'; }
2138             else if(oligo[i] == 'V'){   reverse += 'B'; }
2139             
2140             else if(oligo[i] == 'D'){   reverse += 'H'; }
2141             else if(oligo[i] == 'H'){   reverse += 'D'; }
2142             
2143             else                                                {       reverse += 'N'; }
2144         }
2145         
2146         
2147         return reverse;
2148     }
2149         catch(exception& e) {
2150                 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
2151                 exit(1);
2152         }
2153 }
2154 //**********************************************************************************************************************
2155 vector<int> MakeContigsCommand::convertQual(string qual) {
2156         try {
2157                 vector<int> qualScores;
2158         bool negativeScores = false;
2159                 
2160                 for (int i = 0; i < qual.length(); i++) { 
2161             
2162             int temp = 0;
2163             temp = int(qual[i]);
2164             if (format == "illumina") {
2165                 temp -= 64; //char '@'
2166             }else if (format == "illumina1.8+") {
2167                     temp -= int('!'); //char '!'
2168             }else if (format == "solexa") {
2169                 temp = int(convertTable[temp]); //convert to sanger
2170                 temp -= int('!'); //char '!'
2171             }else {
2172                 temp -= int('!'); //char '!'
2173             }
2174             
2175             if (temp < -5) { negativeScores = true; }
2176                         qualScores.push_back(temp);
2177                 }
2178                 
2179         if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  }
2180         
2181                 return qualScores;
2182         }
2183         catch(exception& e) {
2184                 m->errorOut(e, "MakeContigsCommand", "convertQual");
2185                 exit(1);
2186         }
2187 }
2188
2189 //**********************************************************************************************************************
2190
2191
2192
2193