]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
skip cases where scores.size is less than the base map
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
15         CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
16         CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
17         CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
18         CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
19         CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
20         CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
21         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
22         CommandParameter pfindex("findex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pfindex);
23         CommandParameter prindex("rindex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(prindex);
24                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
26         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
27
28         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
29         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
30         CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
31                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
32                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
33                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
34                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
35         CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
36         CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
37                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
38         CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
41                 
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MakeContigsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string MakeContigsCommand::getHelpString(){     
53         try {
54                 string helpString = "";
55                 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. \n";
56         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
57         helpString += "If a forward index or reverse index file is provided barcodes be trimmed, and a group file will be created. The oligos parameter is required if an index file is given.\n";
58                 helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, findex, rindex, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
59                 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
60         helpString += "The file parameter is 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one.  Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
61         helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process.  If you provide one, you must provide the other.\n";
62         helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process.  If you provide one, you must provide the other.\n";
63         helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters.  If you provide one, you must provide the other.\n";
64                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
65         helpString += "The findex and rindex parameters are used to provide a forward index and reverse index files to process.  \n";
66         helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
67         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
68                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70         //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
71                 //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
72                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
73                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
74         helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base.  For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5).  If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
75                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
76                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
77         helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
78         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
79         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
80
81         helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
82         helpString += "The make.contigs command should be in the following format: \n";
83                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
84                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
85                 return helpString;
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
89                 exit(1);
90         }
91 }
92 //**********************************************************************************************************************
93 string MakeContigsCommand::getOutputPattern(string type) {
94     try {
95         string pattern = "";
96         
97         if (type == "fasta") {  pattern = "[filename],[tag],contigs.fasta"; } 
98         else if (type == "group") {  pattern = "[filename],[tag],contigs.groups"; }
99         else if (type == "report") {  pattern = "[filename],[tag],contigs.report"; }
100         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
101         
102         return pattern;
103     }
104     catch(exception& e) {
105         m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
106         exit(1);
107     }
108 }
109 //**********************************************************************************************************************
110 MakeContigsCommand::MakeContigsCommand(){       
111         try {
112                 abort = true; calledHelp = true; 
113                 setParameters();
114                 vector<string> tempOutNames;
115                 outputTypes["fasta"] = tempOutNames;
116         outputTypes["group"] = tempOutNames;
117         outputTypes["report"] = tempOutNames;
118     }
119         catch(exception& e) {
120                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
121                 exit(1);
122         }
123 }
124 //**********************************************************************************************************************
125 MakeContigsCommand::MakeContigsCommand(string option)  {
126         try {
127                 abort = false; calledHelp = false;
128         createFileGroup = false; createOligosGroup = false;
129         
130                 //allow user to run help
131                 if(option == "help") { help(); abort = true; calledHelp = true; }
132                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
133                 
134                 else {
135                         vector<string> myArray = setParameters();
136                         
137                         OptionParser parser(option);
138                         map<string, string> parameters = parser.getParameters(); 
139                         
140                         ValidParameters validParameter("pairwise.seqs");
141                         map<string, string>::iterator it;
142                         
143                         //check to make sure all parameters are valid for command
144                         for (it = parameters.begin(); it != parameters.end(); it++) { 
145                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
146                         }
147                         
148                         //initialize outputTypes
149                         vector<string> tempOutNames;
150                         outputTypes["fasta"] = tempOutNames;
151             outputTypes["report"] = tempOutNames;
152             outputTypes["group"] = tempOutNames;
153                         
154             
155                         //if the user changes the input directory command factory will send this info to us in the output parameter 
156                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
157                         if (inputDir == "not found"){   inputDir = "";          }
158                         else { 
159                                 string path;
160                 it = parameters.find("ffastq");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
166                                 }
167                 
168                 it = parameters.find("rfastq");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
174                                 }
175                 
176                 it = parameters.find("ffasta");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["ffasta"] = inputDir + it->second;           }
182                                 }
183                 
184                 it = parameters.find("rfasta");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["rfasta"] = inputDir + it->second;           }
190                                 }
191                 
192                 it = parameters.find("fqfile");
193                                 //user has given a template file
194                                 if(it != parameters.end()){ 
195                                         path = m->hasPath(it->second);
196                                         //if the user has not given a path then, add inputdir. else leave path alone.
197                                         if (path == "") {       parameters["fqfile"] = inputDir + it->second;           }
198                                 }
199                 
200                 it = parameters.find("rqfile");
201                                 //user has given a template file
202                                 if(it != parameters.end()){ 
203                                         path = m->hasPath(it->second);
204                                         //if the user has not given a path then, add inputdir. else leave path alone.
205                                         if (path == "") {       parameters["rqfile"] = inputDir + it->second;           }
206                                 }
207                 
208                 it = parameters.find("file");
209                                 //user has given a template file
210                                 if(it != parameters.end()){ 
211                                         path = m->hasPath(it->second);
212                                         //if the user has not given a path then, add inputdir. else leave path alone.
213                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
214                                 }
215                 
216                 it = parameters.find("oligos");
217                                 //user has given a template file
218                                 if(it != parameters.end()){ 
219                                         path = m->hasPath(it->second);
220                                         //if the user has not given a path then, add inputdir. else leave path alone.
221                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
222                                 }
223                 
224                 it = parameters.find("findex");
225                                 //user has given a template file
226                                 if(it != parameters.end()){
227                                         path = m->hasPath(it->second);
228                                         //if the user has not given a path then, add inputdir. else leave path alone.
229                                         if (path == "") {       parameters["findex"] = inputDir + it->second;           }
230                                 }
231                 
232                 it = parameters.find("rindex");
233                                 //user has given a template file
234                                 if(it != parameters.end()){
235                                         path = m->hasPath(it->second);
236                                         //if the user has not given a path then, add inputdir. else leave path alone.
237                                         if (path == "") {       parameters["rindex"] = inputDir + it->second;           }
238                                 }
239             }
240             
241             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
242                         if (ffastqfile == "not open") {  abort = true; }        
243                         else if (ffastqfile == "not found") { ffastqfile = ""; }
244                         
245                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
246                         if (rfastqfile == "not open") {  abort = true; }        
247                         else if (rfastqfile == "not found") { rfastqfile = "";  }
248             
249             ffastafile = validParameter.validFile(parameters, "ffasta", true);
250                         if (ffastafile == "not open") {  abort = true; }        
251                         else if (ffastafile == "not found") { ffastafile = ""; }
252                         
253                         rfastafile = validParameter.validFile(parameters, "rfasta", true);
254                         if (rfastafile == "not open") {  abort = true; }        
255                         else if (rfastafile == "not found") { rfastafile = "";  }
256             
257             fqualfile = validParameter.validFile(parameters, "fqfile", true);
258                         if (fqualfile == "not open") {  abort = true; } 
259                         else if (fqualfile == "not found") { fqualfile = ""; }
260                         
261                         rqualfile = validParameter.validFile(parameters, "rqfile", true);
262                         if (rqualfile == "not open") {  abort = true; } 
263                         else if (rqualfile == "not found") { rqualfile = "";  }
264             
265             file = validParameter.validFile(parameters, "file", true);
266                         if (file == "not open") {  abort = true; }      
267                         else if (file == "not found") { file = "";  }
268             
269             //provide at least
270             if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
271             if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
272             if ((ffastqfile != "") && (rfastqfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffastq, you must provide a rfastq file.\n"); }
273             if ((ffastqfile == "") && (rfastqfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfastq, you must provide a ffastq file.\n"); }
274             if ((ffastafile != "") && (rfastafile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffasta, you must provide a rfasta file.\n"); }
275             if ((ffastafile == "") && (rfastafile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfasta, you must provide a ffasta file.\n"); }
276             if ((fqualfile != "") && (rqualfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the fqfile, you must provide a rqfile file.\n"); }
277             if ((fqualfile == "") && (rqualfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile, you must provide a fqfile file.\n"); }
278             if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) {
279                 abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n");
280             }
281             
282             oligosfile = validParameter.validFile(parameters, "oligos", true);
283                         if (oligosfile == "not found")      {   oligosfile = "";        }
284                         else if(oligosfile == "not open")   {   abort = true;       } 
285                         else {   m->setOligosFile(oligosfile);          }
286             
287             findexfile = validParameter.validFile(parameters, "findex", true);
288                         if (findexfile == "not found")      {   findexfile = "";        }
289                         else if(findexfile == "not open")   {   abort = true;       }
290             
291             rindexfile = validParameter.validFile(parameters, "rindex", true);
292                         if (rindexfile == "not found")      {   rindexfile = "";        }
293                         else if(rindexfile == "not open")   {   abort = true;       }
294         
295             if ((rindexfile != "") || (findexfile != "")) {
296                                 if (oligosfile == ""){
297                                         oligosfile = m->getOligosFile();
298                                         if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n");  }
299                                         else {
300                         m->mothurOut("You need to provide an oligos file if you are going to use an index file.\n"); abort = true;
301                                         }
302                                 }
303                 
304                 //can only use an index file with the fastq parameters not fasta and qual
305                 if ((ffastafile != "") || (rfastafile != "")) {
306                     m->mothurOut("[ERROR]: You can only use an index file with the fastq parameters or the file option.\n"); abort = true;
307                 }
308                         }
309             
310             //if the user changes the output directory command factory will send this info to us in the output parameter 
311                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
312                  outputDir = ""; 
313             }
314                         
315
316                         //check for optional parameter and set defaults
317                         // ...at some point should added some additional type checking...
318                         string temp;
319                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
320                         m->mothurConvert(temp, match);  
321                         
322                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
323                         m->mothurConvert(temp, misMatch);  
324             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
325                         
326                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
327                         m->mothurConvert(temp, gapOpen);  
328             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
329                         
330                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
331                         m->mothurConvert(temp, gapExtend); 
332             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
333                         
334             temp = validParameter.validFile(parameters, "insert", false);       if (temp == "not found"){       temp = "20";                    }
335                         m->mothurConvert(temp, insert); 
336             if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
337
338             temp = validParameter.validFile(parameters, "deltaq", false);       if (temp == "not found"){       temp = "6";                     }
339                         m->mothurConvert(temp, deltaq);
340             
341                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
342                         m->setProcessors(temp);
343                         m->mothurConvert(temp, processors);
344             
345             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
346                         m->mothurConvert(temp, bdiffs);
347                         
348                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
349                         m->mothurConvert(temp, pdiffs);
350             
351   //          temp = validParameter.validFile(parameters, "ldiffs", false);             if (temp == "not found") { temp = "0"; }
352 //                      m->mothurConvert(temp, ldiffs);
353             ldiffs = 0;
354             
355  //           temp = validParameter.validFile(parameters, "sdiffs", false);             if (temp == "not found") { temp = "0"; }
356  //           m->mothurConvert(temp, sdiffs);
357             sdiffs = 0;
358                         
359                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
360                         m->mothurConvert(temp, tdiffs);
361                         
362                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
363
364             temp = validParameter.validFile(parameters, "allfiles", false);             if (temp == "not found") { temp = "F"; }
365                         allFiles = m->isTrue(temp);
366             
367             
368             temp = validParameter.validFile(parameters, "trimoverlap", false);          if (temp == "not found") { temp = "F"; }
369                         trimOverlap = m->isTrue(temp);
370                         
371                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
372                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
373             
374             format = validParameter.validFile(parameters, "format", false);             if (format == "not found"){     format = "illumina1.8+";        }
375             
376             if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
377                                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
378                                 abort=true;
379                         }
380             
381             //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
382             for (int i = -64; i < 65; i++) { 
383                 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
384                 convertTable.push_back(temp);
385             }
386         }
387                 
388         }
389         catch(exception& e) {
390                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
391                 exit(1);
392         }
393 }
394 //**********************************************************************************************************************
395 int MakeContigsCommand::execute(){
396         try {
397                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
398         
399         //read ffastq and rfastq files creating fasta and qual files.
400         //this function will create a forward and reverse, fasta and qual files for each processor.
401         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual.  filesToProcess is for each filepair in the file parameter file.  for ffastq and rfastq this will be size 1.
402         unsigned long int numReads = 0;
403         int start = time(NULL);
404         longestBase = 1000;
405         m->mothurOut("Reading fastq data...\n"); 
406         vector < vector< vector<string> > > filesToProcess = preProcessData(numReads);
407         m->mothurOut("Done.\n");
408        
409         if (m->control_pressed) { return 0; }
410         
411         map<string, string> cvars;
412         string compOutputDir = outputDir;
413         if (outputDir == "") { compOutputDir = m->hasPath(file); }
414         cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
415         cvars["[tag]"] = "";
416         string compositeGroupFile = getOutputFileName("group",cvars);
417         cvars["[tag]"] = "trim";
418         string compositeFastaFile = getOutputFileName("fasta",cvars);
419         cvars["[tag]"] = "scrap";
420         string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
421         cvars["[tag]"] = "";
422         string compositeMisMatchFile = getOutputFileName("report",cvars);
423         
424         if (filesToProcess.size() > 1) { //clear files for append below
425             ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
426             m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
427             m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
428             m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
429             outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
430             outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
431             outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
432         }
433         
434         map<string, int> totalGroupCounts;
435         
436         for (int l = 0; l < filesToProcess.size(); l++) {
437             
438             m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
439             
440             groupCounts.clear();
441             groupMap.clear();
442             vector<vector<string> > fastaFileNames;
443             createOligosGroup = false;
444             string outputGroupFileName;
445             map<string, string> variables; 
446             string thisOutputDir = outputDir;
447             if (outputDir == "") {  thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
448             variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
449             variables["[tag]"] = "";
450             if(oligosfile != ""){  createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]);  }
451             if (createOligosGroup || createFileGroup) {
452                 outputGroupFileName = getOutputFileName("group",variables);
453             }
454             
455             //give group in file file precedence
456             if (createFileGroup) {  createOligosGroup = false; }
457             
458             variables["[tag]"] = "trim";
459             string outFastaFile = getOutputFileName("fasta",variables);
460             variables["[tag]"] = "scrap";
461             string outScrapFastaFile = getOutputFileName("fasta",variables);
462             variables["[tag]"] = "";
463             string outMisMatchFile = getOutputFileName("report",variables);
464                         
465             m->mothurOut("Making contigs...\n"); 
466             createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
467             
468             //remove temp fasta and qual files
469             for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); }  }
470             
471             if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); }  return 0; }
472             
473             if(allFiles){
474                 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
475                 map<string, string>::iterator it;
476                 set<string> namesToRemove;
477                 for(int i=0;i<fastaFileNames.size();i++){
478                     for(int j=0;j<fastaFileNames[0].size();j++){
479                         if (fastaFileNames[i][j] != "") {
480                             if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
481                                 if(m->isBlank(fastaFileNames[i][j])){
482                                     m->mothurRemove(fastaFileNames[i][j]);
483                                     namesToRemove.insert(fastaFileNames[i][j]);
484                                 }else{  
485                                     it = uniqueFastaNames.find(fastaFileNames[i][j]);
486                                     if (it == uniqueFastaNames.end()) { 
487                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
488                                     }   
489                                 }
490                             }
491                         }
492                     }
493                 }
494                 
495                 //remove names for outputFileNames, just cleans up the output
496                 vector<string> outputNames2;
497                 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
498                 outputNames = outputNames2;
499                 
500                 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
501                     ifstream in;
502                     m->openInputFile(it->first, in);
503                     
504                     ofstream out;
505                     string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
506                     thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
507                     m->openOutputFile(thisGroupName, out);
508                     
509                     while (!in.eof()){
510                         if (m->control_pressed) { break; }
511                         
512                         Sequence currSeq(in); m->gobble(in);
513                         out << currSeq.getName() << '\t' << it->second << endl;
514                     }
515                     out.close();
516                     in.close();
517                 }
518             }
519             
520             if (createFileGroup || createOligosGroup) {
521                 ofstream outGroup;
522                 m->openOutputFile(outputGroupFileName, outGroup);
523                 for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
524                     outGroup << itGroup->first << '\t' << itGroup->second << endl;
525                 }
526                 outGroup.close();
527             }
528             
529             if (filesToProcess.size() > 1) { //merge into large combo files
530                 if (createFileGroup || createOligosGroup) {
531                     if (l == 0) {
532                         ofstream outCGroup;
533                         m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
534                         outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
535                     }
536                     m->appendFiles(outputGroupFileName, compositeGroupFile);
537                     if (!allFiles) { m->mothurRemove(outputGroupFileName);  }
538                     else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
539                     
540                     for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
541                         map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
542                         if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
543                         else { itTemp->second += itGroups->second; } //existing group, update total
544                     }
545                 }
546                 if (l == 0) {  m->appendFiles(outMisMatchFile, compositeMisMatchFile);  }
547                 else {  m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile);  }
548                 m->appendFiles(outFastaFile, compositeFastaFile);
549                 m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
550                 if (!allFiles) {
551                     m->mothurRemove(outMisMatchFile);
552                     m->mothurRemove(outFastaFile);
553                     m->mothurRemove(outScrapFastaFile);
554                 }else {
555                     outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
556                     outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
557                     outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
558                 }
559             }else {
560                 totalGroupCounts = groupCounts;
561                 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
562                 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
563                 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
564                 if (createFileGroup || createOligosGroup) {
565                      outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); 
566                 }
567             }
568             m->mothurOut("Done.\n");
569         }
570         
571         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
572         
573         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
574         
575                 //output group counts
576                 m->mothurOutEndLine();
577                 int total = 0;
578                 if (totalGroupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
579                 for (map<string, int>::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) {
580             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
581                 }
582                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
583                 
584                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
585         
586         string currentFasta = "";
587                 itTypes = outputTypes.find("fasta");
588                 if (itTypes != outputTypes.end()) {
589                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
590                 }
591         
592         string currentGroup = "";
593                 itTypes = outputTypes.find("group");
594                 if (itTypes != outputTypes.end()) {
595                         if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
596                 }
597                 
598         //output files created by command
599                 m->mothurOutEndLine();
600                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
601                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
602                 m->mothurOutEndLine();
603
604         return 0;
605     }
606         catch(exception& e) {
607                 m->errorOut(e, "MakeContigsCommand", "execute");
608                 exit(1);
609         }
610 }
611 //**********************************************************************************************************************
612 vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned long int& numReads) {
613         try {
614         vector< vector< vector<string> > > filesToProcess;
615         
616         if (ffastqfile != "") { //reading one file
617             vector< vector<string> > files = readFastqFiles(numReads, ffastqfile, rfastqfile, findexfile, rindexfile);
618             //adjust for really large processors or really small files
619             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
620             if (numReads < processors) { 
621                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
622                 files.resize(numReads);
623                 processors = numReads; 
624             }
625             filesToProcess.push_back(files);
626         }else if (file != "") { //reading multiple files
627             //return only valid pairs
628             vector< vector<string> > filePairsToProcess = readFileNames(file);
629             
630             if (m->control_pressed) { return filesToProcess; }
631             
632             if (filePairsToProcess.size() != 0) {
633                 for (int i = 0; i < filePairsToProcess.size(); i++) {
634                     
635                     if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } }
636                     
637                     unsigned long int thisFilesReads;
638                     vector< vector<string> > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1], filePairsToProcess[i][2], filePairsToProcess[i][3]);
639                     
640                     //adjust for really large processors or really small files
641                     if (thisFilesReads < processors) { 
642                         m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n"); 
643                         for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); }
644                         //remove from file2Group if necassary
645                         map<int, string> cFile2Group;
646                         for (map<int, string>::iterator it = file2Group.begin(); it != file2Group.end(); it++) {
647                             if ((it->first) < i) { cFile2Group[it->first] = it->second; }
648                             else if ((it->first) == i) { } //do nothing, we removed files for i
649                             else { cFile2Group[(it->first-1)] = it->second; } //adjust files because i was removed
650                         }
651                         file2Group = cFile2Group;
652                     }else {
653                         filesToProcess.push_back(files);
654                         numReads += thisFilesReads;
655                     }
656                 }
657                 //all files are bad
658                 if (numReads == 0) {  m->control_pressed = true; }
659             }
660         }else if (ffastafile != "") {
661             vector< vector<string> > files = readFastaFiles(numReads, ffastafile, rfastafile);
662             //adjust for really large processors or really small files
663             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
664             if (numReads < processors) { 
665                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
666                 files.resize(numReads);
667                 processors = numReads; 
668             }
669             filesToProcess.push_back(files);
670         }else { m->control_pressed = true; } //should not get here
671         
672         return filesToProcess;
673     }
674         catch(exception& e) {
675                 m->errorOut(e, "MakeContigsCommand", "preProcessData");
676                 exit(1);
677         }
678 }
679 //**********************************************************************************************************************
680 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int index) {
681         try {
682                 int num = 0;
683                 vector<int> processIDS;
684         string group = "";
685         map<int, string>::iterator it = file2Group.find(index);
686         if (it != file2Group.end()) { group = it->second; }
687         
688 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
689                 int process = 0;
690                 
691                 //loop through and create all the processes you want
692                 while (process != processors-1) {
693                         pid_t pid = fork();
694                         
695                         if (pid > 0) {
696                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
697                                 process++;
698                         }else if (pid == 0){
699                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
700                 
701                                 if(allFiles){
702                                         ofstream temp;
703                     
704                                         for(int i=0;i<tempFASTAFileNames.size();i++){
705                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
706                                                         if (tempFASTAFileNames[i][j] != "") {
707                                                                 tempFASTAFileNames[i][j] += m->mothurGetpid(process) + ".temp";
708                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
709                                                         }
710                                                 }
711                                         }
712                                 }
713                 
714                                 num = driver(files[process], 
715                              outputFasta + m->mothurGetpid(process) + ".temp",
716                              outputScrapFasta + m->mothurGetpid(process) + ".temp",
717                              outputMisMatches + m->mothurGetpid(process) + ".temp",
718                              tempFASTAFileNames, process, group);
719                                 
720                                 //pass groupCounts to parent
721                 ofstream out;
722                 string tempFile = m->mothurGetpid(process) + ".num.temp";
723                 m->openOutputFile(tempFile, out);
724                 out << num << endl;
725                                 if (createFileGroup || createOligosGroup) {
726                                         out << groupCounts.size() << endl;
727                                         
728                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
729                                                 out << it->first << '\t' << it->second << endl;
730                                         }
731                     
732                     out << groupMap.size() << endl;
733                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
734                                                 out << it->first << '\t' << it->second << endl;
735                                         }
736                                 }
737                 out.close();
738                                 
739                                 exit(0);
740                         }else { 
741                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
742                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
743                                 exit(0);
744                         }
745                 }
746                 
747         ofstream temp;
748                 m->openOutputFile(outputFasta, temp);           temp.close();
749         m->openOutputFile(outputScrapFasta, temp);              temp.close();
750                 
751                 //do my part
752                 num = driver(files[processors-1], outputFasta, outputScrapFasta,  outputMisMatches, fastaFileNames, processors-1, group);
753                 
754                 //force parent to wait until all the processes are done
755                 for (int i=0;i<processIDS.size();i++) { 
756                         int temp = processIDS[i];
757                         wait(&temp);
758                 }
759         
760                 for (int i = 0; i < processIDS.size(); i++) {
761             ifstream in;
762             string tempFile = toString(processIDS[i]) + ".num.temp";
763             m->openInputFile(tempFile, in);
764             int tempNum;
765             in >> tempNum; num += tempNum; m->gobble(in);
766             
767                         if (createFileGroup || createOligosGroup) {
768                                 string group;
769                                 in >> tempNum; m->gobble(in);
770                                 
771                                 if (tempNum != 0) {
772                                         for (int j = 0; j < tempNum; j++) { 
773                         int groupNum;
774                                                 in >> group >> groupNum; m->gobble(in);
775                         
776                                                 map<string, int>::iterator it = groupCounts.find(group);
777                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
778                                                 else { groupCounts[it->first] += groupNum; }
779                                         }
780                                 }
781                 in >> tempNum; m->gobble(in);
782                 if (tempNum != 0) {
783                                         for (int j = 0; j < tempNum; j++) { 
784                         string group, seqName;
785                                                 in >> seqName >> group; m->gobble(in);
786                         
787                                                 map<string, string>::iterator it = groupMap.find(seqName);
788                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
789                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
790                                         }
791                                 }
792                         }
793             in.close(); m->mothurRemove(tempFile);
794         }
795     #else
796         
797         //////////////////////////////////////////////////////////////////////////////////////////////////////
798                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
799                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
800                 //////////////////////////////////////////////////////////////////////////////////////////////////////
801                 
802                 vector<contigsData*> pDataArray; 
803                 DWORD   dwThreadIdArray[processors-1];
804                 HANDLE  hThreadArray[processors-1]; 
805                 
806                 //Create processor worker threads.
807                 for( int h=0; h<processors-1; h++ ){
808                         string extension = "";
809                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
810             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
811                         
812             if(allFiles){
813                 ofstream temp;
814                 
815                 for(int i=0;i<tempFASTAFileNames.size();i++){
816                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
817                         if (tempFASTAFileNames[i][j] != "") {
818                             tempFASTAFileNames[i][j] += extension;
819                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
820                         }
821                     }
822                 }
823             }
824                                  
825                         contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
826                         pDataArray.push_back(tempcontig);
827             
828                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
829                 }
830         
831         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
832
833         if(allFiles){
834             ofstream temp;
835             string extension = toString(processors-1) + ".temp";
836             
837             for(int i=0;i<tempFASTAFileNames.size();i++){
838                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
839                     if (tempFASTAFileNames[i][j] != "") {
840                         tempFASTAFileNames[i][j] += extension;
841                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
842                     }
843                 }
844             }
845         }
846
847                 //parent do my part
848                 ofstream temp;
849                 m->openOutputFile(outputFasta, temp);           temp.close();
850         m->openOutputFile(outputScrapFasta, temp);              temp.close();
851         
852         //do my part
853         processIDS.push_back(processors-1);
854                 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"),  (outputScrapFasta+ toString(processors-1) + ".temp"),  (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group);
855         
856                 //Wait until all threads have terminated.
857                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
858                 
859                 //Close all thread handles and free memory allocations.
860                 for(int i=0; i < pDataArray.size(); i++){
861                         num += pDataArray[i]->count;
862             if (!pDataArray[i]->done) {
863                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true; 
864             }
865             for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
866                 map<string, int>::iterator it2 = groupCounts.find(it->first);
867                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
868                 else { groupCounts[it->first] += it->second; }
869             }
870             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
871                 map<string, string>::iterator it2 = groupMap.find(it->first);
872                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
873                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
874             }
875             CloseHandle(hThreadArray[i]);
876                         delete pDataArray[i];
877         }
878                                 
879     #endif      
880         
881         for (int i = 0; i < processIDS.size(); i++) {
882                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
883                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
884                         
885                         m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
886                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
887             
888             m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
889                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
890             
891             if(allFiles){
892                                 for(int j=0;j<fastaFileNames.size();j++){
893                                         for(int k=0;k<fastaFileNames[j].size();k++){
894                                                 if (fastaFileNames[j][k] != "") {
895                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
896                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
897                                                 }
898                                         }
899                                 }
900                         }
901                 }
902                 
903                 return num;
904         }
905         catch(exception& e) {
906                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
907                 exit(1);
908         }
909 }
910 //**********************************************************************************************************************
911 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process, string group){
912     try {
913         
914         Alignment* alignment;
915         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
916                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
917         
918         int num = 0;
919         string thisffastafile = files[0];
920         string thisfqualfile = files[1];
921         string thisrfastafile = files[2];
922         string thisrqualfile = files[3];
923         string thisfindexfile = files[4];
924         string thisrindexfile = files[5];
925         
926         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); }
927         
928         ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex;
929         ofstream outFasta, outMisMatch, outScrapFasta;
930         m->openInputFile(thisffastafile, inFFasta);
931         m->openInputFile(thisrfastafile, inRFasta);
932         if (thisfqualfile != "") {
933             m->openInputFile(thisfqualfile, inFQual);
934             m->openInputFile(thisrqualfile, inRQual);
935         }
936         
937         if (thisfindexfile != "") { m->openInputFile(thisfindexfile, inFIndex);  }
938         if (thisrindexfile != "") { m->openInputFile(thisrindexfile, inRIndex);  }
939         
940         m->openOutputFile(outputFasta, outFasta);
941         m->openOutputFile(outputScrapFasta, outScrapFasta);
942         m->openOutputFile(outputMisMatches, outMisMatch);
943         outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  
944         
945         TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
946         
947         while ((!inFFasta.eof()) && (!inRFasta.eof())) {
948             
949             if (m->control_pressed) { break; }
950             
951             int success = 1;
952             string trashCode = "";
953             int currentSeqsDiffs = 0;
954
955             //read seqs and quality info
956             Sequence fSeq(inFFasta); m->gobble(inFFasta);
957             Sequence rSeq(inRFasta); m->gobble(inRFasta);
958             QualityScores* fQual = NULL; QualityScores* rQual = NULL;
959             if (thisfqualfile != "") {
960                 fQual = new QualityScores(inFQual); m->gobble(inFQual);
961                 rQual = new QualityScores(inRQual); m->gobble(inRQual);
962             }
963             Sequence findexBarcode("findex", "NONE");  Sequence rindexBarcode("rindex", "NONE");
964             if (thisfindexfile != "") {
965                 Sequence temp(inFIndex); m->gobble(inFIndex);
966                 findexBarcode.setAligned(temp.getAligned());
967             }
968             
969             if (thisrindexfile != "") {
970                 Sequence temp(inRIndex); m->gobble(inRIndex);
971                 rindexBarcode.setAligned(temp.getAligned());
972             }
973             
974             int barcodeIndex = 0;
975             int primerIndex = 0;
976                         
977             if(barcodes.size() != 0){
978                 if (thisfqualfile != "") {
979                     if ((thisfindexfile != "") || (thisrindexfile != "")) {
980                         success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
981                     }else {
982                         success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
983                     }
984                 }else {
985                     success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
986                 }
987                 if(success > bdiffs)            {       trashCode += 'b';       }
988                 else{ currentSeqsDiffs += success;  }
989             }
990             
991             if(primers.size() != 0){
992                 if (thisfqualfile != "") {
993                     success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
994                 }else {
995                     success = trimOligos.stripForward(fSeq, rSeq, primerIndex);
996                 }
997                 if(success > pdiffs)            {       trashCode += 'f';       }
998                 else{ currentSeqsDiffs += success;  }
999             }
1000             
1001             if (currentSeqsDiffs > tdiffs)      {       trashCode += 't';   }
1002             
1003             //flip the reverse reads
1004             rSeq.reverseComplement();
1005             if (thisfqualfile != "") { rQual->flipQScores(); }
1006
1007             //pairwise align
1008             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
1009             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
1010             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
1011             fSeq.setAligned(alignment->getSeqAAln());
1012             rSeq.setAligned(alignment->getSeqBAln());
1013             int length = fSeq.getAligned().length();
1014             
1015             //traverse alignments merging into one contiguous seq
1016             string contig = "";
1017             int numMismatches = 0;
1018             string seq1 = fSeq.getAligned();
1019             string seq2 = rSeq.getAligned();
1020             vector<int> scores1, scores2; 
1021             if (thisfqualfile != "") {
1022                 scores1 = fQual->getQualityScores();
1023                 scores2 = rQual->getQualityScores();
1024                 delete fQual; delete rQual;
1025             }
1026             
1027             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
1028             int overlapStart = fSeq.getStartPos();
1029             int seq2Start = rSeq.getStartPos();
1030             
1031             //bigger of the 2 starting positions is the location of the overlapping start
1032             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
1033                 overlapStart = seq2Start; 
1034                 for (int i = 0; i < overlapStart; i++) { contig += seq1[i];  }
1035             }else { //seq1 starts later so take from 0 to overlapStart from seq2
1036                 for (int i = 0; i < overlapStart; i++) {  contig += seq2[i]; }
1037             }
1038             
1039             int seq1End = fSeq.getEndPos();
1040             int seq2End = rSeq.getEndPos();
1041             int overlapEnd = seq1End;
1042             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
1043             
1044             int oStart = contig.length();
1045             //cout << fSeq.getAligned()  << endl; cout << rSeq.getAligned() << endl;
1046             for (int i = overlapStart; i < overlapEnd; i++) {
1047                 //cout << seq1[i] << ' ' << seq2[i] << ' ' << scores1[ABaseMap[i]] << ' ' << scores2[BBaseMap[i]] << endl;
1048                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
1049                     contig += seq1[i];
1050                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
1051                     if (thisfqualfile != "") {
1052                         if (scores2[BBaseMap[i]] <= insert) { } //
1053                         else { contig += seq2[i];  }
1054                     }else { contig += seq2[i]; } //with no quality info, then we keep it?
1055                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
1056                     if (thisfqualfile != "") {
1057                         if (scores1[ABaseMap[i]] <= insert) { } //
1058                         else { contig += seq1[i];  }
1059                     }else { contig += seq1[i]; } //with no quality info, then we keep it?
1060                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
1061                     if (thisfqualfile != "") {
1062                       if (ABaseMap[i] <= scores1.size() && BBaseMap[i] <= scores2.size() && // this check is dumb; we should determine this earlier
1063                           abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
1064                             char c = seq1[i];
1065                             if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
1066                             contig += c;
1067                         }else { //if no, base becomes n
1068                             contig += 'N';
1069                         }
1070                         numMismatches++;
1071                     }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
1072                 }else { //should never get here
1073                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
1074                 }
1075             }
1076             int oend = contig.length();
1077             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
1078                 for (int i = overlapEnd; i < length; i++) { contig += seq2[i];  }
1079             }else { //seq2 ends before seq1 so take from overlap to length from seq1
1080                 for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
1081             }
1082             //cout << contig << endl;
1083             //exit(1);
1084             if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart);  if (contig.length() == 0) { trashCode += "l"; } }
1085             
1086             if(trashCode.length() == 0){
1087                 bool ignore = false;
1088                 
1089                 if (m->debug) { m->mothurOut(fSeq.getName()); }
1090                 
1091                 if (createOligosGroup) {
1092                     if(barcodes.size() != 0){
1093                         string thisGroup = barcodeNameVector[barcodeIndex];
1094                         if (primers.size() != 0) { 
1095                             if (primerNameVector[primerIndex] != "") { 
1096                                 if(thisGroup != "") {
1097                                     thisGroup += "." + primerNameVector[primerIndex]; 
1098                                 }else {
1099                                     thisGroup = primerNameVector[primerIndex]; 
1100                                 }
1101                             } 
1102                         }
1103                         
1104                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
1105                         
1106                         int pos = thisGroup.find("ignore");
1107                         if (pos == string::npos) {
1108                             groupMap[fSeq.getName()] = thisGroup; 
1109                         
1110                             map<string, int>::iterator it = groupCounts.find(thisGroup);
1111                             if (it == groupCounts.end()) {      groupCounts[thisGroup] = 1; }
1112                             else { groupCounts[it->first] ++; }
1113                         }else { ignore = true; }
1114                         
1115                     }
1116                 }else if (createFileGroup) {
1117                     int pos = group.find("ignore");
1118                     if (pos == string::npos) {
1119                         groupMap[fSeq.getName()] = group;
1120                         
1121                         map<string, int>::iterator it = groupCounts.find(group);
1122                         if (it == groupCounts.end()) {  groupCounts[group] = 1; }
1123                         else { groupCounts[it->first] ++; }
1124                     }else { ignore = true; }
1125                 }
1126                 if (m->debug) { m->mothurOut("\n"); }
1127                 
1128                 if(allFiles && !ignore){
1129                     ofstream output;
1130                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
1131                     output << ">" << fSeq.getName() << endl << contig << endl;
1132                     output.close();
1133                 }
1134                 
1135                 //output
1136                 outFasta << ">" << fSeq.getName() << endl << contig << endl;
1137                 int numNs = 0;
1138                 for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; }  }
1139                 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
1140             }else {
1141                 //output
1142                 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
1143             }
1144             num++;
1145             
1146                         //report progress
1147                         if((num) % 1000 == 0){  m->mothurOutJustToScreen(toString(num)); m->mothurOutEndLine();         }
1148                 }
1149         
1150                 //report progress
1151                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1152         
1153         inFFasta.close();
1154         inRFasta.close();
1155         outFasta.close();
1156         outScrapFasta.close();
1157         outMisMatch.close();
1158         if (thisfqualfile != "") {
1159             inFQual.close();
1160             inRQual.close();
1161         }
1162         delete alignment;
1163         
1164         if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);  }
1165     
1166         return num;
1167     }
1168         catch(exception& e) {
1169                 m->errorOut(e, "MakeContigsCommand", "driver");
1170                 exit(1);
1171         }
1172 }
1173 //**********************************************************************************************************************
1174 vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq, string findex, string rindex){
1175     try {
1176         vector< vector<string> > files;
1177         //maps processors number to file pointer
1178         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual, tempfiles[4] = forwardIndex, [4] = forwardReverse
1179         map<int, vector<ofstream*> >::iterator it;
1180         
1181         //create files to write to
1182         for (int i = 0; i < processors; i++) {
1183             vector<ofstream*> temp;
1184             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1185             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1186             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1187             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1188             ofstream* outFI = new ofstream;     temp.push_back(outFI);  
1189             ofstream* outRI = new ofstream;     temp.push_back(outRI);  
1190             tempfiles[i] = temp;
1191             
1192             vector<string> names;
1193             string thisOutputDir = outputDir;
1194             if (outputDir == "") { thisOutputDir = m->hasPath(ffastq); }
1195             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "ffastatemp";
1196             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp";
1197             string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp";
1198             string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp";
1199             string findexfilename = ""; string rindexfilename = "";
1200             noneOk = false; //flag to oligos file read that its okay to allow for non paired barcodes
1201             if (findex != "") {  findexfilename = thisOutputDir + m->getRootName(m->getSimpleName(findex)) + toString(i) + "findextemp"; m->openOutputFile(findexfilename, *outFI);  noneOk = true; }
1202             if (rindex != "") {  rindexfilename = thisOutputDir + m->getRootName(m->getSimpleName(rindex)) + toString(i) + "rindextemp";  m->openOutputFile(rindexfilename, *outRI); noneOk = true; }
1203             names.push_back(ffastafilename); names.push_back(fqualfilename);
1204             names.push_back(rfastafilename); names.push_back(rqualfilename);
1205             names.push_back(findexfilename); names.push_back(rindexfilename);
1206             files.push_back(names);
1207             
1208             m->openOutputFile(ffastafilename, *outFF);
1209             m->openOutputFile(rfastafilename, *outRF);
1210             m->openOutputFile(fqualfilename, *outFQ);
1211             m->openOutputFile(rqualfilename, *outRQ);
1212         }
1213         
1214         if (m->control_pressed) {
1215             //close files, delete ofstreams
1216             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1217             //remove files
1218             for (int i = 0; i < files.size(); i++) {  
1219                 for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") {  m->mothurRemove(files[i][j]); }  }
1220             }
1221         }
1222         
1223         ifstream inForward;
1224         m->openInputFile(ffastq, inForward);
1225         
1226         ifstream inReverse;
1227         m->openInputFile(rfastq, inReverse);
1228         
1229         ifstream infIndex, inrIndex;
1230         bool findexIsGood = false;
1231         bool rindexIsGood = false;
1232         if (findex != "") { m->openInputFile(findex, infIndex); findexIsGood = true; }
1233         if (rindex != "") { m->openInputFile(rindex, inrIndex); rindexIsGood = true; }
1234         
1235         count = 0;
1236         map<string, fastqRead> uniques;
1237         map<string, fastqRead> iUniques;
1238         map<string, pairFastqRead> pairUniques;
1239         map<string, fastqRead>::iterator itUniques;
1240         while ((!inForward.eof()) || (!inReverse.eof()) || (findexIsGood) || (rindexIsGood)) {
1241             
1242             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1243             
1244             //get a read from forward and reverse fastq files
1245             bool ignoref, ignorer, ignorefi, ignoreri;
1246             fastqRead thisFread, thisRread, thisFIread, thisRIread;
1247             if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
1248             else { ignoref = true; }
1249             if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
1250             else { ignorer = true; }
1251             if (findexIsGood) { thisFIread = readFastq(infIndex, ignorefi); if (infIndex.eof()) { findexIsGood = false; } }
1252             else { ignorefi = true; }
1253             if (rindexIsGood) { thisRIread = readFastq(inrIndex, ignoreri);  if (inrIndex.eof()) { rindexIsGood = false; } }
1254             else { ignoreri = true; }
1255             
1256             bool allowOne = false;
1257             if ((findex == "") || (rindex == "")) { allowOne = true; }
1258             vector<pairFastqRead> frReads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1259             vector<pairFastqRead> friReads = getReads(ignorefi, ignoreri, thisFIread, thisRIread, iUniques, allowOne);
1260             
1261             //add in index info if provided
1262             vector<pairFastqRead> reads = frReads;
1263             if ((findex != "") || (rindex != "")) {  reads = mergeReads(frReads, friReads, pairUniques); }
1264             
1265             for (int i = 0; i < reads.size(); i++) {
1266                 fastqRead fread = reads[i].forward;
1267                 fastqRead rread = reads[i].reverse;
1268                 fastqRead firead = reads[i].findex;
1269                 fastqRead riread = reads[i].rindex;
1270                 
1271                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); if (findex != "") { m->mothurOut(toString(count) + '\t' + firead.name + '\n'); } if (rindex != "") { m->mothurOut(toString(count) + '\t' + riread.name + '\n'); } }
1272                
1273                 //if (checkReads(fread, rread, ffastq, rfastq)) {
1274                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1275                     
1276                     //if the reads are okay write to output files
1277                     int process = count % processors;
1278                     
1279                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1280                     *(tempfiles[process][1]) << ">" << fread.name << endl;
1281                     for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1282                     *(tempfiles[process][1]) << endl;
1283                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1284                     *(tempfiles[process][3]) << ">" << rread.name << endl;
1285                     for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1286                     *(tempfiles[process][3]) << endl;
1287                     if (findex != "") {  *(tempfiles[process][4]) << ">" << firead.name << endl << firead.sequence << endl; }
1288                     if (rindex != "") {  *(tempfiles[process][5]) << ">" << riread.name << endl << riread.sequence << endl; }
1289                 
1290                     count++;
1291                     
1292                     //report progress
1293                     if((count) % 10000 == 0){   m->mothurOutJustToScreen(toString(count)); m->mothurOutEndLine();               }
1294                 //}
1295             }
1296                 }
1297                 //report progress
1298                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1299         
1300         if (uniques.size() != 0) {
1301             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1302                 if (m->control_pressed) { break; }
1303                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1304             }
1305             for (map<string, pairFastqRead>:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) {
1306                 if (m->control_pressed) { break; }
1307                 m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n");
1308             }
1309             m->mothurOutEndLine();
1310         }
1311         
1312         //close files, delete ofstreams
1313         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1314         inForward.close();
1315         inReverse.close();
1316         if (findex != "") { infIndex.close(); }
1317         if (rindex != "") { inrIndex.close(); }
1318        
1319         return files;
1320     }
1321     catch(exception& e) {
1322         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1323         exit(1);
1324     }
1325 }
1326 //**********************************************************************************************************************
1327 vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& count, string ffasta, string rfasta){
1328     try {
1329         vector< vector<string> > files;
1330         //maps processors number to file pointer
1331         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1332         map<int, vector<ofstream*> >::iterator it;
1333         
1334         //create files to write to
1335         for (int i = 0; i < processors; i++) {
1336             vector<ofstream*> temp;
1337             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1338             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1339             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1340             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1341             tempfiles[i] = temp;
1342             
1343             vector<string> names;
1344             string thisOutputDir = outputDir;
1345             if (outputDir == "") { thisOutputDir = m->hasPath(ffasta); }
1346             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffasta)) + toString(i) + "ffastatemp";
1347             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfasta)) + toString(i) + "rfastatemp";
1348             string fqualfilename = "";
1349             if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp";  m->openOutputFile(fqualfilename, *outFQ); }
1350             string rqualfilename = "";
1351             if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); }
1352             string findexfilename = ""; string rindexfilename = "";
1353             names.push_back(ffastafilename); names.push_back(fqualfilename);
1354             names.push_back(rfastafilename); names.push_back(rqualfilename);
1355             names.push_back(findexfilename); names.push_back(rindexfilename);
1356             files.push_back(names);
1357             
1358             m->openOutputFile(ffastafilename, *outFF);
1359             m->openOutputFile(rfastafilename, *outRF);
1360         }
1361         
1362         if (m->control_pressed) {
1363             //close files, delete ofstreams
1364             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1365             //remove files
1366             for (int i = 0; i < files.size(); i++) {  
1367                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1368             }
1369         }
1370         
1371         ifstream inForwardFasta;
1372         m->openInputFile(ffasta, inForwardFasta);
1373         
1374         ifstream inReverseFasta;
1375         m->openInputFile(rfasta, inReverseFasta);
1376         
1377         ifstream inForwardQual, inReverseQual;
1378         if (fqualfile != "") { m->openInputFile(fqualfile, inForwardQual); m->openInputFile(rqualfile, inReverseQual); }
1379         
1380         count = 0;
1381         map<string, fastqRead> uniques;
1382         map<string, fastqRead>::iterator itUniques;
1383         while ((!inForwardFasta.eof()) || (!inReverseFasta.eof())) {
1384             
1385             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1386             
1387             //get a reads from forward and reverse fasta files
1388             bool ignoref, ignorer;
1389             fastqRead thisFread, thisRread;
1390             if (!inForwardFasta.eof()) {  
1391                 ignoref = false; 
1392                 Sequence temp(inForwardFasta); m->gobble(inForwardFasta);
1393                 thisFread.name = temp.getName();
1394                 thisFread.sequence = temp.getUnaligned();
1395             }else { ignoref = true; }
1396             if (!inReverseFasta.eof()) {  
1397                 ignorer = false; 
1398                 Sequence temp(inReverseFasta); m->gobble(inReverseFasta);
1399                 thisRread.name = temp.getName();
1400                 thisRread.sequence = temp.getUnaligned();  
1401             }else { ignorer = true; }
1402             
1403             //get qual reads if given
1404             if (fqualfile != "") {
1405                 if (!inForwardQual.eof() && !ignoref) {  
1406                     QualityScores temp(inForwardQual); m->gobble(inForwardQual);
1407                     //if forward files dont match ignore read
1408                     if (thisFread.name != temp.getName()) { ignoref = true; } 
1409                     else { thisFread.scores = temp.getQualityScores(); }
1410                 }else { ignoref = true; }
1411                 if (!inReverseQual.eof() && !ignorer) {  
1412                     QualityScores temp(inReverseQual); m->gobble(inReverseQual);
1413                     //if reverse files dont match ignore read
1414                     if (thisRread.name != temp.getName()) { ignorer = true; } 
1415                     else { thisRread.scores = temp.getQualityScores(); }
1416                 }else { ignorer = true; }
1417             }
1418             
1419             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1420             
1421             for (int i = 0; i < reads.size(); i++) {
1422                 fastqRead fread = reads[i].forward;
1423                 fastqRead rread = reads[i].reverse;
1424                 
1425                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1426                 
1427                // if (checkReads(fread, rread, ffasta, rfasta)) {
1428                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1429                     
1430                     //if the reads are okay write to output files
1431                     int process = count % processors;
1432                     
1433                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1434                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1435                     if (fqualfile != "") { //if you have quality info, print it
1436                         *(tempfiles[process][1]) << ">" << fread.name << endl;
1437                         for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1438                         *(tempfiles[process][1]) << endl;
1439                         *(tempfiles[process][3]) << ">" << rread.name << endl;
1440                         for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1441                         *(tempfiles[process][3]) << endl;
1442                     }
1443                     count++;
1444                     
1445                     //report progress
1446                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1447                 //}
1448             }
1449                 }
1450                 //report progress
1451                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1452         
1453         if (uniques.size() != 0) {
1454             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1455                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1456             }
1457             m->mothurOutEndLine();
1458         }
1459         
1460         //close files, delete ofstreams
1461         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1462         inReverseFasta.close(); 
1463         inForwardFasta.close(); 
1464         if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); }
1465         
1466         return files;
1467     }
1468     catch(exception& e) {
1469         m->errorOut(e, "MakeContigsCommand", "readFastaFiles");
1470         exit(1);
1471     }
1472 }
1473 //**********************************************************************************************************************
1474 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool allowOne){
1475     try {
1476         vector<pairFastqRead> reads;
1477         map<string, fastqRead>::iterator itUniques;
1478             
1479         if (!ignoref && !ignorer) {
1480             if (forward.name == reverse.name) { 
1481                 pairFastqRead temp(forward, reverse);
1482                 reads.push_back(temp);
1483             }else {
1484                 bool match = false;
1485                 //if no match are the names only different by 1 and 2?
1486                 string tempFRead = forward.name.substr(0, forward.name.length()-1);
1487                 string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
1488                 if (tempFRead == tempRRead) {
1489                     if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
1490                         forward.name = tempFRead;
1491                         reverse.name = tempRRead;
1492                         pairFastqRead temp(forward, reverse);
1493                         reads.push_back(temp);
1494                         match = true;
1495                     }
1496                 }
1497                 
1498                 if (!match) {
1499                     //look for forward pair
1500                     itUniques = uniques.find(forward.name);
1501                     if (itUniques != uniques.end()) {  //we have the pair for this read
1502                         pairFastqRead temp(forward, itUniques->second);
1503                         reads.push_back(temp);
1504                         uniques.erase(itUniques);
1505                     }else { //save this read for later
1506                         uniques[forward.name] = forward;
1507                     }
1508                     
1509                     //look for reverse pair
1510                     itUniques = uniques.find(reverse.name);
1511                     if (itUniques != uniques.end()) {  //we have the pair for this read
1512                         pairFastqRead temp(itUniques->second, reverse);
1513                         reads.push_back(temp);
1514                         uniques.erase(itUniques);
1515                     }else { //save this read for later
1516                         uniques[reverse.name] = reverse;
1517                     }
1518                 }
1519                                 
1520             }
1521         }else if (!ignoref && ignorer) { //ignore reverse keep forward
1522             if (allowOne) {
1523                 fastqRead dummy;
1524                 pairFastqRead temp(forward, dummy);
1525                 reads.push_back(temp);
1526             }else {
1527                 //look for forward pair
1528                 itUniques = uniques.find(forward.name);
1529                 if (itUniques != uniques.end()) {  //we have the pair for this read
1530                     pairFastqRead temp(forward, itUniques->second);
1531                     reads.push_back(temp);
1532                     uniques.erase(itUniques);
1533                 }else { //save this read for later
1534                     uniques[forward.name] = forward;
1535                 }
1536             }
1537         }else if (ignoref && !ignorer) { //ignore forward keep reverse
1538             if (allowOne) {
1539                 fastqRead dummy;
1540                 pairFastqRead temp(dummy, reverse);
1541                 reads.push_back(temp);
1542             }else {
1543                 //look for reverse pair
1544                 itUniques = uniques.find(reverse.name);
1545                 if (itUniques != uniques.end()) {  //we have the pair for this read
1546                     pairFastqRead temp(itUniques->second, reverse);
1547                     reads.push_back(temp);
1548                     uniques.erase(itUniques);
1549                 }else { //save this read for later
1550                     uniques[reverse.name] = reverse;
1551                 }
1552             }
1553         }//else ignore both and do nothing
1554         
1555         return reads;
1556     }
1557     catch(exception& e) {
1558         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1559         exit(1);
1560     }
1561 }
1562 //**********************************************************************************************************************
1563 //look through the reads from the forward and reverse files and try to find matching reads from index files.
1564 vector<pairFastqRead> MakeContigsCommand::mergeReads(vector<pairFastqRead> thisReads, vector<pairFastqRead> indexes, map<string, pairFastqRead>& uniques){
1565     try {
1566         vector<pairFastqRead> reads;
1567         map<string, pairFastqRead>::iterator itUniques;
1568         
1569         set<int> foundIndexes;
1570         for (int i = 0; i < thisReads.size(); i++) {
1571             bool found = false;
1572             for (int j = 0; j < indexes.size(); j++) {
1573                 
1574                 //incase only one index
1575                 string indexName = indexes[j].forward.name;
1576                 if (indexName == "") { indexName = indexes[j].reverse.name; }
1577                 
1578                 if (thisReads[i].forward.name == indexName){
1579                     thisReads[i].findex = indexes[j].forward;
1580                     thisReads[i].rindex = indexes[j].reverse;
1581                     reads.push_back(thisReads[i]);
1582                     found = true;
1583                     foundIndexes.insert(j);
1584                 }
1585             }
1586             
1587             if (!found) {
1588                 //look for forward pair
1589                 itUniques = uniques.find('i'+thisReads[i].forward.name);
1590                 if (itUniques != uniques.end()) {  //we have the pair for this read
1591                     thisReads[i].findex = itUniques->second.forward;
1592                     thisReads[i].rindex = itUniques->second.reverse;
1593                     reads.push_back(thisReads[i]);
1594                     uniques.erase(itUniques);
1595                 }else { //save this read for later
1596                     uniques['r'+thisReads[i].forward.name] = thisReads[i];
1597                 }
1598             }
1599         }
1600         
1601         if (foundIndexes.size() != indexes.size()) { //if we didnt match all the indexes look for them in uniques
1602             for (int j = 0; j < indexes.size(); j++) {
1603                 if (foundIndexes.count(j) == 0) { //we didnt find this one
1604                     //incase only one index
1605                     string indexName = indexes[j].forward.name;
1606                     if (indexName == "") { indexName = indexes[j].reverse.name; }
1607                     
1608                     //look for forward pair
1609                     itUniques = uniques.find('r'+indexName);
1610                     if (itUniques != uniques.end()) {  //we have the pair for this read
1611                         pairFastqRead temp(itUniques->second.forward, itUniques->second.reverse, indexes[j].forward, indexes[j].reverse);
1612                         reads.push_back(temp);
1613                         uniques.erase(itUniques);
1614                     }else { //save this read for later
1615                         uniques['i'+indexName] = indexes[j];
1616                     }
1617                 }
1618             }
1619         }
1620        
1621         
1622         return reads;
1623     }
1624     catch(exception& e) {
1625         m->errorOut(e, "MakeContigsCommand", "mergeReads");
1626         exit(1);
1627     }
1628 }
1629 //**********************************************************************************************************************
1630 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1631     try {
1632         fastqRead read;
1633         
1634         ignore = false;
1635         
1636         //read sequence name
1637         string line = m->getline(in); m->gobble(in);
1638         vector<string> pieces = m->splitWhiteSpace(line);
1639         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
1640         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
1641         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1642         else { name = name.substr(1); }
1643         
1644         //read sequence
1645         string sequence = m->getline(in); m->gobble(in);
1646         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1647         
1648         //read sequence name
1649         line = m->getline(in); m->gobble(in);
1650         pieces = m->splitWhiteSpace(line);
1651         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
1652         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1653         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1654         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1655         
1656         //read quality scores
1657         string quality = m->getline(in); m->gobble(in);
1658         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1659          
1660         //sanity check sequence length and number of quality scores match
1661         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1662         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1663         
1664         vector<int> qualScores = convertQual(quality);
1665         
1666         m->checkName(name);
1667         read.name = name;
1668         read.sequence = sequence;
1669         read.scores = qualScores;
1670         
1671         if (m->debug) { m->mothurOut("[DEBUG]: " + read.name + " " + read.sequence + " " + quality + "\n"); }
1672
1673         return read;
1674     }
1675     catch(exception& e) {
1676         m->errorOut(e, "MakeContigsCommand", "readFastq");
1677         exit(1);
1678     }
1679 }
1680 /**********************************************************************************************************************
1681 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
1682     try {
1683         bool good = true;
1684         
1685         //do sequence lengths match
1686         if (forward.sequence.length() != reverse.sequence.length()) {
1687             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfile + ", ignoring.\n");
1688             good = false; 
1689         }
1690         
1691         //do number of qual scores match 
1692         if (forward.scores.size() != reverse.scores.size()) {
1693             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfile + ", ignoring.\n");
1694             good = false; 
1695         }
1696
1697         return good;
1698     }
1699     catch(exception& e) {
1700         m->errorOut(e, "MakeContigsCommand", "checkReads");
1701         exit(1);
1702     }
1703 }*/
1704 //***************************************************************************************************************
1705 //lines can be 2, 3, or 4 columns
1706 // forward.fastq reverse.fastq -> 2 column
1707 // groupName forward.fastq reverse.fastq -> 3 column
1708 // forward.fastq reverse.fastq forward.index.fastq  reverse.index.fastq  -> 4 column
1709 // forward.fastq reverse.fastq none  reverse.index.fastq  -> 4 column
1710 // forward.fastq reverse.fastq forward.index.fastq  none  -> 4 column
1711 vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
1712         try {
1713         vector< vector<string> > files;
1714         string forward, reverse, findex, rindex;
1715         
1716         ifstream in;
1717         m->openInputFile(filename, in);
1718         
1719         while(!in.eof()) {
1720             
1721             if (m->control_pressed) { return files; }
1722             
1723             string line = m->getline(in);  m->gobble(in);
1724             vector<string> pieces = m->splitWhiteSpace(line);
1725             
1726             string group = "";
1727             if (pieces.size() == 2) {
1728                 forward = pieces[0];
1729                 reverse = pieces[1];
1730                 group = "";
1731                 findex = "";
1732                 rindex = "";
1733             }else if (pieces.size() == 3) {
1734                 group = pieces[0];
1735                 forward = pieces[1];
1736                 reverse = pieces[2];
1737                 findex = "";
1738                 rindex = "";
1739                 createFileGroup = true;
1740             }else if (pieces.size() == 4) {
1741                 forward = pieces[0];
1742                 reverse = pieces[1];
1743                 findex = pieces[2];
1744                 rindex = pieces[3];
1745                 if ((findex == "none") || (findex == "NONE")){ findex = ""; }
1746                 if ((rindex == "none") || (rindex == "NONE")){ rindex = ""; }
1747             }else {
1748                 m->mothurOut("[ERROR]: file lines can be 2, 3, or 4 columns. The forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one. \n"); m->control_pressed = true;
1749             }
1750             
1751             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); }
1752             
1753             //check to make sure both are able to be opened
1754             ifstream in2;
1755             int openForward = m->openInputFile(forward, in2, "noerror");
1756             
1757             //if you can't open it, try default location
1758             if (openForward == 1) {
1759                 if (m->getDefaultPath() != "") { //default path is set
1760                     string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
1761                     m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
1762                     ifstream in3;
1763                     openForward = m->openInputFile(tryPath, in3, "noerror");
1764                     in3.close();
1765                     forward = tryPath;
1766                 }
1767             }
1768             
1769             //if you can't open it, try output location
1770             if (openForward == 1) {
1771                 if (m->getOutputDir() != "") { //default path is set
1772                     string tryPath = m->getOutputDir() + m->getSimpleName(forward);
1773                     m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1774                     ifstream in4;
1775                     openForward = m->openInputFile(tryPath, in4, "noerror");
1776                     forward = tryPath;
1777                     in4.close();
1778                 }
1779             }
1780             
1781             if (openForward == 1) { //can't find it
1782                 m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n"); 
1783             }else{  in2.close();  }
1784             
1785             ifstream in3;
1786             int openReverse = m->openInputFile(reverse, in3, "noerror");
1787             
1788             //if you can't open it, try default location
1789             if (openReverse == 1) {
1790                 if (m->getDefaultPath() != "") { //default path is set
1791                     string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
1792                     m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
1793                     ifstream in3;
1794                     openReverse = m->openInputFile(tryPath, in3, "noerror");
1795                     in3.close();
1796                     reverse = tryPath;
1797                 }
1798             }
1799             
1800             //if you can't open it, try output location
1801             if (openReverse == 1) {
1802                 if (m->getOutputDir() != "") { //default path is set
1803                     string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
1804                     m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1805                     ifstream in4;
1806                     openReverse = m->openInputFile(tryPath, in4, "noerror");
1807                     reverse = tryPath;
1808                     in4.close();
1809                 }
1810             }
1811             
1812             if (openReverse == 1) { //can't find it
1813                 m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n"); 
1814             }else{  in3.close();  }
1815             
1816             int openFindex = 0;
1817             if (findex != "") {
1818                 ifstream in4;
1819                 openFindex = m->openInputFile(findex, in4, "noerror"); in4.close();
1820                 
1821                 //if you can't open it, try default location
1822                 if (openFindex == 1) {
1823                     if (m->getDefaultPath() != "") { //default path is set
1824                         string tryPath = m->getDefaultPath() + m->getSimpleName(findex);
1825                         m->mothurOut("Unable to open " + findex + ". Trying default " + tryPath); m->mothurOutEndLine();
1826                         ifstream in5;
1827                         openFindex = m->openInputFile(tryPath, in5, "noerror");
1828                         in5.close();
1829                         findex = tryPath;
1830                     }
1831                 }
1832                 
1833                 //if you can't open it, try output location
1834                 if (openFindex == 1) {
1835                     if (m->getOutputDir() != "") { //default path is set
1836                         string tryPath = m->getOutputDir() + m->getSimpleName(findex);
1837                         m->mothurOut("Unable to open " + findex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1838                         ifstream in6;
1839                         openFindex = m->openInputFile(tryPath, in6, "noerror");
1840                         findex = tryPath;
1841                         in6.close();
1842                     }
1843                 }
1844                 
1845                 if (openFindex == 1) { //can't find it
1846                     m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n");
1847                 }
1848             }
1849             
1850             int openRindex = 0;
1851             if (rindex != "") {
1852                 ifstream in7;
1853                 openRindex = m->openInputFile(rindex, in7, "noerror"); in7.close();
1854                 
1855                 //if you can't open it, try default location
1856                 if (openRindex == 1) {
1857                     if (m->getDefaultPath() != "") { //default path is set
1858                         string tryPath = m->getDefaultPath() + m->getSimpleName(rindex);
1859                         m->mothurOut("Unable to open " + rindex + ". Trying default " + tryPath); m->mothurOutEndLine();
1860                         ifstream in8;
1861                         openRindex = m->openInputFile(tryPath, in8, "noerror");
1862                         in8.close();
1863                         rindex = tryPath;
1864                     }
1865                 }
1866                 
1867                 //if you can't open it, try output location
1868                 if (openRindex == 1) {
1869                     if (m->getOutputDir() != "") { //default path is set
1870                         string tryPath = m->getOutputDir() + m->getSimpleName(rindex);
1871                         m->mothurOut("Unable to open " + rindex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1872                         ifstream in9;
1873                         openRindex = m->openInputFile(tryPath, in9, "noerror");
1874                         rindex = tryPath;
1875                         in9.close();
1876                     }
1877                 }
1878                 
1879                 if (openRindex == 1) { //can't find it
1880                     m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n");
1881                 }
1882             }
1883
1884             
1885             if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1)) { //good pair
1886                 file2Group[files.size()] = group;
1887                 vector<string> pair;
1888                 pair.push_back(forward);
1889                 pair.push_back(reverse);
1890                 pair.push_back(findex);
1891                 pair.push_back(rindex);
1892                 if (((findex != "") || (rindex != "")) && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->control_pressed = true;  }
1893                 files.push_back(pair);
1894             }
1895         }
1896         in.close();
1897         
1898         return files;
1899     }
1900     catch(exception& e) {
1901         m->errorOut(e, "MakeContigsCommand", "readFileNames");
1902         exit(1);
1903     }
1904 }
1905 //***************************************************************************************************************
1906 //illumina data requires paired forward and reverse data
1907 //BARCODE   atgcatgc   atgcatgc    groupName 
1908 //PRIMER   atgcatgc   atgcatgc    groupName  
1909 //PRIMER   atgcatgc   atgcatgc  
1910 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
1911         try {
1912                 ifstream in;
1913                 m->openInputFile(oligosfile, in);
1914                 
1915                 ofstream test;
1916                 
1917                 string type, foligo, roligo, group;
1918         
1919                 int indexPrimer = 0;
1920                 int indexBarcode = 0;
1921         set<string> uniquePrimers;
1922         set<string> uniqueBarcodes;
1923                 
1924                 while(!in.eof()){
1925             
1926                         in >> type; 
1927     
1928                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1929             
1930                         if(type[0] == '#'){
1931                                 while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
1932                                 m->gobble(in);
1933                         }
1934                         else{
1935                                 m->gobble(in);
1936                                 //make type case insensitive
1937                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1938                                 
1939                                 in >> foligo;
1940                 
1941                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1942                                 
1943                                 for(int i=0;i<foligo.length();i++){
1944                                         foligo[i] = toupper(foligo[i]);
1945                                         if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
1946                                 }
1947                                 
1948                                 if(type == "PRIMER"){
1949                                         m->gobble(in);
1950                                         
1951                     in >> roligo;
1952                     
1953                     for(int i=0;i<roligo.length();i++){
1954                         roligo[i] = toupper(roligo[i]);
1955                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1956                     }
1957                     //roligo = reverseOligo(roligo);
1958                     
1959                     if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
1960                     
1961                     group = "";
1962                     
1963                                         // get rest of line in case there is a primer name
1964                                         while (!in.eof())       {       
1965                                                 char c = in.get(); 
1966                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1967                                                 else if (c == 32 || c == 9){;} //space or tab
1968                                                 else {  group += c;  }
1969                                         } 
1970                     
1971                     oligosPair newPrimer(foligo, roligo);
1972                     
1973                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1974                     
1975                                         //check for repeat barcodes
1976                     string tempPair = foligo+roligo;
1977                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1978                     else { uniquePrimers.insert(tempPair); }
1979                                         
1980                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1981                                         primers[indexPrimer]=newPrimer; indexPrimer++;          
1982                                         primerNameVector.push_back(group);
1983                                 }else if(type == "BARCODE"){
1984                                         m->gobble(in);
1985                                         
1986                     in >> roligo;
1987                     
1988                     for(int i=0;i<roligo.length();i++){
1989                         roligo[i] = toupper(roligo[i]);
1990                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1991                     }
1992                     //roligo = reverseOligo(roligo);
1993                     
1994                     oligosPair newPair(foligo, roligo);
1995                     
1996                     if ((foligo == "NONE") || (roligo == "NONE")) { if (!noneOk) { m->mothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } }
1997                     
1998                     group = "";
1999                     while (!in.eof())   {       
2000                                                 char c = in.get(); 
2001                                                 if (c == 10 || c == 13 || c == -1){     break;  }
2002                                                 else if (c == 32 || c == 9){;} //space or tab
2003                                                 else {  group += c;  }
2004                                         } 
2005                                         
2006                     if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
2007                         
2008                     //check for repeat barcodes
2009                     string tempPair = foligo+roligo;
2010                     if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
2011                     else { uniqueBarcodes.insert(tempPair); }
2012                         
2013                     barcodes[indexBarcode]=newPair; indexBarcode++;
2014                                         barcodeNameVector.push_back(group);
2015                                 }else if(type == "LINKER"){
2016                                         linker.push_back(foligo);
2017                     m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
2018                                 }else if(type == "SPACER"){
2019                                         spacer.push_back(foligo);
2020                     m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
2021                                 }
2022                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
2023                         }
2024                         m->gobble(in);
2025                 }       
2026                 in.close();
2027                 
2028                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
2029                 
2030                 //add in potential combos
2031                 if(barcodeNameVector.size() == 0){
2032             oligosPair temp("", "");
2033                         barcodes[0] = temp;
2034                         barcodeNameVector.push_back("");                        
2035                 }
2036                 
2037                 if(primerNameVector.size() == 0){
2038             oligosPair temp("", "");
2039                         primers[0] = temp;
2040                         primerNameVector.push_back("");                 
2041                 }
2042                 
2043                 fastaFileNames.resize(barcodeNameVector.size());
2044                 for(int i=0;i<fastaFileNames.size();i++){
2045                         fastaFileNames[i].assign(primerNameVector.size(), "");
2046                 }
2047                 
2048                 if(allFiles){
2049                         set<string> uniqueNames; //used to cleanup outputFileNames
2050                         for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
2051                                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
2052                                         
2053                                         string primerName = primerNameVector[itPrimer->first];
2054                                         string barcodeName = barcodeNameVector[itBar->first];
2055                     
2056                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
2057                                         else {
2058                         string comboGroupName = "";
2059                         string fastaFileName = "";
2060                         string qualFileName = "";
2061                         string nameFileName = "";
2062                         string countFileName = "";
2063                         
2064                         if(primerName == ""){
2065                             comboGroupName = barcodeNameVector[itBar->first];
2066                         }
2067                         else{
2068                             if(barcodeName == ""){
2069                                 comboGroupName = primerNameVector[itPrimer->first];
2070                             }
2071                             else{
2072                                 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
2073                             }
2074                         }
2075                         
2076                         
2077                         ofstream temp;
2078                         fastaFileName = rootname + comboGroupName + ".fasta";
2079                         if (uniqueNames.count(fastaFileName) == 0) {
2080                             outputNames.push_back(fastaFileName);
2081                             outputTypes["fasta"].push_back(fastaFileName);
2082                             uniqueNames.insert(fastaFileName);
2083                         }
2084                         
2085                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
2086                         m->openOutputFile(fastaFileName, temp);         temp.close();
2087                     }
2088                                 }
2089                         }
2090                 }
2091                 
2092                 bool allBlank = true;
2093                 for (int i = 0; i < barcodeNameVector.size(); i++) {
2094                         if (barcodeNameVector[i] != "") {
2095                                 allBlank = false;
2096                                 break;
2097                         }
2098                 }
2099                 for (int i = 0; i < primerNameVector.size(); i++) {
2100                         if (primerNameVector[i] != "") {
2101                                 allBlank = false;
2102                                 break;
2103                         }
2104                 }
2105         
2106                 if (allBlank) {
2107                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
2108                         allFiles = false;
2109                         return false;
2110                 }
2111                 
2112                 return true;
2113                 
2114         }
2115         catch(exception& e) {
2116                 m->errorOut(e, "MakeContigsCommand", "getOligos");
2117                 exit(1);
2118         }
2119 }
2120 //********************************************************************/
2121 string MakeContigsCommand::reverseOligo(string oligo){
2122         try {
2123         string reverse = "";
2124         
2125         for(int i=oligo.length()-1;i>=0;i--){
2126             
2127             if(oligo[i] == 'A')         {       reverse += 'T'; }
2128             else if(oligo[i] == 'T'){   reverse += 'A'; }
2129             else if(oligo[i] == 'U'){   reverse += 'A'; }
2130             
2131             else if(oligo[i] == 'G'){   reverse += 'C'; }
2132             else if(oligo[i] == 'C'){   reverse += 'G'; }
2133             
2134             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2135             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2136             
2137             else if(oligo[i] == 'M'){   reverse += 'K'; }
2138             else if(oligo[i] == 'K'){   reverse += 'M'; }
2139             
2140             else if(oligo[i] == 'W'){   reverse += 'W'; }
2141             else if(oligo[i] == 'S'){   reverse += 'S'; }
2142             
2143             else if(oligo[i] == 'B'){   reverse += 'V'; }
2144             else if(oligo[i] == 'V'){   reverse += 'B'; }
2145             
2146             else if(oligo[i] == 'D'){   reverse += 'H'; }
2147             else if(oligo[i] == 'H'){   reverse += 'D'; }
2148             
2149             else                                                {       reverse += 'N'; }
2150         }
2151         
2152         
2153         return reverse;
2154     }
2155         catch(exception& e) {
2156                 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
2157                 exit(1);
2158         }
2159 }
2160 //**********************************************************************************************************************
2161 vector<int> MakeContigsCommand::convertQual(string qual) {
2162         try {
2163                 vector<int> qualScores;
2164         bool negativeScores = false;
2165                 
2166                 for (int i = 0; i < qual.length(); i++) { 
2167             
2168             int temp = 0;
2169             temp = int(qual[i]);
2170             if (format == "illumina") {
2171                 temp -= 64; //char '@'
2172             }else if (format == "illumina1.8+") {
2173                     temp -= int('!'); //char '!'
2174             }else if (format == "solexa") {
2175                 temp = int(convertTable[temp]); //convert to sanger
2176                 temp -= int('!'); //char '!'
2177             }else {
2178                 temp -= int('!'); //char '!'
2179             }
2180             
2181             if (temp < -5) { negativeScores = true; }
2182                         qualScores.push_back(temp);
2183                 }
2184                 
2185         if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  }
2186         
2187                 return qualScores;
2188         }
2189         catch(exception& e) {
2190                 m->errorOut(e, "MakeContigsCommand", "convertQual");
2191                 exit(1);
2192         }
2193 }
2194
2195 //**********************************************************************************************************************
2196
2197
2198
2199