]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
fixes while testing 1.33.0
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
15         CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
16         CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
17         CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
18         CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
19         CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
20         CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
21         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
22         CommandParameter pfindex("findex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pfindex);
23         CommandParameter prindex("rindex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(prindex);
24                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
26         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
27
28         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
29         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
30         CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
31                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
32                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
33                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
34                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
35         CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
36         CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
37                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
38         CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
41                 
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MakeContigsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string MakeContigsCommand::getHelpString(){     
53         try {
54                 string helpString = "";
55                 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. \n";
56         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
57         helpString += "If a forward index or reverse index file is provided barcodes be trimmed, and a group file will be created. The oligos parameter is required if an index file is given.\n";
58                 helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, findex, rindex, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
59                 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
60         helpString += "The file parameter is 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one.  Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
61         helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process.  If you provide one, you must provide the other.\n";
62         helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process.  If you provide one, you must provide the other.\n";
63         helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters.  If you provide one, you must provide the other.\n";
64                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
65         helpString += "The findex and rindex parameters are used to provide a forward index and reverse index files to process.  \n";
66         helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
67         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
68                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70         //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
71                 //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
72                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
73                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
74         helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base.  For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5).  If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
75                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
76                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
77         helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
78         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
79         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
80
81         helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
82         helpString += "The make.contigs command should be in the following format: \n";
83                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
84                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
85                 return helpString;
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
89                 exit(1);
90         }
91 }
92 //**********************************************************************************************************************
93 string MakeContigsCommand::getOutputPattern(string type) {
94     try {
95         string pattern = "";
96         
97         if (type == "fasta") {  pattern = "[filename],[tag],contigs.fasta"; } 
98         else if (type == "group") {  pattern = "[filename],[tag],contigs.groups"; }
99         else if (type == "report") {  pattern = "[filename],[tag],contigs.report"; }
100         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
101         
102         return pattern;
103     }
104     catch(exception& e) {
105         m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
106         exit(1);
107     }
108 }
109 //**********************************************************************************************************************
110 MakeContigsCommand::MakeContigsCommand(){       
111         try {
112                 abort = true; calledHelp = true; 
113                 setParameters();
114                 vector<string> tempOutNames;
115                 outputTypes["fasta"] = tempOutNames;
116         outputTypes["group"] = tempOutNames;
117         outputTypes["report"] = tempOutNames;
118     }
119         catch(exception& e) {
120                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
121                 exit(1);
122         }
123 }
124 //**********************************************************************************************************************
125 MakeContigsCommand::MakeContigsCommand(string option)  {
126         try {
127                 abort = false; calledHelp = false;
128         createFileGroup = false; createOligosGroup = false;
129         
130                 //allow user to run help
131                 if(option == "help") { help(); abort = true; calledHelp = true; }
132                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
133                 
134                 else {
135                         vector<string> myArray = setParameters();
136                         
137                         OptionParser parser(option);
138                         map<string, string> parameters = parser.getParameters(); 
139                         
140                         ValidParameters validParameter("pairwise.seqs");
141                         map<string, string>::iterator it;
142                         
143                         //check to make sure all parameters are valid for command
144                         for (it = parameters.begin(); it != parameters.end(); it++) { 
145                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
146                         }
147                         
148                         //initialize outputTypes
149                         vector<string> tempOutNames;
150                         outputTypes["fasta"] = tempOutNames;
151             outputTypes["report"] = tempOutNames;
152             outputTypes["group"] = tempOutNames;
153                         
154             
155                         //if the user changes the input directory command factory will send this info to us in the output parameter 
156                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
157                         if (inputDir == "not found"){   inputDir = "";          }
158                         else { 
159                                 string path;
160                 it = parameters.find("ffastq");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
166                                 }
167                 
168                 it = parameters.find("rfastq");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
174                                 }
175                 
176                 it = parameters.find("ffasta");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["ffasta"] = inputDir + it->second;           }
182                                 }
183                 
184                 it = parameters.find("rfasta");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["rfasta"] = inputDir + it->second;           }
190                                 }
191                 
192                 it = parameters.find("fqfile");
193                                 //user has given a template file
194                                 if(it != parameters.end()){ 
195                                         path = m->hasPath(it->second);
196                                         //if the user has not given a path then, add inputdir. else leave path alone.
197                                         if (path == "") {       parameters["fqfile"] = inputDir + it->second;           }
198                                 }
199                 
200                 it = parameters.find("rqfile");
201                                 //user has given a template file
202                                 if(it != parameters.end()){ 
203                                         path = m->hasPath(it->second);
204                                         //if the user has not given a path then, add inputdir. else leave path alone.
205                                         if (path == "") {       parameters["rqfile"] = inputDir + it->second;           }
206                                 }
207                 
208                 it = parameters.find("file");
209                                 //user has given a template file
210                                 if(it != parameters.end()){ 
211                                         path = m->hasPath(it->second);
212                                         //if the user has not given a path then, add inputdir. else leave path alone.
213                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
214                                 }
215                 
216                 it = parameters.find("oligos");
217                                 //user has given a template file
218                                 if(it != parameters.end()){ 
219                                         path = m->hasPath(it->second);
220                                         //if the user has not given a path then, add inputdir. else leave path alone.
221                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
222                                 }
223                 
224                 it = parameters.find("findex");
225                                 //user has given a template file
226                                 if(it != parameters.end()){
227                                         path = m->hasPath(it->second);
228                                         //if the user has not given a path then, add inputdir. else leave path alone.
229                                         if (path == "") {       parameters["findex"] = inputDir + it->second;           }
230                                 }
231                 
232                 it = parameters.find("rindex");
233                                 //user has given a template file
234                                 if(it != parameters.end()){
235                                         path = m->hasPath(it->second);
236                                         //if the user has not given a path then, add inputdir. else leave path alone.
237                                         if (path == "") {       parameters["rindex"] = inputDir + it->second;           }
238                                 }
239             }
240             
241             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
242                         if (ffastqfile == "not open") {  abort = true; }        
243                         else if (ffastqfile == "not found") { ffastqfile = ""; }
244                         
245                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
246                         if (rfastqfile == "not open") {  abort = true; }        
247                         else if (rfastqfile == "not found") { rfastqfile = "";  }
248             
249             ffastafile = validParameter.validFile(parameters, "ffasta", true);
250                         if (ffastafile == "not open") {  abort = true; }        
251                         else if (ffastafile == "not found") { ffastafile = ""; }
252                         
253                         rfastafile = validParameter.validFile(parameters, "rfasta", true);
254                         if (rfastafile == "not open") {  abort = true; }        
255                         else if (rfastafile == "not found") { rfastafile = "";  }
256             
257             fqualfile = validParameter.validFile(parameters, "fqfile", true);
258                         if (fqualfile == "not open") {  abort = true; } 
259                         else if (fqualfile == "not found") { fqualfile = ""; }
260                         
261                         rqualfile = validParameter.validFile(parameters, "rqfile", true);
262                         if (rqualfile == "not open") {  abort = true; } 
263                         else if (rqualfile == "not found") { rqualfile = "";  }
264             
265             file = validParameter.validFile(parameters, "file", true);
266                         if (file == "not open") {  abort = true; }      
267                         else if (file == "not found") { file = "";  }
268             
269             //provide at least
270             if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
271             if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
272             if ((ffastqfile != "") && (rfastqfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffastq, you must provide a rfastq file.\n"); }
273             if ((ffastqfile == "") && (rfastqfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfastq, you must provide a ffastq file.\n"); }
274             if ((ffastafile != "") && (rfastafile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffasta, you must provide a rfasta file.\n"); }
275             if ((ffastafile == "") && (rfastafile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfasta, you must provide a ffasta file.\n"); }
276             if ((fqualfile != "") && (rqualfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the fqfile, you must provide a rqfile file.\n"); }
277             if ((fqualfile == "") && (rqualfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile, you must provide a fqfile file.\n"); }
278             if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) {
279                 abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n");
280             }
281             
282             oligosfile = validParameter.validFile(parameters, "oligos", true);
283                         if (oligosfile == "not found")      {   oligosfile = "";        }
284                         else if(oligosfile == "not open")   {   abort = true;       } 
285                         else {   m->setOligosFile(oligosfile);          }
286             
287             findexfile = validParameter.validFile(parameters, "findex", true);
288                         if (findexfile == "not found")      {   findexfile = "";        }
289                         else if(findexfile == "not open")   {   abort = true;       }
290             
291             rindexfile = validParameter.validFile(parameters, "rindex", true);
292                         if (rindexfile == "not found")      {   rindexfile = "";        }
293                         else if(rindexfile == "not open")   {   abort = true;       }
294         
295             if ((rindexfile != "") || (findexfile != "")) {
296                                 if (oligosfile == ""){
297                                         oligosfile = m->getOligosFile();
298                                         if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n");  }
299                                         else {
300                         m->mothurOut("You need to provide an oligos file if you are going to use an index file.\n"); abort = true;
301                                         }
302                                 }
303                 
304                 //can only use an index file with the fastq parameters not fasta and qual
305                 if ((ffastafile != "") || (rfastafile != "")) {
306                     m->mothurOut("[ERROR]: You can only use an index file with the fastq parameters or the file option.\n"); abort = true;
307                 }
308                         }
309             
310             //if the user changes the output directory command factory will send this info to us in the output parameter 
311                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
312                  outputDir = ""; 
313             }
314                         
315
316                         //check for optional parameter and set defaults
317                         // ...at some point should added some additional type checking...
318                         string temp;
319                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
320                         m->mothurConvert(temp, match);  
321                         
322                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
323                         m->mothurConvert(temp, misMatch);  
324             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
325                         
326                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
327                         m->mothurConvert(temp, gapOpen);  
328             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
329                         
330                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
331                         m->mothurConvert(temp, gapExtend); 
332             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
333                         
334             temp = validParameter.validFile(parameters, "insert", false);       if (temp == "not found"){       temp = "20";                    }
335                         m->mothurConvert(temp, insert); 
336             if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
337
338             temp = validParameter.validFile(parameters, "deltaq", false);       if (temp == "not found"){       temp = "6";                     }
339                         m->mothurConvert(temp, deltaq);
340             
341                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
342                         m->setProcessors(temp);
343                         m->mothurConvert(temp, processors);
344             
345             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
346                         m->mothurConvert(temp, bdiffs);
347                         
348                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
349                         m->mothurConvert(temp, pdiffs);
350             
351   //          temp = validParameter.validFile(parameters, "ldiffs", false);             if (temp == "not found") { temp = "0"; }
352 //                      m->mothurConvert(temp, ldiffs);
353             ldiffs = 0;
354             
355  //           temp = validParameter.validFile(parameters, "sdiffs", false);             if (temp == "not found") { temp = "0"; }
356  //           m->mothurConvert(temp, sdiffs);
357             sdiffs = 0;
358                         
359                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
360                         m->mothurConvert(temp, tdiffs);
361                         
362                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
363
364             temp = validParameter.validFile(parameters, "allfiles", false);             if (temp == "not found") { temp = "F"; }
365                         allFiles = m->isTrue(temp);
366             
367             
368             temp = validParameter.validFile(parameters, "trimoverlap", false);          if (temp == "not found") { temp = "F"; }
369                         trimOverlap = m->isTrue(temp);
370                         
371                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
372                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
373             
374             format = validParameter.validFile(parameters, "format", false);             if (format == "not found"){     format = "illumina1.8+";        }
375             
376             if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
377                                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
378                                 abort=true;
379                         }
380             
381             //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
382             for (int i = -64; i < 65; i++) { 
383                 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
384                 convertTable.push_back(temp);
385             }
386         }
387                 
388         }
389         catch(exception& e) {
390                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
391                 exit(1);
392         }
393 }
394 //**********************************************************************************************************************
395 int MakeContigsCommand::execute(){
396         try {
397                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
398         
399         //read ffastq and rfastq files creating fasta and qual files.
400         //this function will create a forward and reverse, fasta and qual files for each processor.
401         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual.  filesToProcess is for each filepair in the file parameter file.  for ffastq and rfastq this will be size 1.
402         unsigned long int numReads = 0;
403         int start = time(NULL);
404         longestBase = 1000;
405         m->mothurOut("Reading fastq data...\n"); 
406         vector < vector< vector<string> > > filesToProcess = preProcessData(numReads);
407         m->mothurOut("Done.\n");
408        
409         if (m->control_pressed) { return 0; }
410         
411         map<string, string> cvars;
412         string compOutputDir = outputDir;
413         if (outputDir == "") { compOutputDir = m->hasPath(file); }
414         cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
415         cvars["[tag]"] = "";
416         string compositeGroupFile = getOutputFileName("group",cvars);
417         cvars["[tag]"] = "trim";
418         string compositeFastaFile = getOutputFileName("fasta",cvars);
419         cvars["[tag]"] = "scrap";
420         string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
421         cvars["[tag]"] = "";
422         string compositeMisMatchFile = getOutputFileName("report",cvars);
423         
424         if (filesToProcess.size() > 1) { //clear files for append below
425             ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
426             m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
427             m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
428             m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
429             outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
430             outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
431             outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
432         }
433         
434         map<string, int> totalGroupCounts;
435         
436         for (int l = 0; l < filesToProcess.size(); l++) {
437             
438             m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
439             
440             groupCounts.clear();
441             groupMap.clear();
442             vector<vector<string> > fastaFileNames;
443             createOligosGroup = false;
444             string outputGroupFileName;
445             map<string, string> variables; 
446             string thisOutputDir = outputDir;
447             if (outputDir == "") {  thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
448             variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
449             variables["[tag]"] = "";
450             if(oligosfile != ""){  createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]);  }
451             if (createOligosGroup || createFileGroup) {
452                 outputGroupFileName = getOutputFileName("group",variables);
453             }
454             
455             //give group in file file precedence
456             if (createFileGroup) {  createOligosGroup = false; }
457             
458             variables["[tag]"] = "trim";
459             string outFastaFile = getOutputFileName("fasta",variables);
460             variables["[tag]"] = "scrap";
461             string outScrapFastaFile = getOutputFileName("fasta",variables);
462             variables["[tag]"] = "";
463             string outMisMatchFile = getOutputFileName("report",variables);
464                         
465             m->mothurOut("Making contigs...\n"); 
466             createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
467             
468             //remove temp fasta and qual files
469             for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); }  }
470             
471             if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); }  return 0; }
472             
473             if(allFiles){
474                 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
475                 map<string, string>::iterator it;
476                 set<string> namesToRemove;
477                 for(int i=0;i<fastaFileNames.size();i++){
478                     for(int j=0;j<fastaFileNames[0].size();j++){
479                         if (fastaFileNames[i][j] != "") {
480                             if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
481                                 if(m->isBlank(fastaFileNames[i][j])){
482                                     m->mothurRemove(fastaFileNames[i][j]);
483                                     namesToRemove.insert(fastaFileNames[i][j]);
484                                 }else{  
485                                     it = uniqueFastaNames.find(fastaFileNames[i][j]);
486                                     if (it == uniqueFastaNames.end()) { 
487                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
488                                     }   
489                                 }
490                             }
491                         }
492                     }
493                 }
494                 
495                 //remove names for outputFileNames, just cleans up the output
496                 vector<string> outputNames2;
497                 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
498                 outputNames = outputNames2;
499                 
500                 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
501                     ifstream in;
502                     m->openInputFile(it->first, in);
503                     
504                     ofstream out;
505                     string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
506                     thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
507                     m->openOutputFile(thisGroupName, out);
508                     
509                     while (!in.eof()){
510                         if (m->control_pressed) { break; }
511                         
512                         Sequence currSeq(in); m->gobble(in);
513                         out << currSeq.getName() << '\t' << it->second << endl;
514                     }
515                     out.close();
516                     in.close();
517                 }
518             }
519             
520             if (createFileGroup || createOligosGroup) {
521                 ofstream outGroup;
522                 m->openOutputFile(outputGroupFileName, outGroup);
523                 for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
524                     outGroup << itGroup->first << '\t' << itGroup->second << endl;
525                 }
526                 outGroup.close();
527             }
528             
529             if (filesToProcess.size() > 1) { //merge into large combo files
530                 if (createFileGroup || createOligosGroup) {
531                     if (l == 0) {
532                         ofstream outCGroup;
533                         m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
534                         outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
535                     }
536                     m->appendFiles(outputGroupFileName, compositeGroupFile);
537                     if (!allFiles) { m->mothurRemove(outputGroupFileName);  }
538                     else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
539                     
540                     for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
541                         map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
542                         if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
543                         else { itTemp->second += itGroups->second; } //existing group, update total
544                     }
545                 }
546                 if (l == 0) {  m->appendFiles(outMisMatchFile, compositeMisMatchFile);  }
547                 else {  m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile);  }
548                 m->appendFiles(outFastaFile, compositeFastaFile);
549                 m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
550                 if (!allFiles) {
551                     m->mothurRemove(outMisMatchFile);
552                     m->mothurRemove(outFastaFile);
553                     m->mothurRemove(outScrapFastaFile);
554                 }else {
555                     outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
556                     outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
557                     outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
558                 }
559             }else {
560                 totalGroupCounts = groupCounts;
561                 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
562                 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
563                 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
564                 if (createFileGroup || createOligosGroup) {
565                      outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); 
566                 }
567             }
568             m->mothurOut("Done.\n");
569         }
570         
571         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
572         
573         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
574         
575                 //output group counts
576                 m->mothurOutEndLine();
577                 int total = 0;
578                 if (totalGroupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
579                 for (map<string, int>::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) {
580             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
581                 }
582                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
583                 
584                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
585         
586         string currentFasta = "";
587                 itTypes = outputTypes.find("fasta");
588                 if (itTypes != outputTypes.end()) {
589                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
590                 }
591         
592         string currentGroup = "";
593                 itTypes = outputTypes.find("group");
594                 if (itTypes != outputTypes.end()) {
595                         if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
596                 }
597                 
598         //output files created by command
599                 m->mothurOutEndLine();
600                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
601                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
602                 m->mothurOutEndLine();
603
604         return 0;
605     }
606         catch(exception& e) {
607                 m->errorOut(e, "MakeContigsCommand", "execute");
608                 exit(1);
609         }
610 }
611 //**********************************************************************************************************************
612 vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned long int& numReads) {
613         try {
614         vector< vector< vector<string> > > filesToProcess;
615         
616         if (ffastqfile != "") { //reading one file
617             vector< vector<string> > files = readFastqFiles(numReads, ffastqfile, rfastqfile, findexfile, rindexfile);
618             //adjust for really large processors or really small files
619             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
620             if (numReads < processors) { 
621                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
622                 files.resize(numReads);
623                 processors = numReads; 
624             }
625             filesToProcess.push_back(files);
626         }else if (file != "") { //reading multiple files
627             //return only valid pairs
628             vector< vector<string> > filePairsToProcess = readFileNames(file);
629             
630             if (m->control_pressed) { return filesToProcess; }
631             
632             if (filePairsToProcess.size() != 0) {
633                 for (int i = 0; i < filePairsToProcess.size(); i++) {
634                     
635                     if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } }
636                     
637                     unsigned long int thisFilesReads;
638                     vector< vector<string> > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1], filePairsToProcess[i][2], filePairsToProcess[i][3]);
639                     
640                     //adjust for really large processors or really small files
641                     if (thisFilesReads < processors) { 
642                         m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n"); 
643                         for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); }
644                         //remove from file2Group if necassary
645                         map<int, string> cFile2Group;
646                         for (map<int, string>::iterator it = file2Group.begin(); it != file2Group.end(); it++) {
647                             if ((it->first) < i) { cFile2Group[it->first] = it->second; }
648                             else if ((it->first) == i) { } //do nothing, we removed files for i
649                             else { cFile2Group[(it->first-1)] = it->second; } //adjust files because i was removed
650                         }
651                         file2Group = cFile2Group;
652                     }else {
653                         filesToProcess.push_back(files);
654                         numReads += thisFilesReads;
655                     }
656                 }
657                 //all files are bad
658                 if (numReads == 0) {  m->control_pressed = true; }
659             }
660         }else if (ffastafile != "") {
661             vector< vector<string> > files = readFastaFiles(numReads, ffastafile, rfastafile);
662             //adjust for really large processors or really small files
663             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
664             if (numReads < processors) { 
665                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
666                 files.resize(numReads);
667                 processors = numReads; 
668             }
669             filesToProcess.push_back(files);
670         }else { m->control_pressed = true; } //should not get here
671         
672         return filesToProcess;
673     }
674         catch(exception& e) {
675                 m->errorOut(e, "MakeContigsCommand", "preProcessData");
676                 exit(1);
677         }
678 }
679 //**********************************************************************************************************************
680 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int index) {
681         try {
682                 int num = 0;
683                 vector<int> processIDS;
684         string group = "";
685         map<int, string>::iterator it = file2Group.find(index);
686         if (it != file2Group.end()) { group = it->second; }
687         
688 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
689                 int process = 0;
690                 
691                 //loop through and create all the processes you want
692                 while (process != processors-1) {
693                         int pid = fork();
694                         
695                         if (pid > 0) {
696                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
697                                 process++;
698                         }else if (pid == 0){
699                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
700                 
701                                 if(allFiles){
702                                         ofstream temp;
703                     
704                                         for(int i=0;i<tempFASTAFileNames.size();i++){
705                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
706                                                         if (tempFASTAFileNames[i][j] != "") {
707                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
708                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
709                                                         }
710                                                 }
711                                         }
712                                 }
713                 
714                                 num = driver(files[process], 
715                              outputFasta + toString(getpid()) + ".temp", 
716                              outputScrapFasta + toString(getpid()) + ".temp", 
717                              outputMisMatches + toString(getpid()) + ".temp",
718                              tempFASTAFileNames, process, group);
719                                 
720                                 //pass groupCounts to parent
721                 ofstream out;
722                 string tempFile = toString(getpid()) + ".num.temp";
723                 m->openOutputFile(tempFile, out);
724                 out << num << endl;
725                                 if (createFileGroup || createOligosGroup) {
726                                         out << groupCounts.size() << endl;
727                                         
728                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
729                                                 out << it->first << '\t' << it->second << endl;
730                                         }
731                     
732                     out << groupMap.size() << endl;
733                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
734                                                 out << it->first << '\t' << it->second << endl;
735                                         }
736                                 }
737                 out.close();
738                                 
739                                 exit(0);
740                         }else { 
741                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
742                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
743                                 exit(0);
744                         }
745                 }
746                 
747         ofstream temp;
748                 m->openOutputFile(outputFasta, temp);           temp.close();
749         m->openOutputFile(outputScrapFasta, temp);              temp.close();
750                 
751                 //do my part
752                 num = driver(files[processors-1], outputFasta, outputScrapFasta,  outputMisMatches, fastaFileNames, processors-1, group);
753                 
754                 //force parent to wait until all the processes are done
755                 for (int i=0;i<processIDS.size();i++) { 
756                         int temp = processIDS[i];
757                         wait(&temp);
758                 }
759         
760                 for (int i = 0; i < processIDS.size(); i++) {
761             ifstream in;
762             string tempFile = toString(processIDS[i]) + ".num.temp";
763             m->openInputFile(tempFile, in);
764             int tempNum;
765             in >> tempNum; num += tempNum; m->gobble(in);
766             
767                         if (createFileGroup || createOligosGroup) {
768                                 string group;
769                                 in >> tempNum; m->gobble(in);
770                                 
771                                 if (tempNum != 0) {
772                                         for (int j = 0; j < tempNum; j++) { 
773                         int groupNum;
774                                                 in >> group >> groupNum; m->gobble(in);
775                         
776                                                 map<string, int>::iterator it = groupCounts.find(group);
777                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
778                                                 else { groupCounts[it->first] += groupNum; }
779                                         }
780                                 }
781                 in >> tempNum; m->gobble(in);
782                 if (tempNum != 0) {
783                                         for (int j = 0; j < tempNum; j++) { 
784                         string group, seqName;
785                                                 in >> seqName >> group; m->gobble(in);
786                         
787                                                 map<string, string>::iterator it = groupMap.find(seqName);
788                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
789                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
790                                         }
791                                 }
792                         }
793             in.close(); m->mothurRemove(tempFile);
794         }
795     #else
796         
797         //////////////////////////////////////////////////////////////////////////////////////////////////////
798                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
799                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
800                 //////////////////////////////////////////////////////////////////////////////////////////////////////
801                 
802                 vector<contigsData*> pDataArray; 
803                 DWORD   dwThreadIdArray[processors-1];
804                 HANDLE  hThreadArray[processors-1]; 
805                 
806                 //Create processor worker threads.
807                 for( int h=0; h<processors-1; h++ ){
808                         string extension = "";
809                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
810             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
811                         
812             if(allFiles){
813                 ofstream temp;
814                 
815                 for(int i=0;i<tempFASTAFileNames.size();i++){
816                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
817                         if (tempFASTAFileNames[i][j] != "") {
818                             tempFASTAFileNames[i][j] += extension;
819                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
820                         }
821                     }
822                 }
823             }
824                                  
825                         contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
826                         pDataArray.push_back(tempcontig);
827             
828                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
829                 }
830         
831         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
832
833         if(allFiles){
834             ofstream temp;
835             string extension = toString(processors-1) + ".temp";
836             
837             for(int i=0;i<tempFASTAFileNames.size();i++){
838                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
839                     if (tempFASTAFileNames[i][j] != "") {
840                         tempFASTAFileNames[i][j] += extension;
841                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
842                     }
843                 }
844             }
845         }
846
847                 //parent do my part
848                 ofstream temp;
849                 m->openOutputFile(outputFasta, temp);           temp.close();
850         m->openOutputFile(outputScrapFasta, temp);              temp.close();
851         
852         //do my part
853         processIDS.push_back(processors-1);
854                 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"),  (outputScrapFasta+ toString(processors-1) + ".temp"),  (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group);
855         
856                 //Wait until all threads have terminated.
857                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
858                 
859                 //Close all thread handles and free memory allocations.
860                 for(int i=0; i < pDataArray.size(); i++){
861                         num += pDataArray[i]->count;
862             if (!pDataArray[i]->done) {
863                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true; 
864             }
865             for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
866                 map<string, int>::iterator it2 = groupCounts.find(it->first);
867                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
868                 else { groupCounts[it->first] += it->second; }
869             }
870             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
871                 map<string, string>::iterator it2 = groupMap.find(it->first);
872                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
873                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
874             }
875             CloseHandle(hThreadArray[i]);
876                         delete pDataArray[i];
877         }
878                                 
879     #endif      
880         
881         for (int i = 0; i < processIDS.size(); i++) {
882                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
883                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
884                         
885                         m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
886                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
887             
888             m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
889                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
890             
891             if(allFiles){
892                                 for(int j=0;j<fastaFileNames.size();j++){
893                                         for(int k=0;k<fastaFileNames[j].size();k++){
894                                                 if (fastaFileNames[j][k] != "") {
895                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
896                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
897                                                 }
898                                         }
899                                 }
900                         }
901                 }
902                 
903                 return num;
904         }
905         catch(exception& e) {
906                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
907                 exit(1);
908         }
909 }
910 //**********************************************************************************************************************
911 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process, string group){
912     try {
913         
914         Alignment* alignment;
915         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
916                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
917         
918         int num = 0;
919         string thisffastafile = files[0];
920         string thisfqualfile = files[1];
921         string thisrfastafile = files[2];
922         string thisrqualfile = files[3];
923         string thisfindexfile = files[4];
924         string thisrindexfile = files[5];
925         
926         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); }
927         
928         ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex;
929         ofstream outFasta, outMisMatch, outScrapFasta;
930         m->openInputFile(thisffastafile, inFFasta);
931         m->openInputFile(thisrfastafile, inRFasta);
932         if (thisfqualfile != "") {
933             m->openInputFile(thisfqualfile, inFQual);
934             m->openInputFile(thisrqualfile, inRQual);
935         }
936         
937         if (thisfindexfile != "") { m->openInputFile(thisfindexfile, inFIndex);  }
938         if (thisrindexfile != "") { m->openInputFile(thisrindexfile, inRIndex);  }
939         
940         m->openOutputFile(outputFasta, outFasta);
941         m->openOutputFile(outputScrapFasta, outScrapFasta);
942         m->openOutputFile(outputMisMatches, outMisMatch);
943         outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  
944         
945         TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
946         
947         while ((!inFFasta.eof()) && (!inRFasta.eof())) {
948             
949             if (m->control_pressed) { break; }
950             
951             int success = 1;
952             string trashCode = "";
953             int currentSeqsDiffs = 0;
954
955             //read seqs and quality info
956             Sequence fSeq(inFFasta); m->gobble(inFFasta);
957             Sequence rSeq(inRFasta); m->gobble(inRFasta);
958             QualityScores* fQual = NULL; QualityScores* rQual = NULL;
959             if (thisfqualfile != "") {
960                 fQual = new QualityScores(inFQual); m->gobble(inFQual);
961                 rQual = new QualityScores(inRQual); m->gobble(inRQual);
962             }
963             Sequence findexBarcode("findex", "NONE");  Sequence rindexBarcode("rindex", "NONE");
964             if (thisfindexfile != "") {
965                 Sequence temp(inFIndex); m->gobble(inFIndex);
966                 findexBarcode.setAligned(temp.getAligned());
967             }
968             
969             if (thisrindexfile != "") {
970                 Sequence temp(inRIndex); m->gobble(inRIndex);
971                 rindexBarcode.setAligned(temp.getAligned());
972             }
973             
974             int barcodeIndex = 0;
975             int primerIndex = 0;
976                         
977             if(barcodes.size() != 0){
978                 if (thisfqualfile != "") {
979                     if ((thisfindexfile != "") || (thisrindexfile != "")) {
980                         success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
981                     }else {
982                         success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
983                     }
984                 }else {
985                     success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
986                 }
987                 if(success > bdiffs)            {       trashCode += 'b';       }
988                 else{ currentSeqsDiffs += success;  }
989             }
990             
991             if(primers.size() != 0){
992                 if (thisfqualfile != "") {
993                     success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
994                 }else {
995                     success = trimOligos.stripForward(fSeq, rSeq, primerIndex);
996                 }
997                 if(success > pdiffs)            {       trashCode += 'f';       }
998                 else{ currentSeqsDiffs += success;  }
999             }
1000             
1001             if (currentSeqsDiffs > tdiffs)      {       trashCode += 't';   }
1002             
1003             //flip the reverse reads
1004             rSeq.reverseComplement();
1005             if (thisfqualfile != "") { rQual->flipQScores(); }
1006
1007             //pairwise align
1008             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
1009             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
1010             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
1011             fSeq.setAligned(alignment->getSeqAAln());
1012             rSeq.setAligned(alignment->getSeqBAln());
1013             int length = fSeq.getAligned().length();
1014             
1015             //traverse alignments merging into one contiguous seq
1016             string contig = "";
1017             int numMismatches = 0;
1018             string seq1 = fSeq.getAligned();
1019             string seq2 = rSeq.getAligned();
1020             vector<int> scores1, scores2; 
1021             if (thisfqualfile != "") {
1022                 scores1 = fQual->getQualityScores();
1023                 scores2 = rQual->getQualityScores();
1024                 delete fQual; delete rQual;
1025             }
1026             
1027             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
1028             int overlapStart = fSeq.getStartPos();
1029             int seq2Start = rSeq.getStartPos();
1030             
1031             //bigger of the 2 starting positions is the location of the overlapping start
1032             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
1033                 overlapStart = seq2Start; 
1034                 for (int i = 0; i < overlapStart; i++) { contig += seq1[i];  }
1035             }else { //seq1 starts later so take from 0 to overlapStart from seq2
1036                 for (int i = 0; i < overlapStart; i++) {  contig += seq2[i]; }
1037             }
1038             
1039             int seq1End = fSeq.getEndPos();
1040             int seq2End = rSeq.getEndPos();
1041             int overlapEnd = seq1End;
1042             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
1043             
1044             int oStart = contig.length();
1045             //cout << fSeq.getAligned()  << endl; cout << rSeq.getAligned() << endl;
1046             for (int i = overlapStart; i < overlapEnd; i++) {
1047                 //cout << seq1[i] << ' ' << seq2[i] << ' ' << scores1[ABaseMap[i]] << ' ' << scores2[BBaseMap[i]] << endl;
1048                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
1049                     contig += seq1[i];
1050                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
1051                     if (thisfqualfile != "") {
1052                         if (scores2[BBaseMap[i]] <= insert) { } //
1053                         else { contig += seq2[i];  }
1054                     }else { contig += seq2[i]; } //with no quality info, then we keep it?
1055                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
1056                     if (thisfqualfile != "") {
1057                         if (scores1[ABaseMap[i]] <= insert) { } //
1058                         else { contig += seq1[i];  }
1059                     }else { contig += seq1[i]; } //with no quality info, then we keep it?
1060                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
1061                     if (thisfqualfile != "") {
1062                         if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
1063                             char c = seq1[i];
1064                             if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
1065                             contig += c;
1066                         }else { //if no, base becomes n
1067                             contig += 'N';
1068                         }
1069                         numMismatches++;
1070                     }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
1071                 }else { //should never get here
1072                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
1073                 }
1074             }
1075             int oend = contig.length();
1076             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
1077                 for (int i = overlapEnd; i < length; i++) { contig += seq2[i];  }
1078             }else { //seq2 ends before seq1 so take from overlap to length from seq1
1079                 for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
1080             }
1081             //cout << contig << endl;
1082             //exit(1);
1083             if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart);  if (contig.length() == 0) { trashCode += "l"; } }
1084             
1085             if(trashCode.length() == 0){
1086                 bool ignore = false;
1087                 
1088                 if (m->debug) { m->mothurOut(fSeq.getName()); }
1089                 
1090                 if (createOligosGroup) {
1091                     if(barcodes.size() != 0){
1092                         string thisGroup = barcodeNameVector[barcodeIndex];
1093                         if (primers.size() != 0) { 
1094                             if (primerNameVector[primerIndex] != "") { 
1095                                 if(thisGroup != "") {
1096                                     thisGroup += "." + primerNameVector[primerIndex]; 
1097                                 }else {
1098                                     thisGroup = primerNameVector[primerIndex]; 
1099                                 }
1100                             } 
1101                         }
1102                         
1103                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
1104                         
1105                         int pos = thisGroup.find("ignore");
1106                         if (pos == string::npos) {
1107                             groupMap[fSeq.getName()] = thisGroup; 
1108                         
1109                             map<string, int>::iterator it = groupCounts.find(thisGroup);
1110                             if (it == groupCounts.end()) {      groupCounts[thisGroup] = 1; }
1111                             else { groupCounts[it->first] ++; }
1112                         }else { ignore = true; }
1113                         
1114                     }
1115                 }else if (createFileGroup) {
1116                     int pos = group.find("ignore");
1117                     if (pos == string::npos) {
1118                         groupMap[fSeq.getName()] = group;
1119                         
1120                         map<string, int>::iterator it = groupCounts.find(group);
1121                         if (it == groupCounts.end()) {  groupCounts[group] = 1; }
1122                         else { groupCounts[it->first] ++; }
1123                     }else { ignore = true; }
1124                 }
1125                 if (m->debug) { m->mothurOut("\n"); }
1126                 
1127                 if(allFiles && !ignore){
1128                     ofstream output;
1129                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
1130                     output << ">" << fSeq.getName() << endl << contig << endl;
1131                     output.close();
1132                 }
1133                 
1134                 //output
1135                 outFasta << ">" << fSeq.getName() << endl << contig << endl;
1136                 int numNs = 0;
1137                 for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; }  }
1138                 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
1139             }else {
1140                 //output
1141                 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
1142             }
1143             num++;
1144             
1145                         //report progress
1146                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1147                 }
1148         
1149                 //report progress
1150                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1151         
1152         inFFasta.close();
1153         inRFasta.close();
1154         outFasta.close();
1155         outScrapFasta.close();
1156         outMisMatch.close();
1157         if (thisfqualfile != "") {
1158             inFQual.close();
1159             inRQual.close();
1160         }
1161         delete alignment;
1162         
1163         if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);  }
1164     
1165         return num;
1166     }
1167         catch(exception& e) {
1168                 m->errorOut(e, "MakeContigsCommand", "driver");
1169                 exit(1);
1170         }
1171 }
1172 //**********************************************************************************************************************
1173 vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq, string findex, string rindex){
1174     try {
1175         vector< vector<string> > files;
1176         //maps processors number to file pointer
1177         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual, tempfiles[4] = forwardIndex, [4] = forwardReverse
1178         map<int, vector<ofstream*> >::iterator it;
1179         
1180         //create files to write to
1181         for (int i = 0; i < processors; i++) {
1182             vector<ofstream*> temp;
1183             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1184             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1185             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1186             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1187             ofstream* outFI = new ofstream;     temp.push_back(outFI);  
1188             ofstream* outRI = new ofstream;     temp.push_back(outRI);  
1189             tempfiles[i] = temp;
1190             
1191             vector<string> names;
1192             string thisOutputDir = outputDir;
1193             if (outputDir == "") { thisOutputDir = m->hasPath(ffastq); }
1194             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "ffastatemp";
1195             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp";
1196             string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp";
1197             string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp";
1198             string findexfilename = ""; string rindexfilename = "";
1199             noneOk = false; //flag to oligos file read that its okay to allow for non paired barcodes
1200             if (findex != "") {  findexfilename = thisOutputDir + m->getRootName(m->getSimpleName(findex)) + toString(i) + "findextemp"; m->openOutputFile(findexfilename, *outFI);  noneOk = true; }
1201             if (rindex != "") {  rindexfilename = thisOutputDir + m->getRootName(m->getSimpleName(rindex)) + toString(i) + "rindextemp";  m->openOutputFile(rindexfilename, *outRI); noneOk = true; }
1202             names.push_back(ffastafilename); names.push_back(fqualfilename);
1203             names.push_back(rfastafilename); names.push_back(rqualfilename);
1204             names.push_back(findexfilename); names.push_back(rindexfilename);
1205             files.push_back(names);
1206             
1207             m->openOutputFile(ffastafilename, *outFF);
1208             m->openOutputFile(rfastafilename, *outRF);
1209             m->openOutputFile(fqualfilename, *outFQ);
1210             m->openOutputFile(rqualfilename, *outRQ);
1211         }
1212         
1213         if (m->control_pressed) {
1214             //close files, delete ofstreams
1215             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1216             //remove files
1217             for (int i = 0; i < files.size(); i++) {  
1218                 for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") {  m->mothurRemove(files[i][j]); }  }
1219             }
1220         }
1221         
1222         ifstream inForward;
1223         m->openInputFile(ffastq, inForward);
1224         
1225         ifstream inReverse;
1226         m->openInputFile(rfastq, inReverse);
1227         
1228         ifstream infIndex, inrIndex;
1229         bool findexIsGood = false;
1230         bool rindexIsGood = false;
1231         if (findex != "") { m->openInputFile(findex, infIndex); findexIsGood = true; }
1232         if (rindex != "") { m->openInputFile(rindex, inrIndex); rindexIsGood = true; }
1233         
1234         count = 0;
1235         map<string, fastqRead> uniques;
1236         map<string, fastqRead> iUniques;
1237         map<string, pairFastqRead> pairUniques;
1238         map<string, fastqRead>::iterator itUniques;
1239         while ((!inForward.eof()) || (!inReverse.eof()) || (findexIsGood) || (rindexIsGood)) {
1240             
1241             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1242             
1243             //get a read from forward and reverse fastq files
1244             bool ignoref, ignorer, ignorefi, ignoreri;
1245             fastqRead thisFread, thisRread, thisFIread, thisRIread;
1246             if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
1247             else { ignoref = true; }
1248             if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
1249             else { ignorer = true; }
1250             if (findexIsGood) { thisFIread = readFastq(infIndex, ignorefi); if (infIndex.eof()) { findexIsGood = false; } }
1251             else { ignorefi = true; }
1252             if (rindexIsGood) { thisRIread = readFastq(inrIndex, ignoreri);  if (inrIndex.eof()) { rindexIsGood = false; } }
1253             else { ignoreri = true; }
1254             
1255             bool allowOne = false;
1256             if ((findex == "") || (rindex == "")) { allowOne = true; }
1257             vector<pairFastqRead> frReads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1258             vector<pairFastqRead> friReads = getReads(ignorefi, ignoreri, thisFIread, thisRIread, iUniques, allowOne);
1259             
1260             //add in index info if provided
1261             vector<pairFastqRead> reads = frReads;
1262             if ((findex != "") || (rindex != "")) {  reads = mergeReads(frReads, friReads, pairUniques); }
1263             
1264             for (int i = 0; i < reads.size(); i++) {
1265                 fastqRead fread = reads[i].forward;
1266                 fastqRead rread = reads[i].reverse;
1267                 fastqRead firead = reads[i].findex;
1268                 fastqRead riread = reads[i].rindex;
1269                 
1270                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); if (findex != "") { m->mothurOut(toString(count) + '\t' + firead.name + '\n'); } if (rindex != "") { m->mothurOut(toString(count) + '\t' + riread.name + '\n'); } }
1271                
1272                 //if (checkReads(fread, rread, ffastq, rfastq)) {
1273                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1274                     
1275                     //if the reads are okay write to output files
1276                     int process = count % processors;
1277                     
1278                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1279                     *(tempfiles[process][1]) << ">" << fread.name << endl;
1280                     for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1281                     *(tempfiles[process][1]) << endl;
1282                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1283                     *(tempfiles[process][3]) << ">" << rread.name << endl;
1284                     for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1285                     *(tempfiles[process][3]) << endl;
1286                     if (findex != "") {  *(tempfiles[process][4]) << ">" << firead.name << endl << firead.sequence << endl; }
1287                     if (rindex != "") {  *(tempfiles[process][5]) << ">" << riread.name << endl << riread.sequence << endl; }
1288                 
1289                     count++;
1290                     
1291                     //report progress
1292                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1293                 //}
1294             }
1295                 }
1296                 //report progress
1297                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1298         
1299         if (uniques.size() != 0) {
1300             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1301                 if (m->control_pressed) { break; }
1302                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1303             }
1304             for (map<string, pairFastqRead>:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) {
1305                 if (m->control_pressed) { break; }
1306                 m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n");
1307             }
1308             m->mothurOutEndLine();
1309         }
1310         
1311         //close files, delete ofstreams
1312         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1313         inForward.close();
1314         inReverse.close();
1315         if (findex != "") { infIndex.close(); }
1316         if (rindex != "") { inrIndex.close(); }
1317        
1318         return files;
1319     }
1320     catch(exception& e) {
1321         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1322         exit(1);
1323     }
1324 }
1325 //**********************************************************************************************************************
1326 vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& count, string ffasta, string rfasta){
1327     try {
1328         vector< vector<string> > files;
1329         //maps processors number to file pointer
1330         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1331         map<int, vector<ofstream*> >::iterator it;
1332         
1333         //create files to write to
1334         for (int i = 0; i < processors; i++) {
1335             vector<ofstream*> temp;
1336             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1337             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1338             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1339             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1340             tempfiles[i] = temp;
1341             
1342             vector<string> names;
1343             string thisOutputDir = outputDir;
1344             if (outputDir == "") { thisOutputDir = m->hasPath(ffasta); }
1345             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffasta)) + toString(i) + "ffastatemp";
1346             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfasta)) + toString(i) + "rfastatemp";
1347             string fqualfilename = "";
1348             if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp";  m->openOutputFile(fqualfilename, *outFQ); }
1349             string rqualfilename = "";
1350             if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); }
1351             string findexfilename = ""; string rindexfilename = "";
1352             names.push_back(ffastafilename); names.push_back(fqualfilename);
1353             names.push_back(rfastafilename); names.push_back(rqualfilename);
1354             names.push_back(findexfilename); names.push_back(rindexfilename);
1355             files.push_back(names);
1356             
1357             m->openOutputFile(ffastafilename, *outFF);
1358             m->openOutputFile(rfastafilename, *outRF);
1359         }
1360         
1361         if (m->control_pressed) {
1362             //close files, delete ofstreams
1363             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1364             //remove files
1365             for (int i = 0; i < files.size(); i++) {  
1366                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1367             }
1368         }
1369         
1370         ifstream inForwardFasta;
1371         m->openInputFile(ffasta, inForwardFasta);
1372         
1373         ifstream inReverseFasta;
1374         m->openInputFile(rfasta, inReverseFasta);
1375         
1376         ifstream inForwardQual, inReverseQual;
1377         if (fqualfile != "") { m->openInputFile(fqualfile, inForwardQual); m->openInputFile(rqualfile, inReverseQual); }
1378         
1379         count = 0;
1380         map<string, fastqRead> uniques;
1381         map<string, fastqRead>::iterator itUniques;
1382         while ((!inForwardFasta.eof()) || (!inReverseFasta.eof())) {
1383             
1384             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1385             
1386             //get a reads from forward and reverse fasta files
1387             bool ignoref, ignorer;
1388             fastqRead thisFread, thisRread;
1389             if (!inForwardFasta.eof()) {  
1390                 ignoref = false; 
1391                 Sequence temp(inForwardFasta); m->gobble(inForwardFasta);
1392                 thisFread.name = temp.getName();
1393                 thisFread.sequence = temp.getUnaligned();
1394             }else { ignoref = true; }
1395             if (!inReverseFasta.eof()) {  
1396                 ignorer = false; 
1397                 Sequence temp(inReverseFasta); m->gobble(inReverseFasta);
1398                 thisRread.name = temp.getName();
1399                 thisRread.sequence = temp.getUnaligned();  
1400             }else { ignorer = true; }
1401             
1402             //get qual reads if given
1403             if (fqualfile != "") {
1404                 if (!inForwardQual.eof() && !ignoref) {  
1405                     QualityScores temp(inForwardQual); m->gobble(inForwardQual);
1406                     //if forward files dont match ignore read
1407                     if (thisFread.name != temp.getName()) { ignoref = true; } 
1408                     else { thisFread.scores = temp.getQualityScores(); }
1409                 }else { ignoref = true; }
1410                 if (!inReverseQual.eof() && !ignorer) {  
1411                     QualityScores temp(inReverseQual); m->gobble(inReverseQual);
1412                     //if reverse files dont match ignore read
1413                     if (thisRread.name != temp.getName()) { ignorer = true; } 
1414                     else { thisRread.scores = temp.getQualityScores(); }
1415                 }else { ignorer = true; }
1416             }
1417             
1418             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1419             
1420             for (int i = 0; i < reads.size(); i++) {
1421                 fastqRead fread = reads[i].forward;
1422                 fastqRead rread = reads[i].reverse;
1423                 
1424                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1425                 
1426                // if (checkReads(fread, rread, ffasta, rfasta)) {
1427                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1428                     
1429                     //if the reads are okay write to output files
1430                     int process = count % processors;
1431                     
1432                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1433                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1434                     if (fqualfile != "") { //if you have quality info, print it
1435                         *(tempfiles[process][1]) << ">" << fread.name << endl;
1436                         for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1437                         *(tempfiles[process][1]) << endl;
1438                         *(tempfiles[process][3]) << ">" << rread.name << endl;
1439                         for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1440                         *(tempfiles[process][3]) << endl;
1441                     }
1442                     count++;
1443                     
1444                     //report progress
1445                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1446                 //}
1447             }
1448                 }
1449                 //report progress
1450                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1451         
1452         if (uniques.size() != 0) {
1453             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1454                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1455             }
1456             m->mothurOutEndLine();
1457         }
1458         
1459         //close files, delete ofstreams
1460         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1461         inReverseFasta.close(); 
1462         inForwardFasta.close(); 
1463         if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); }
1464         
1465         return files;
1466     }
1467     catch(exception& e) {
1468         m->errorOut(e, "MakeContigsCommand", "readFastaFiles");
1469         exit(1);
1470     }
1471 }
1472 //**********************************************************************************************************************
1473 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool allowOne){
1474     try {
1475         vector<pairFastqRead> reads;
1476         map<string, fastqRead>::iterator itUniques;
1477             
1478         if (!ignoref && !ignorer) {
1479             if (forward.name == reverse.name) { 
1480                 pairFastqRead temp(forward, reverse);
1481                 reads.push_back(temp);
1482             }else {
1483                 bool match = false;
1484                 //if no match are the names only different by 1 and 2?
1485                 string tempFRead = forward.name.substr(0, forward.name.length()-1);
1486                 string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
1487                 if (tempFRead == tempRRead) {
1488                     if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
1489                         forward.name = tempFRead;
1490                         reverse.name = tempRRead;
1491                         pairFastqRead temp(forward, reverse);
1492                         reads.push_back(temp);
1493                         match = true;
1494                     }
1495                 }
1496                 
1497                 if (!match) {
1498                     //look for forward pair
1499                     itUniques = uniques.find(forward.name);
1500                     if (itUniques != uniques.end()) {  //we have the pair for this read
1501                         pairFastqRead temp(forward, itUniques->second);
1502                         reads.push_back(temp);
1503                         uniques.erase(itUniques);
1504                     }else { //save this read for later
1505                         uniques[forward.name] = forward;
1506                     }
1507                     
1508                     //look for reverse pair
1509                     itUniques = uniques.find(reverse.name);
1510                     if (itUniques != uniques.end()) {  //we have the pair for this read
1511                         pairFastqRead temp(itUniques->second, reverse);
1512                         reads.push_back(temp);
1513                         uniques.erase(itUniques);
1514                     }else { //save this read for later
1515                         uniques[reverse.name] = reverse;
1516                     }
1517                 }
1518                                 
1519             }
1520         }else if (!ignoref && ignorer) { //ignore reverse keep forward
1521             if (allowOne) {
1522                 fastqRead dummy;
1523                 pairFastqRead temp(forward, dummy);
1524                 reads.push_back(temp);
1525             }else {
1526                 //look for forward pair
1527                 itUniques = uniques.find(forward.name);
1528                 if (itUniques != uniques.end()) {  //we have the pair for this read
1529                     pairFastqRead temp(forward, itUniques->second);
1530                     reads.push_back(temp);
1531                     uniques.erase(itUniques);
1532                 }else { //save this read for later
1533                     uniques[forward.name] = forward;
1534                 }
1535             }
1536         }else if (ignoref && !ignorer) { //ignore forward keep reverse
1537             if (allowOne) {
1538                 fastqRead dummy;
1539                 pairFastqRead temp(dummy, reverse);
1540                 reads.push_back(temp);
1541             }else {
1542                 //look for reverse pair
1543                 itUniques = uniques.find(reverse.name);
1544                 if (itUniques != uniques.end()) {  //we have the pair for this read
1545                     pairFastqRead temp(itUniques->second, reverse);
1546                     reads.push_back(temp);
1547                     uniques.erase(itUniques);
1548                 }else { //save this read for later
1549                     uniques[reverse.name] = reverse;
1550                 }
1551             }
1552         }//else ignore both and do nothing
1553         
1554         return reads;
1555     }
1556     catch(exception& e) {
1557         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1558         exit(1);
1559     }
1560 }
1561 //**********************************************************************************************************************
1562 //look through the reads from the forward and reverse files and try to find matching reads from index files.
1563 vector<pairFastqRead> MakeContigsCommand::mergeReads(vector<pairFastqRead> thisReads, vector<pairFastqRead> indexes, map<string, pairFastqRead>& uniques){
1564     try {
1565         vector<pairFastqRead> reads;
1566         map<string, pairFastqRead>::iterator itUniques;
1567         
1568         set<int> foundIndexes;
1569         for (int i = 0; i < thisReads.size(); i++) {
1570             bool found = false;
1571             for (int j = 0; j < indexes.size(); j++) {
1572                 
1573                 //incase only one index
1574                 string indexName = indexes[j].forward.name;
1575                 if (indexName == "") { indexName = indexes[j].reverse.name; }
1576                 
1577                 if (thisReads[i].forward.name == indexName){
1578                     thisReads[i].findex = indexes[j].forward;
1579                     thisReads[i].rindex = indexes[j].reverse;
1580                     reads.push_back(thisReads[i]);
1581                     found = true;
1582                     foundIndexes.insert(j);
1583                 }
1584             }
1585             
1586             if (!found) {
1587                 //look for forward pair
1588                 itUniques = uniques.find('i'+thisReads[i].forward.name);
1589                 if (itUniques != uniques.end()) {  //we have the pair for this read
1590                     thisReads[i].findex = itUniques->second.forward;
1591                     thisReads[i].rindex = itUniques->second.reverse;
1592                     reads.push_back(thisReads[i]);
1593                     uniques.erase(itUniques);
1594                 }else { //save this read for later
1595                     uniques['r'+thisReads[i].forward.name] = thisReads[i];
1596                 }
1597             }
1598         }
1599         
1600         if (foundIndexes.size() != indexes.size()) { //if we didnt match all the indexes look for them in uniques
1601             for (int j = 0; j < indexes.size(); j++) {
1602                 if (foundIndexes.count(j) == 0) { //we didnt find this one
1603                     //incase only one index
1604                     string indexName = indexes[j].forward.name;
1605                     if (indexName == "") { indexName = indexes[j].reverse.name; }
1606                     
1607                     //look for forward pair
1608                     itUniques = uniques.find('r'+indexName);
1609                     if (itUniques != uniques.end()) {  //we have the pair for this read
1610                         pairFastqRead temp(itUniques->second.forward, itUniques->second.reverse, indexes[j].forward, indexes[j].reverse);
1611                         reads.push_back(temp);
1612                         uniques.erase(itUniques);
1613                     }else { //save this read for later
1614                         uniques['i'+indexName] = indexes[j];
1615                     }
1616                 }
1617             }
1618         }
1619        
1620         
1621         return reads;
1622     }
1623     catch(exception& e) {
1624         m->errorOut(e, "MakeContigsCommand", "mergeReads");
1625         exit(1);
1626     }
1627 }
1628 //**********************************************************************************************************************
1629 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1630     try {
1631         fastqRead read;
1632         
1633         ignore = false;
1634         
1635         //read sequence name
1636         string line = m->getline(in); m->gobble(in);
1637         vector<string> pieces = m->splitWhiteSpace(line);
1638         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
1639         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
1640         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1641         else { name = name.substr(1); }
1642         
1643         //read sequence
1644         string sequence = m->getline(in); m->gobble(in);
1645         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1646         
1647         //read sequence name
1648         line = m->getline(in); m->gobble(in);
1649         pieces = m->splitWhiteSpace(line);
1650         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
1651         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1652         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1653         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1654         
1655         //read quality scores
1656         string quality = m->getline(in); m->gobble(in);
1657         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1658          
1659         //sanity check sequence length and number of quality scores match
1660         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1661         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1662         
1663         vector<int> qualScores = convertQual(quality);
1664         
1665         m->checkName(name);
1666         read.name = name;
1667         read.sequence = sequence;
1668         read.scores = qualScores;
1669         
1670         if (m->debug) { m->mothurOut("[DEBUG]: " + read.name + " " + read.sequence + " " + quality + "\n"); }
1671
1672         return read;
1673     }
1674     catch(exception& e) {
1675         m->errorOut(e, "MakeContigsCommand", "readFastq");
1676         exit(1);
1677     }
1678 }
1679 /**********************************************************************************************************************
1680 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
1681     try {
1682         bool good = true;
1683         
1684         //do sequence lengths match
1685         if (forward.sequence.length() != reverse.sequence.length()) {
1686             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfile + ", ignoring.\n");
1687             good = false; 
1688         }
1689         
1690         //do number of qual scores match 
1691         if (forward.scores.size() != reverse.scores.size()) {
1692             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfile + ", ignoring.\n");
1693             good = false; 
1694         }
1695
1696         return good;
1697     }
1698     catch(exception& e) {
1699         m->errorOut(e, "MakeContigsCommand", "checkReads");
1700         exit(1);
1701     }
1702 }*/
1703 //***************************************************************************************************************
1704 //lines can be 2, 3, or 4 columns
1705 // forward.fastq reverse.fastq -> 2 column
1706 // groupName forward.fastq reverse.fastq -> 3 column
1707 // forward.fastq reverse.fastq forward.index.fastq  reverse.index.fastq  -> 4 column
1708 // forward.fastq reverse.fastq none  reverse.index.fastq  -> 4 column
1709 // forward.fastq reverse.fastq forward.index.fastq  none  -> 4 column
1710 vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
1711         try {
1712         vector< vector<string> > files;
1713         string forward, reverse, findex, rindex;
1714         
1715         ifstream in;
1716         m->openInputFile(filename, in);
1717         
1718         while(!in.eof()) {
1719             
1720             if (m->control_pressed) { return files; }
1721             
1722             string line = m->getline(in);  m->gobble(in);
1723             vector<string> pieces = m->splitWhiteSpace(line);
1724             
1725             string group = "";
1726             if (pieces.size() == 2) {
1727                 forward = pieces[0];
1728                 reverse = pieces[1];
1729                 group = "";
1730                 findex = "";
1731                 rindex = "";
1732             }else if (pieces.size() == 3) {
1733                 group = pieces[0];
1734                 forward = pieces[1];
1735                 reverse = pieces[2];
1736                 findex = "";
1737                 rindex = "";
1738                 createFileGroup = true;
1739             }else if (pieces.size() == 4) {
1740                 forward = pieces[0];
1741                 reverse = pieces[1];
1742                 findex = pieces[2];
1743                 rindex = pieces[3];
1744                 if ((findex == "none") || (findex == "NONE")){ findex = ""; }
1745                 if ((rindex == "none") || (rindex == "NONE")){ rindex = ""; }
1746             }else {
1747                 m->mothurOut("[ERROR]: file lines can be 2, 3, or 4 columns. The forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file.  If you only have one index file add 'none' for the other one. \n"); m->control_pressed = true;
1748             }
1749             
1750             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); }
1751             
1752             //check to make sure both are able to be opened
1753             ifstream in2;
1754             int openForward = m->openInputFile(forward, in2, "noerror");
1755             
1756             //if you can't open it, try default location
1757             if (openForward == 1) {
1758                 if (m->getDefaultPath() != "") { //default path is set
1759                     string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
1760                     m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
1761                     ifstream in3;
1762                     openForward = m->openInputFile(tryPath, in3, "noerror");
1763                     in3.close();
1764                     forward = tryPath;
1765                 }
1766             }
1767             
1768             //if you can't open it, try output location
1769             if (openForward == 1) {
1770                 if (m->getOutputDir() != "") { //default path is set
1771                     string tryPath = m->getOutputDir() + m->getSimpleName(forward);
1772                     m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1773                     ifstream in4;
1774                     openForward = m->openInputFile(tryPath, in4, "noerror");
1775                     forward = tryPath;
1776                     in4.close();
1777                 }
1778             }
1779             
1780             if (openForward == 1) { //can't find it
1781                 m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n"); 
1782             }else{  in2.close();  }
1783             
1784             ifstream in3;
1785             int openReverse = m->openInputFile(reverse, in3, "noerror");
1786             
1787             //if you can't open it, try default location
1788             if (openReverse == 1) {
1789                 if (m->getDefaultPath() != "") { //default path is set
1790                     string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
1791                     m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
1792                     ifstream in3;
1793                     openReverse = m->openInputFile(tryPath, in3, "noerror");
1794                     in3.close();
1795                     reverse = tryPath;
1796                 }
1797             }
1798             
1799             //if you can't open it, try output location
1800             if (openReverse == 1) {
1801                 if (m->getOutputDir() != "") { //default path is set
1802                     string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
1803                     m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1804                     ifstream in4;
1805                     openReverse = m->openInputFile(tryPath, in4, "noerror");
1806                     reverse = tryPath;
1807                     in4.close();
1808                 }
1809             }
1810             
1811             if (openReverse == 1) { //can't find it
1812                 m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n"); 
1813             }else{  in3.close();  }
1814             
1815             int openFindex = 0;
1816             if (findex != "") {
1817                 ifstream in4;
1818                 openFindex = m->openInputFile(findex, in4, "noerror"); in4.close();
1819                 
1820                 //if you can't open it, try default location
1821                 if (openFindex == 1) {
1822                     if (m->getDefaultPath() != "") { //default path is set
1823                         string tryPath = m->getDefaultPath() + m->getSimpleName(findex);
1824                         m->mothurOut("Unable to open " + findex + ". Trying default " + tryPath); m->mothurOutEndLine();
1825                         ifstream in5;
1826                         openFindex = m->openInputFile(tryPath, in5, "noerror");
1827                         in5.close();
1828                         findex = tryPath;
1829                     }
1830                 }
1831                 
1832                 //if you can't open it, try output location
1833                 if (openFindex == 1) {
1834                     if (m->getOutputDir() != "") { //default path is set
1835                         string tryPath = m->getOutputDir() + m->getSimpleName(findex);
1836                         m->mothurOut("Unable to open " + findex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1837                         ifstream in6;
1838                         openFindex = m->openInputFile(tryPath, in6, "noerror");
1839                         findex = tryPath;
1840                         in6.close();
1841                     }
1842                 }
1843                 
1844                 if (openFindex == 1) { //can't find it
1845                     m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n");
1846                 }
1847             }
1848             
1849             int openRindex = 0;
1850             if (rindex != "") {
1851                 ifstream in7;
1852                 openRindex = m->openInputFile(rindex, in7, "noerror"); in7.close();
1853                 
1854                 //if you can't open it, try default location
1855                 if (openRindex == 1) {
1856                     if (m->getDefaultPath() != "") { //default path is set
1857                         string tryPath = m->getDefaultPath() + m->getSimpleName(rindex);
1858                         m->mothurOut("Unable to open " + rindex + ". Trying default " + tryPath); m->mothurOutEndLine();
1859                         ifstream in8;
1860                         openRindex = m->openInputFile(tryPath, in8, "noerror");
1861                         in8.close();
1862                         rindex = tryPath;
1863                     }
1864                 }
1865                 
1866                 //if you can't open it, try output location
1867                 if (openRindex == 1) {
1868                     if (m->getOutputDir() != "") { //default path is set
1869                         string tryPath = m->getOutputDir() + m->getSimpleName(rindex);
1870                         m->mothurOut("Unable to open " + rindex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1871                         ifstream in9;
1872                         openRindex = m->openInputFile(tryPath, in9, "noerror");
1873                         rindex = tryPath;
1874                         in9.close();
1875                     }
1876                 }
1877                 
1878                 if (openRindex == 1) { //can't find it
1879                     m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n");
1880                 }
1881             }
1882
1883             
1884             if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1)) { //good pair
1885                 file2Group[files.size()] = group;
1886                 vector<string> pair;
1887                 pair.push_back(forward);
1888                 pair.push_back(reverse);
1889                 pair.push_back(findex);
1890                 pair.push_back(rindex);
1891                 if (((findex != "") || (rindex != "")) && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->control_pressed = true;  }
1892                 files.push_back(pair);
1893             }
1894         }
1895         in.close();
1896         
1897         return files;
1898     }
1899     catch(exception& e) {
1900         m->errorOut(e, "MakeContigsCommand", "readFileNames");
1901         exit(1);
1902     }
1903 }
1904 //***************************************************************************************************************
1905 //illumina data requires paired forward and reverse data
1906 //BARCODE   atgcatgc   atgcatgc    groupName 
1907 //PRIMER   atgcatgc   atgcatgc    groupName  
1908 //PRIMER   atgcatgc   atgcatgc  
1909 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
1910         try {
1911                 ifstream in;
1912                 m->openInputFile(oligosfile, in);
1913                 
1914                 ofstream test;
1915                 
1916                 string type, foligo, roligo, group;
1917         
1918                 int indexPrimer = 0;
1919                 int indexBarcode = 0;
1920         set<string> uniquePrimers;
1921         set<string> uniqueBarcodes;
1922                 
1923                 while(!in.eof()){
1924             
1925                         in >> type; 
1926     
1927                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1928             
1929                         if(type[0] == '#'){
1930                                 while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
1931                                 m->gobble(in);
1932                         }
1933                         else{
1934                                 m->gobble(in);
1935                                 //make type case insensitive
1936                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1937                                 
1938                                 in >> foligo;
1939                 
1940                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1941                                 
1942                                 for(int i=0;i<foligo.length();i++){
1943                                         foligo[i] = toupper(foligo[i]);
1944                                         if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
1945                                 }
1946                                 
1947                                 if(type == "PRIMER"){
1948                                         m->gobble(in);
1949                                         
1950                     in >> roligo;
1951                     
1952                     for(int i=0;i<roligo.length();i++){
1953                         roligo[i] = toupper(roligo[i]);
1954                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1955                     }
1956                     //roligo = reverseOligo(roligo);
1957                     
1958                     if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
1959                     
1960                     group = "";
1961                     
1962                                         // get rest of line in case there is a primer name
1963                                         while (!in.eof())       {       
1964                                                 char c = in.get(); 
1965                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1966                                                 else if (c == 32 || c == 9){;} //space or tab
1967                                                 else {  group += c;  }
1968                                         } 
1969                     
1970                     oligosPair newPrimer(foligo, roligo);
1971                     
1972                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1973                     
1974                                         //check for repeat barcodes
1975                     string tempPair = foligo+roligo;
1976                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1977                     else { uniquePrimers.insert(tempPair); }
1978                                         
1979                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1980                                         primers[indexPrimer]=newPrimer; indexPrimer++;          
1981                                         primerNameVector.push_back(group);
1982                                 }else if(type == "BARCODE"){
1983                                         m->gobble(in);
1984                                         
1985                     in >> roligo;
1986                     
1987                     for(int i=0;i<roligo.length();i++){
1988                         roligo[i] = toupper(roligo[i]);
1989                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1990                     }
1991                     //roligo = reverseOligo(roligo);
1992                     
1993                     oligosPair newPair(foligo, roligo);
1994                     
1995                     if ((foligo == "NONE") || (roligo == "NONE")) { if (!noneOk) { m->mothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } }
1996                     
1997                     group = "";
1998                     while (!in.eof())   {       
1999                                                 char c = in.get(); 
2000                                                 if (c == 10 || c == 13 || c == -1){     break;  }
2001                                                 else if (c == 32 || c == 9){;} //space or tab
2002                                                 else {  group += c;  }
2003                                         } 
2004                                         
2005                     if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
2006                         
2007                     //check for repeat barcodes
2008                     string tempPair = foligo+roligo;
2009                     if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
2010                     else { uniqueBarcodes.insert(tempPair); }
2011                         
2012                     barcodes[indexBarcode]=newPair; indexBarcode++;
2013                                         barcodeNameVector.push_back(group);
2014                                 }else if(type == "LINKER"){
2015                                         linker.push_back(foligo);
2016                     m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
2017                                 }else if(type == "SPACER"){
2018                                         spacer.push_back(foligo);
2019                     m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
2020                                 }
2021                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
2022                         }
2023                         m->gobble(in);
2024                 }       
2025                 in.close();
2026                 
2027                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
2028                 
2029                 //add in potential combos
2030                 if(barcodeNameVector.size() == 0){
2031             oligosPair temp("", "");
2032                         barcodes[0] = temp;
2033                         barcodeNameVector.push_back("");                        
2034                 }
2035                 
2036                 if(primerNameVector.size() == 0){
2037             oligosPair temp("", "");
2038                         primers[0] = temp;
2039                         primerNameVector.push_back("");                 
2040                 }
2041                 
2042                 fastaFileNames.resize(barcodeNameVector.size());
2043                 for(int i=0;i<fastaFileNames.size();i++){
2044                         fastaFileNames[i].assign(primerNameVector.size(), "");
2045                 }
2046                 
2047                 if(allFiles){
2048                         set<string> uniqueNames; //used to cleanup outputFileNames
2049                         for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
2050                                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
2051                                         
2052                                         string primerName = primerNameVector[itPrimer->first];
2053                                         string barcodeName = barcodeNameVector[itBar->first];
2054                     
2055                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
2056                                         else {
2057                         string comboGroupName = "";
2058                         string fastaFileName = "";
2059                         string qualFileName = "";
2060                         string nameFileName = "";
2061                         string countFileName = "";
2062                         
2063                         if(primerName == ""){
2064                             comboGroupName = barcodeNameVector[itBar->first];
2065                         }
2066                         else{
2067                             if(barcodeName == ""){
2068                                 comboGroupName = primerNameVector[itPrimer->first];
2069                             }
2070                             else{
2071                                 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
2072                             }
2073                         }
2074                         
2075                         
2076                         ofstream temp;
2077                         fastaFileName = rootname + comboGroupName + ".fasta";
2078                         if (uniqueNames.count(fastaFileName) == 0) {
2079                             outputNames.push_back(fastaFileName);
2080                             outputTypes["fasta"].push_back(fastaFileName);
2081                             uniqueNames.insert(fastaFileName);
2082                         }
2083                         
2084                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
2085                         m->openOutputFile(fastaFileName, temp);         temp.close();
2086                     }
2087                                 }
2088                         }
2089                 }
2090                 
2091                 bool allBlank = true;
2092                 for (int i = 0; i < barcodeNameVector.size(); i++) {
2093                         if (barcodeNameVector[i] != "") {
2094                                 allBlank = false;
2095                                 break;
2096                         }
2097                 }
2098                 for (int i = 0; i < primerNameVector.size(); i++) {
2099                         if (primerNameVector[i] != "") {
2100                                 allBlank = false;
2101                                 break;
2102                         }
2103                 }
2104         
2105                 if (allBlank) {
2106                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
2107                         allFiles = false;
2108                         return false;
2109                 }
2110                 
2111                 return true;
2112                 
2113         }
2114         catch(exception& e) {
2115                 m->errorOut(e, "MakeContigsCommand", "getOligos");
2116                 exit(1);
2117         }
2118 }
2119 //********************************************************************/
2120 string MakeContigsCommand::reverseOligo(string oligo){
2121         try {
2122         string reverse = "";
2123         
2124         for(int i=oligo.length()-1;i>=0;i--){
2125             
2126             if(oligo[i] == 'A')         {       reverse += 'T'; }
2127             else if(oligo[i] == 'T'){   reverse += 'A'; }
2128             else if(oligo[i] == 'U'){   reverse += 'A'; }
2129             
2130             else if(oligo[i] == 'G'){   reverse += 'C'; }
2131             else if(oligo[i] == 'C'){   reverse += 'G'; }
2132             
2133             else if(oligo[i] == 'R'){   reverse += 'Y'; }
2134             else if(oligo[i] == 'Y'){   reverse += 'R'; }
2135             
2136             else if(oligo[i] == 'M'){   reverse += 'K'; }
2137             else if(oligo[i] == 'K'){   reverse += 'M'; }
2138             
2139             else if(oligo[i] == 'W'){   reverse += 'W'; }
2140             else if(oligo[i] == 'S'){   reverse += 'S'; }
2141             
2142             else if(oligo[i] == 'B'){   reverse += 'V'; }
2143             else if(oligo[i] == 'V'){   reverse += 'B'; }
2144             
2145             else if(oligo[i] == 'D'){   reverse += 'H'; }
2146             else if(oligo[i] == 'H'){   reverse += 'D'; }
2147             
2148             else                                                {       reverse += 'N'; }
2149         }
2150         
2151         
2152         return reverse;
2153     }
2154         catch(exception& e) {
2155                 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
2156                 exit(1);
2157         }
2158 }
2159 //**********************************************************************************************************************
2160 vector<int> MakeContigsCommand::convertQual(string qual) {
2161         try {
2162                 vector<int> qualScores;
2163         bool negativeScores = false;
2164                 
2165                 for (int i = 0; i < qual.length(); i++) { 
2166             
2167             int temp = 0;
2168             temp = int(qual[i]);
2169             if (format == "illumina") {
2170                 temp -= 64; //char '@'
2171             }else if (format == "illumina1.8+") {
2172                     temp -= int('!'); //char '!'
2173             }else if (format == "solexa") {
2174                 temp = int(convertTable[temp]); //convert to sanger
2175                 temp -= int('!'); //char '!'
2176             }else {
2177                 temp -= int('!'); //char '!'
2178             }
2179             
2180             if (temp < -5) { negativeScores = true; }
2181                         qualScores.push_back(temp);
2182                 }
2183                 
2184         if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  }
2185         
2186                 return qualScores;
2187         }
2188         catch(exception& e) {
2189                 m->errorOut(e, "MakeContigsCommand", "convertQual");
2190                 exit(1);
2191         }
2192 }
2193
2194 //**********************************************************************************************************************
2195
2196
2197
2198