]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
fixed bug in phylo.diversity rooting. added filename patterns and create filename...
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none","fasta-qfile",false,true,true); parameters.push_back(pfasta);
15         CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none","fasta-qfile",false,true,true); parameters.push_back(prfasta);
16         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
17                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
18                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
19 //        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
20 //              CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
21         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
22
23         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
24         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
25                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
26                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
27                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
28                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
29         CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "","",false,false); parameters.push_back(pthreshold);
30                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
31                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
32                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33                 
34                 vector<string> myArray;
35                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
36                 return myArray;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "MakeContigsCommand", "setParameters");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 string MakeContigsCommand::getHelpString(){     
45         try {
46                 string helpString = "";
47                 helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
48         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
49                 helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n";
50                 helpString += "The ffastq and rfastq parameters are required.\n";
51                 helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
52         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
53                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
54                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
55         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
56                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
57                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
58                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
59                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
60                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
61         helpString += "The threshold parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=40.\n";
62         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
63         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
64         helpString += "The make.contigs command should be in the following format: \n";
65                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
66                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
67                 return helpString;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
71                 exit(1);
72         }
73 }
74 //**********************************************************************************************************************
75 string MakeContigsCommand::getOutputPattern(string type) {
76     try {
77         string pattern = "";
78         
79         if (type == "fasta") {  pattern = "[filename],[tag],contigs.fasta"; } 
80         else if (type == "qfile") {  pattern = "[filename],[tag],contigs.qual"; } 
81         else if (type == "group") {  pattern = "[filename],[tag],groups"; }
82         else if (type == "mismatch") {  pattern = "[filename],[tag],contigs.mismatch"; }
83         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
84         
85         return pattern;
86     }
87     catch(exception& e) {
88         m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
89         exit(1);
90     }
91 }
92 //**********************************************************************************************************************
93 MakeContigsCommand::MakeContigsCommand(){       
94         try {
95                 abort = true; calledHelp = true; 
96                 setParameters();
97                 vector<string> tempOutNames;
98                 outputTypes["fasta"] = tempOutNames;
99                 outputTypes["qfile"] = tempOutNames;
100         outputTypes["group"] = tempOutNames;
101         outputTypes["mismatch"] = tempOutNames;
102         }
103         catch(exception& e) {
104                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
105                 exit(1);
106         }
107 }
108 //**********************************************************************************************************************
109 MakeContigsCommand::MakeContigsCommand(string option)  {
110         try {
111                 abort = false; calledHelp = false;   
112         
113                 //allow user to run help
114                 if(option == "help") { help(); abort = true; calledHelp = true; }
115                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
116                 
117                 else {
118                         vector<string> myArray = setParameters();
119                         
120                         OptionParser parser(option);
121                         map<string, string> parameters = parser.getParameters(); 
122                         
123                         ValidParameters validParameter("pairwise.seqs");
124                         map<string, string>::iterator it;
125                         
126                         //check to make sure all parameters are valid for command
127                         for (it = parameters.begin(); it != parameters.end(); it++) { 
128                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
129                         }
130                         
131                         //initialize outputTypes
132                         vector<string> tempOutNames;
133                         outputTypes["fasta"] = tempOutNames;
134                         outputTypes["qfile"] = tempOutNames;
135             outputTypes["mismatch"] = tempOutNames;
136             outputTypes["group"] = tempOutNames;
137                         
138             
139                         //if the user changes the input directory command factory will send this info to us in the output parameter 
140                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
141                         if (inputDir == "not found"){   inputDir = "";          }
142                         else { 
143                                 string path;
144                 it = parameters.find("ffastq");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
150                                 }
151                 
152                 it = parameters.find("rfastq");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
158                                 }
159                 
160                 it = parameters.find("oligos");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
166                                 }
167             }
168             
169             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
170                         if (ffastqfile == "not open") { ffastqfile = ""; abort = true; }        
171                         else if (ffastqfile == "not found") { ffastqfile = ""; abort=true;  m->mothurOut("The ffastq parameter is required.\n"); }
172                         
173                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
174                         if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }        
175                         else if (rfastqfile == "not found") { rfastqfile = ""; abort=true;  m->mothurOut("The rfastq parameter is required.\n"); }
176             
177             oligosfile = validParameter.validFile(parameters, "oligos", true);
178                         if (oligosfile == "not found")      {   oligosfile = "";        }
179                         else if(oligosfile == "not open")   {   abort = true;       } 
180                         else {   m->setOligosFile(oligosfile);          }
181             
182             //if the user changes the output directory command factory will send this info to us in the output parameter 
183                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(ffastqfile);             }
184                         
185
186                         //check for optional parameter and set defaults
187                         // ...at some point should added some additional type checking...
188                         string temp;
189                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
190                         m->mothurConvert(temp, match);  
191                         
192                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
193                         m->mothurConvert(temp, misMatch);  
194             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
195                         
196                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
197                         m->mothurConvert(temp, gapOpen);  
198             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
199                         
200                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
201                         m->mothurConvert(temp, gapExtend); 
202             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
203                         
204             temp = validParameter.validFile(parameters, "threshold", false);    if (temp == "not found"){       temp = "40";                    }
205                         m->mothurConvert(temp, threshold); 
206             if ((threshold < 0) || (threshold > 40)) { m->mothurOut("[ERROR]: threshold must be between 0 and 40.\n"); abort=true; }
207
208                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
209                         m->setProcessors(temp);
210                         m->mothurConvert(temp, processors);
211             
212             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
213                         m->mothurConvert(temp, bdiffs);
214                         
215                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
216                         m->mothurConvert(temp, pdiffs);
217             
218   //          temp = validParameter.validFile(parameters, "ldiffs", false);             if (temp == "not found") { temp = "0"; }
219 //                      m->mothurConvert(temp, ldiffs);
220             ldiffs = 0;
221             
222  //           temp = validParameter.validFile(parameters, "sdiffs", false);             if (temp == "not found") { temp = "0"; }
223  //           m->mothurConvert(temp, sdiffs);
224             sdiffs = 0;
225                         
226                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
227                         m->mothurConvert(temp, tdiffs);
228                         
229                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
230
231             temp = validParameter.validFile(parameters, "allfiles", false);             if (temp == "not found") { temp = "F"; }
232                         allFiles = m->isTrue(temp);
233                         
234                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
235                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
236         }
237                 
238         }
239         catch(exception& e) {
240                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
241                 exit(1);
242         }
243 }
244 //**********************************************************************************************************************
245 int MakeContigsCommand::execute(){
246         try {
247                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
248         
249         //read ffastq and rfastq files creating fasta and qual files.
250         //this function will create a forward and reverse, fasta and qual files for each processor.
251         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
252         int numReads = 0;
253         int start = time(NULL);
254         longestBase = 1000;
255         m->mothurOut("Reading fastq data...\n"); 
256         vector< vector<string> > files = readFastqFiles(numReads);  
257         m->mothurOut("Done.\n");
258     
259         if (m->control_pressed) { return 0; }
260         
261         vector<vector<string> > fastaFileNames;
262                 vector<vector<string> > qualFileNames;
263         createGroup = false;
264         string outputGroupFileName;
265         map<string, string> variables; 
266         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(ffastqfile));
267         variables["[tag]"] = "";
268         if(oligosfile != ""){
269                         createGroup = getOligos(fastaFileNames, qualFileNames);
270             if (createGroup) { 
271                 outputGroupFileName = getOutputFileName("group",variables);
272                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
273             }
274                 }
275         
276         variables["[tag]"] = "trim";
277         string outFastaFile = getOutputFileName("fasta",variables);
278         string outQualFile = getOutputFileName("qfile",variables);
279         variables["[tag]"] = "scrap";
280         string outScrapFastaFile = getOutputFileName("fasta",variables);
281         string outScrapQualFile = getOutputFileName("qfile",variables);
282
283         variables["[tag]"] = "";
284         string outMisMatchFile = getOutputFileName("mismatch",variables);
285         outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
286         outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
287         outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
288         outputNames.push_back(outScrapQualFile); outputTypes["qfile"].push_back(outScrapQualFile);
289         outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
290         
291         m->mothurOut("Making contigs...\n"); 
292         createProcesses(files, outFastaFile, outQualFile, outScrapFastaFile, outScrapQualFile, outMisMatchFile, fastaFileNames, qualFileNames);
293         m->mothurOut("Done.\n");
294         
295         //remove temp fasta and qual files
296         for (int i = 0; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }  }
297         
298         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
299         
300         if(allFiles){
301                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
302                         map<string, string>::iterator it;
303                         set<string> namesToRemove;
304                         for(int i=0;i<fastaFileNames.size();i++){
305                                 for(int j=0;j<fastaFileNames[0].size();j++){
306                                         if (fastaFileNames[i][j] != "") {
307                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
308                                                         if(m->isBlank(fastaFileNames[i][j])){
309                                                                 m->mothurRemove(fastaFileNames[i][j]);
310                                                                 namesToRemove.insert(fastaFileNames[i][j]);
311
312                                 m->mothurRemove(qualFileNames[i][j]);
313                                 namesToRemove.insert(qualFileNames[i][j]);
314                                                         }else{  
315                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
316                                                                 if (it == uniqueFastaNames.end()) {     
317                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
318                                                                 }       
319                                                         }
320                                                 }
321                                         }
322                                 }
323                         }
324                         
325                         //remove names for outputFileNames, just cleans up the output
326                         vector<string> outputNames2;
327                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
328                         outputNames = outputNames2;
329                         
330             for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
331                 ifstream in;
332                 m->openInputFile(it->first, in);
333                 
334                 ofstream out;
335                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
336                 thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); 
337                 m->openOutputFile(thisGroupName, out);
338                  
339                 while (!in.eof()){
340                     if (m->control_pressed) { break; }
341                     
342                     Sequence currSeq(in); m->gobble(in);
343                     out << currSeq.getName() << '\t' << it->second << endl;  
344                 }
345                 in.close();
346                 out.close();
347             }
348         }
349         
350         if (createGroup) {
351             ofstream outGroup;
352             m->openOutputFile(outputGroupFileName, outGroup);
353             for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
354                 outGroup << itGroup->first << '\t' << itGroup->second << endl;
355             }
356             outGroup.close();
357         }
358         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
359         
360         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
361         
362                 //output group counts
363                 m->mothurOutEndLine();
364                 int total = 0;
365                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
366                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
367             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
368                 }
369                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
370                 
371                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
372         
373         string currentFasta = "";
374                 itTypes = outputTypes.find("fasta");
375                 if (itTypes != outputTypes.end()) {
376                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
377                 }
378         
379         string currentQual = "";
380                 itTypes = outputTypes.find("qfile");
381                 if (itTypes != outputTypes.end()) {
382                         if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
383                 }
384         
385         string currentGroup = "";
386                 itTypes = outputTypes.find("group");
387                 if (itTypes != outputTypes.end()) {
388                         if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
389                 }
390                 
391         //output files created by command
392                 m->mothurOutEndLine();
393                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
394                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
395                 m->mothurOutEndLine();
396
397         return 0;
398     }
399         catch(exception& e) {
400                 m->errorOut(e, "MakeContigsCommand", "execute");
401                 exit(1);
402         }
403 }
404 //**********************************************************************************************************************
405 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputScrapFasta, string outputScrapQual, string outputMisMatches, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
406         try {
407                 int num = 0;
408                 vector<int> processIDS;
409 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
410                 int process = 0;
411                 
412                 //loop through and create all the processes you want
413                 while (process != processors-1) {
414                         int pid = fork();
415                         
416                         if (pid > 0) {
417                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
418                                 process++;
419                         }else if (pid == 0){
420                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
421                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
422                 
423                                 if(allFiles){
424                                         ofstream temp;
425                     
426                                         for(int i=0;i<tempFASTAFileNames.size();i++){
427                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
428                                                         if (tempFASTAFileNames[i][j] != "") {
429                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
430                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
431                                 
432                                 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
433                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
434                                                         }
435                                                 }
436                                         }
437                                 }
438
439                                 num = driver(files[process], 
440                              outputFasta + toString(getpid()) + ".temp", 
441                              outputQual + toString(getpid()) + ".temp", 
442                              outputScrapFasta + toString(getpid()) + ".temp", 
443                              outputScrapQual + toString(getpid()) + ".temp",
444                              outputMisMatches + toString(getpid()) + ".temp",
445                              tempFASTAFileNames,
446                              tempPrimerQualFileNames);
447                                 
448                                 //pass groupCounts to parent
449                 ofstream out;
450                 string tempFile = toString(getpid()) + ".num.temp";
451                 m->openOutputFile(tempFile, out);
452                 out << num << endl;
453                                 if(createGroup){
454                                         out << groupCounts.size() << endl;
455                                         
456                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
457                                                 out << it->first << '\t' << it->second << endl;
458                                         }
459                     
460                     out << groupMap.size() << endl;
461                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
462                                                 out << it->first << '\t' << it->second << endl;
463                                         }
464                                 }
465                 out.close();
466                                 
467                                 exit(0);
468                         }else { 
469                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
470                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
471                                 exit(0);
472                         }
473                 }
474                 
475         ofstream temp;
476                 m->openOutputFile(outputFasta, temp);           temp.close();
477                 m->openOutputFile(outputQual, temp);    temp.close();
478         m->openOutputFile(outputScrapFasta, temp);              temp.close();
479         m->openOutputFile(outputScrapQual, temp);               temp.close();
480         
481                 //do my part
482                 num = driver(files[processors-1], outputFasta, outputQual, outputScrapFasta, outputScrapQual, outputMisMatches, fastaFileNames, qualFileNames);
483                 
484                 //force parent to wait until all the processes are done
485                 for (int i=0;i<processIDS.size();i++) { 
486                         int temp = processIDS[i];
487                         wait(&temp);
488                 }
489         
490                 for (int i = 0; i < processIDS.size(); i++) {
491             ifstream in;
492             string tempFile = toString(processIDS[i]) + ".num.temp";
493             m->openInputFile(tempFile, in);
494             int tempNum;
495             in >> tempNum; num += tempNum; m->gobble(in);
496             
497                         if(createGroup){
498                                 string group;
499                                 in >> tempNum; m->gobble(in);
500                                 
501                                 if (tempNum != 0) {
502                                         for (int j = 0; j < tempNum; j++) { 
503                         int groupNum;
504                                                 in >> group >> groupNum; m->gobble(in);
505                         
506                                                 map<string, int>::iterator it = groupCounts.find(group);
507                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
508                                                 else { groupCounts[it->first] += groupNum; }
509                                         }
510                                 }
511                 in >> tempNum; m->gobble(in);
512                 if (tempNum != 0) {
513                                         for (int j = 0; j < tempNum; j++) { 
514                         string group, seqName;
515                                                 in >> seqName >> group; m->gobble(in);
516                         
517                                                 map<string, string>::iterator it = groupMap.find(seqName);
518                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
519                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
520                                         }
521                                 }
522                         }
523             in.close(); m->mothurRemove(tempFile);
524         }
525     #else
526         
527         //////////////////////////////////////////////////////////////////////////////////////////////////////
528                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
529                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
530                 //////////////////////////////////////////////////////////////////////////////////////////////////////
531                 
532                 vector<contigsData*> pDataArray; 
533                 DWORD   dwThreadIdArray[processors-1];
534                 HANDLE  hThreadArray[processors-1]; 
535                 
536                 //Create processor worker threads.
537                 for( int h=0; h<processors-1; h++ ){
538                         string extension = "";
539                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
540             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
541             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
542             
543             if(allFiles){
544                 ofstream temp;
545                 
546                 for(int i=0;i<tempFASTAFileNames.size();i++){
547                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
548                         if (tempFASTAFileNames[i][j] != "") {
549                             tempFASTAFileNames[i][j] += extension;
550                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
551                             
552                             
553                             tempPrimerQualFileNames[i][j] += extension;
554                             m->openOutputFile(tempPrimerQualFileNames[i][j], temp);             temp.close();
555                         }
556                     }
557                 }
558             }
559
560                                   
561                         contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputQual + extension), (outputScrapFasta + extension), (outputScrapQual + extension),(outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, threshold, barcodes, primers, tempFASTAFileNames, tempPrimerQualFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, h);
562                         pDataArray.push_back(tempcontig);
563             
564                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
565                 }
566         
567         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
568         vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
569
570         if(allFiles){
571             ofstream temp;
572             string extension = toString(processors-1) + ".temp";
573             
574             for(int i=0;i<tempFASTAFileNames.size();i++){
575                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
576                     if (tempFASTAFileNames[i][j] != "") {
577                         tempFASTAFileNames[i][j] += extension;
578                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
579                         
580                         
581                         tempPrimerQualFileNames[i][j] += extension;
582                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
583                     }
584                 }
585             }
586         }
587
588                 //parent do my part
589                 ofstream temp;
590                 m->openOutputFile(outputFasta, temp);           temp.close();
591                 m->openOutputFile(outputQual, temp);    temp.close();
592         m->openOutputFile(outputScrapFasta, temp);              temp.close();
593         m->openOutputFile(outputScrapQual, temp);               temp.close();
594                 
595         
596         //do my part
597                 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputQual+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputScrapQual+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames);       
598         
599                 //Wait until all threads have terminated.
600                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
601                 
602                 //Close all thread handles and free memory allocations.
603                 for(int i=0; i < pDataArray.size(); i++){
604                         num += pDataArray[i]->count;
605             for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
606                 map<string, int>::iterator it2 = groupCounts.find(it->first);
607                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
608                 else { groupCounts[it->first] += it->second; }
609             }
610             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
611                 map<string, string>::iterator it2 = groupMap.find(it->first);
612                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
613                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
614             }
615             CloseHandle(hThreadArray[i]);
616                         delete pDataArray[i];
617         }
618                                 
619     #endif      
620         
621         for (int i = 0; i < processIDS.size(); i++) {
622                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
623                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
624                         
625                         m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
626                         m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
627             
628             m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
629                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
630                         
631                         m->appendFiles((outputScrapQual + toString(processIDS[i]) + ".temp"), outputScrapQual);
632                         m->mothurRemove((outputScrapQual + toString(processIDS[i]) + ".temp"));
633             
634             m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
635                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
636             
637             if(allFiles){
638                                 for(int j=0;j<fastaFileNames.size();j++){
639                                         for(int k=0;k<fastaFileNames[j].size();k++){
640                                                 if (fastaFileNames[j][k] != "") {
641                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
642                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
643                                                         
644                             m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
645                             m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
646                                                 }
647                                         }
648                                 }
649                         }
650                 }
651                 
652                 return num;
653         }
654         catch(exception& e) {
655                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
656                 exit(1);
657         }
658 }
659 //**********************************************************************************************************************
660 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputScrapFasta, string outputScrapQual, string outputMisMatches, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames){
661     try {
662         
663         Alignment* alignment;
664         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
665                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
666         
667         int num = 0;
668         string thisffastafile = files[0];
669         string thisfqualfile = files[1];
670         string thisrfastafile = files[2];
671         string thisrqualfile = files[3];
672         
673         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
674         
675         ifstream inFFasta, inRFasta, inFQual, inRQual;
676         m->openInputFile(thisffastafile, inFFasta);
677         m->openInputFile(thisfqualfile, inFQual);
678         m->openInputFile(thisrfastafile, inRFasta);
679         m->openInputFile(thisrqualfile, inRQual);
680         
681         ofstream outFasta, outQual, outMisMatch, outScrapFasta, outScrapQual;
682         m->openOutputFile(outputFasta, outFasta);
683         m->openOutputFile(outputQual, outQual);
684         m->openOutputFile(outputScrapFasta, outScrapFasta);
685         m->openOutputFile(outputScrapQual, outScrapQual);
686         m->openOutputFile(outputMisMatches, outMisMatch);
687         outMisMatch << "Name\tLength\tMisMatches\n";
688         
689         TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
690         
691         while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
692             
693             if (m->control_pressed) { break; }
694             
695             int success = 1;
696             string trashCode = "";
697             int currentSeqsDiffs = 0;
698
699             //read seqs and quality info
700             Sequence fSeq(inFFasta); m->gobble(inFFasta);
701             Sequence rSeq(inRFasta); m->gobble(inRFasta);
702             QualityScores fQual(inFQual); m->gobble(inFQual);
703             QualityScores rQual(inRQual); m->gobble(inRQual);
704             
705             int barcodeIndex = 0;
706             int primerIndex = 0;
707             
708             if(barcodes.size() != 0){
709                 success = trimOligos.stripBarcode(fSeq, rSeq, fQual, rQual, barcodeIndex);
710                 if(success > bdiffs)            {       trashCode += 'b';       }
711                 else{ currentSeqsDiffs += success;  }
712             }
713             
714             if(primers.size() != 0){
715                 success = trimOligos.stripForward(fSeq, rSeq, fQual, rQual, primerIndex);
716                 if(success > pdiffs)            {       trashCode += 'f';       }
717                 else{ currentSeqsDiffs += success;  }
718             }
719             
720             if (currentSeqsDiffs > tdiffs)      {       trashCode += 't';   }
721             
722             //flip the reverse reads
723             rSeq.reverseComplement();
724             rQual.flipQScores();
725
726             //pairwise align
727             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
728             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
729             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
730             fSeq.setAligned(alignment->getSeqAAln());
731             rSeq.setAligned(alignment->getSeqBAln());
732             int length = fSeq.getAligned().length();
733             
734             //traverse alignments merging into one contiguous seq
735             string contig = "";
736             vector<int> contigScores; 
737             int numMismatches = 0;
738             string seq1 = fSeq.getAligned();
739             string seq2 = rSeq.getAligned();
740             vector<int> scores1 = fQual.getQualityScores();
741             vector<int> scores2 = rQual.getQualityScores();
742             
743             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
744             int overlapStart = fSeq.getStartPos();
745             int seq2Start = rSeq.getStartPos();
746             //bigger of the 2 starting positions is the location of the overlapping start
747             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
748                 overlapStart = seq2Start; 
749                 for (int i = 0; i < overlapStart; i++) {
750                     contig += seq1[i];
751                     contigScores.push_back(scores1[ABaseMap[i]]);
752                 }
753             }else { //seq1 starts later so take from 0 to overlapStart from seq2
754                 for (int i = 0; i < overlapStart; i++) {
755                     contig += seq2[i];
756                     contigScores.push_back(scores2[BBaseMap[i]]);
757                 }
758             }
759             
760             int seq1End = fSeq.getEndPos();
761             int seq2End = rSeq.getEndPos();
762             int overlapEnd = seq1End;
763             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
764             
765             for (int i = overlapStart; i < overlapEnd; i++) {
766                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
767                     contig += seq1[i];
768                     contigScores.push_back(scores1[ABaseMap[i]]);
769                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; }
770                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
771                     if (scores2[BBaseMap[i]] < threshold) { } //
772                     else {
773                         contig += seq2[i];
774                         contigScores.push_back(scores2[BBaseMap[i]]);
775                     }
776                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
777                     if (scores1[ABaseMap[i]] < threshold) { } //
778                     else {
779                         contig += seq1[i];
780                         contigScores.push_back(scores1[ABaseMap[i]]);
781                     }
782                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
783                     char c = seq1[i];
784                     contigScores.push_back(scores1[ABaseMap[i]]);
785                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; }
786                     contig += c;
787                     numMismatches++;
788                 }else { //should never get here
789                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
790                 }
791             }
792             
793             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
794                 for (int i = overlapEnd; i < length; i++) {
795                     contig += seq2[i];
796                     contigScores.push_back(scores2[BBaseMap[i]]);
797                 }
798             }else { //seq2 ends before seq1 so take from overlap to length from seq1
799                 for (int i = overlapEnd; i < length; i++) {
800                     contig += seq1[i];
801                     contigScores.push_back(scores1[ABaseMap[i]]);
802                 }
803                 
804             }
805
806             if(trashCode.length() == 0){
807                 if (createGroup) {
808                     if(barcodes.size() != 0){
809                         string thisGroup = barcodeNameVector[barcodeIndex];
810                         if (primers.size() != 0) { 
811                             if (primerNameVector[primerIndex] != "") { 
812                                 if(thisGroup != "") {
813                                     thisGroup += "." + primerNameVector[primerIndex]; 
814                                 }else {
815                                     thisGroup = primerNameVector[primerIndex]; 
816                                 }
817                             } 
818                         }
819                         
820                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
821                         
822                         groupMap[fSeq.getName()] = thisGroup; 
823                         
824                         map<string, int>::iterator it = groupCounts.find(thisGroup);
825                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
826                         else { groupCounts[it->first] ++; }
827                         
828                     }
829                 }
830                 
831                 if(allFiles){
832                     ofstream output;
833                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
834                     output << ">" << fSeq.getName() << endl << contig << endl;
835                     output.close();
836                     
837                     m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
838                     output << ">" << fSeq.getName() << endl;
839                     for (int i = 0; i < contigScores.size(); i++) { output << contigScores[i] << ' '; }
840                     output << endl;
841                     output.close();                                                     
842                 }
843                 
844                 //output
845                 outFasta << ">" << fSeq.getName() << endl << contig << endl;
846                 outQual << ">" << fSeq.getName() << endl;
847                 for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
848                 outQual << endl;
849                 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
850             }else {
851                 //output
852                 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
853                 outScrapQual << ">" << fSeq.getName() << " | " << trashCode << endl;
854                 for (int i = 0; i < contigScores.size(); i++) { outScrapQual << contigScores[i] << ' '; }
855                 outScrapQual << endl;
856             }
857             num++;
858             
859                         //report progress
860                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
861                 }
862         
863                 //report progress
864                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
865         
866         inFFasta.close();
867         inFQual.close();
868         inRFasta.close();
869         inRQual.close();
870         outFasta.close();
871         outQual.close();
872         outScrapFasta.close();
873         outScrapQual.close();
874         outMisMatch.close();
875         delete alignment;
876         
877         if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta);   m->mothurRemove(outputScrapQual); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);}
878         
879         return num;
880     }
881         catch(exception& e) {
882                 m->errorOut(e, "MakeContigsCommand", "driver");
883                 exit(1);
884         }
885 }
886 //**********************************************************************************************************************
887 vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
888     try {
889         vector< vector<string> > files;
890         
891         //maps processors number to file pointer
892         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
893         map<int, vector<ofstream*> >::iterator it;
894         
895         //create files to write to
896         for (int i = 0; i < processors; i++) {
897             vector<ofstream*> temp;
898             ofstream* outFF = new ofstream;     temp.push_back(outFF);
899             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
900             ofstream* outRF = new ofstream;     temp.push_back(outRF);
901             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
902             tempfiles[i] = temp;
903             
904             vector<string> names;
905             string ffastafilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "ffasta.temp";
906             string rfastafilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rfasta.temp";
907             string fqualfilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "fqual.temp";
908             string rqualfilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rqual.temp";
909             names.push_back(ffastafilename); names.push_back(fqualfilename);
910             names.push_back(rfastafilename); names.push_back(rqualfilename);
911             files.push_back(names);
912             
913             m->openOutputFile(ffastafilename, *outFF);
914             m->openOutputFile(rfastafilename, *outRF);
915             m->openOutputFile(fqualfilename, *outFQ);
916             m->openOutputFile(rqualfilename, *outRQ);
917         }
918         
919         if (m->control_pressed) {
920             //close files, delete ofstreams
921             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
922             //remove files
923             for (int i = 0; i < files.size(); i++) {  
924                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
925             }
926         }
927         
928         ifstream inForward;
929         m->openInputFile(ffastqfile, inForward);
930         
931         ifstream inReverse;
932         m->openInputFile(rfastqfile, inReverse);
933         
934         count = 0;
935         map<string, fastqRead> uniques;
936         map<string, fastqRead>::iterator itUniques;
937         while ((!inForward.eof()) || (!inReverse.eof())) {
938             
939             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
940             
941             //get a read from forward and reverse fastq files
942             bool ignoref, ignorer;
943             fastqRead thisFread, thisRread;
944             if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
945             else { ignoref = true; }
946             if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
947             else { ignorer = true; }
948             
949             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques);
950             
951             for (int i = 0; i < reads.size(); i++) {
952                 fastqRead fread = reads[i].forward;
953                 fastqRead rread = reads[i].reverse;
954                 
955                 if (checkReads(fread, rread)) {
956                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
957                     
958                     //if the reads are okay write to output files
959                     int process = count % processors;
960                     
961                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
962                     *(tempfiles[process][1]) << ">" << fread.name << endl;
963                     for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
964                     *(tempfiles[process][1]) << endl;
965                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
966                     *(tempfiles[process][3]) << ">" << rread.name << endl;
967                     for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
968                     *(tempfiles[process][3]) << endl;
969                     
970                     count++;
971                     
972                     //report progress
973                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
974                 }
975             }
976                 }
977                 //report progress
978                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
979         
980         if (uniques.size() != 0) {
981             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
982                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
983             }
984             m->mothurOutEndLine();
985         }
986         
987         //close files, delete ofstreams
988         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
989         inForward.close();
990         inReverse.close();
991         
992         //adjust for really large processors or really small files
993         if (count == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
994         if (count < processors) { 
995             for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
996             files.resize(count);
997             processors = count; 
998         }
999         
1000         return files;
1001     }
1002     catch(exception& e) {
1003         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1004         exit(1);
1005     }
1006 }
1007 //**********************************************************************************************************************
1008 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques){
1009     try {
1010         vector<pairFastqRead> reads;
1011         map<string, fastqRead>::iterator itUniques;
1012             
1013         if (!ignoref && !ignorer) {
1014             if (forward.name == reverse.name) { 
1015                 pairFastqRead temp(forward, reverse);
1016                 reads.push_back(temp);
1017             }else {
1018                 //look for forward pair
1019                 itUniques = uniques.find(forward.name);
1020                 if (itUniques != uniques.end()) {  //we have the pair for this read
1021                     pairFastqRead temp(forward, itUniques->second);
1022                     reads.push_back(temp);
1023                     uniques.erase(itUniques);
1024                 }else { //save this read for later
1025                     uniques[forward.name] = forward;
1026                 }
1027                 
1028                 //look for reverse pair
1029                 itUniques = uniques.find(reverse.name);
1030                 if (itUniques != uniques.end()) {  //we have the pair for this read
1031                     pairFastqRead temp(itUniques->second, reverse);
1032                     reads.push_back(temp);
1033                     uniques.erase(itUniques);
1034                 }else { //save this read for later
1035                     uniques[reverse.name] = reverse;
1036                 }
1037             }
1038         }else if (!ignoref && ignorer) { //ignore reverse keep forward
1039             //look for forward pair
1040             itUniques = uniques.find(forward.name);
1041             if (itUniques != uniques.end()) {  //we have the pair for this read
1042                 pairFastqRead temp(forward, itUniques->second);
1043                 reads.push_back(temp);
1044                 uniques.erase(itUniques);
1045             }else { //save this read for later
1046                 uniques[forward.name] = forward;
1047             }
1048
1049         }else if (ignoref && !ignorer) { //ignore forward keep reverse
1050             //look for reverse pair
1051             itUniques = uniques.find(reverse.name);
1052             if (itUniques != uniques.end()) {  //we have the pair for this read
1053                 pairFastqRead temp(itUniques->second, reverse);
1054                 reads.push_back(temp);
1055                 uniques.erase(itUniques);
1056             }else { //save this read for later
1057                 uniques[reverse.name] = reverse;
1058             }
1059         }//else ignore both and do nothing
1060         
1061         return reads;
1062     }
1063     catch(exception& e) {
1064         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1065         exit(1);
1066     }
1067 }
1068 //**********************************************************************************************************************
1069 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1070     try {
1071         fastqRead read;
1072         
1073         ignore = false;
1074         
1075         //read sequence name
1076         string line = m->getline(in); m->gobble(in);
1077         vector<string> pieces = m->splitWhiteSpace(line);
1078         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
1079         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
1080         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1081         else { name = name.substr(1); }
1082         
1083         //read sequence
1084         string sequence = m->getline(in); m->gobble(in);
1085         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1086         
1087         //read sequence name
1088         line = m->getline(in); m->gobble(in);
1089         pieces = m->splitWhiteSpace(line);
1090         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
1091         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1092         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1093         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1094         
1095         //read quality scores
1096         string quality = m->getline(in); m->gobble(in);
1097         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1098          
1099         //sanity check sequence length and number of quality scores match
1100         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1101         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1102         
1103         vector<int> qualScores;
1104                 int controlChar = int('!');
1105                 for (int i = 0; i < quality.length(); i++) { 
1106                         int temp = int(quality[i]);
1107                         temp -= controlChar;
1108                         
1109                         qualScores.push_back(temp);
1110                 }
1111     
1112         read.name = name;
1113         read.sequence = sequence;
1114         read.scores = qualScores;
1115
1116         return read;
1117     }
1118     catch(exception& e) {
1119         m->errorOut(e, "MakeContigsCommand", "readFastq");
1120         exit(1);
1121     }
1122 }
1123 //**********************************************************************************************************************
1124 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
1125     try {
1126         bool good = true;
1127         
1128         //do sequence lengths match
1129         if (forward.sequence.length() != reverse.sequence.length()) {
1130             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ", ignoring.\n");
1131             good = false; 
1132         }
1133         
1134         //do number of qual scores match 
1135         if (forward.scores.size() != reverse.scores.size()) {
1136             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ", ignoring.\n");
1137             good = false; 
1138         }
1139
1140         return good;
1141     }
1142     catch(exception& e) {
1143         m->errorOut(e, "MakeContigsCommand", "checkReads");
1144         exit(1);
1145     }
1146 }
1147 //***************************************************************************************************************
1148 //illumina data requires paired forward and reverse data
1149 //BARCODE   atgcatgc   atgcatgc    groupName 
1150 //PRIMER   atgcatgc   atgcatgc    groupName  
1151 //PRIMER   atgcatgc   atgcatgc  
1152 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
1153         try {
1154                 ifstream in;
1155                 m->openInputFile(oligosfile, in);
1156                 
1157                 ofstream test;
1158                 
1159                 string type, foligo, roligo, group;
1160         
1161                 int indexPrimer = 0;
1162                 int indexBarcode = 0;
1163         set<string> uniquePrimers;
1164         set<string> uniqueBarcodes;
1165                 
1166                 while(!in.eof()){
1167             
1168                         in >> type; 
1169             
1170                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1171             
1172                         if(type[0] == '#'){
1173                                 while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
1174                                 m->gobble(in);
1175                         }
1176                         else{
1177                                 m->gobble(in);
1178                                 //make type case insensitive
1179                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1180                                 
1181                                 in >> foligo;
1182                 
1183                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1184                                 
1185                                 for(int i=0;i<foligo.length();i++){
1186                                         foligo[i] = toupper(foligo[i]);
1187                                         if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
1188                                 }
1189                                 
1190                                 if(type == "FORWARD"){
1191                                         m->gobble(in);
1192                                         
1193                     in >> roligo;
1194                     
1195                     for(int i=0;i<roligo.length();i++){
1196                         roligo[i] = toupper(roligo[i]);
1197                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1198                     }
1199                     //roligo = reverseOligo(roligo);
1200                     
1201                     group = "";
1202                     
1203                                         // get rest of line in case there is a primer name
1204                                         while (!in.eof())       {       
1205                                                 char c = in.get(); 
1206                                                 if (c == 10 || c == 13){        break;  }
1207                                                 else if (c == 32 || c == 9){;} //space or tab
1208                                                 else {  group += c;  }
1209                                         } 
1210                     
1211                     oligosPair newPrimer(foligo, roligo);
1212                                         
1213                                         //check for repeat barcodes
1214                     string tempPair = foligo+roligo;
1215                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1216                     else { uniquePrimers.insert(tempPair); }
1217                                         
1218                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1219                     
1220                                         primers[indexPrimer]=newPrimer; indexPrimer++;          
1221                                         primerNameVector.push_back(group);
1222                                 }else if(type == "BARCODE"){
1223                                         m->gobble(in);
1224                                         
1225                     in >> roligo;
1226                     
1227                     for(int i=0;i<roligo.length();i++){
1228                         roligo[i] = toupper(roligo[i]);
1229                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1230                     }
1231                     //roligo = reverseOligo(roligo);
1232                     
1233                     oligosPair newPair(foligo, roligo);
1234                     
1235                     group = "";
1236                     while (!in.eof())   {       
1237                                                 char c = in.get(); 
1238                                                 if (c == 10 || c == 13){        break;  }
1239                                                 else if (c == 32 || c == 9){;} //space or tab
1240                                                 else {  group += c;  }
1241                                         } 
1242                                         
1243                     if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1244                         
1245                     //check for repeat barcodes
1246                     string tempPair = foligo+roligo;
1247                     if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
1248                     else { uniqueBarcodes.insert(tempPair); }
1249                         
1250                     barcodes[indexBarcode]=newPair; indexBarcode++;
1251                                         barcodeNameVector.push_back(group);
1252                                 }else if(type == "LINKER"){
1253                                         linker.push_back(foligo);
1254                     m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
1255                                 }else if(type == "SPACER"){
1256                                         spacer.push_back(foligo);
1257                     m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
1258                                 }
1259                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
1260                         }
1261                         m->gobble(in);
1262                 }       
1263                 in.close();
1264                 
1265                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1266                 
1267                 //add in potential combos
1268                 if(barcodeNameVector.size() == 0){
1269             oligosPair temp("", "");
1270                         barcodes[0] = temp;
1271                         barcodeNameVector.push_back("");                        
1272                 }
1273                 
1274                 if(primerNameVector.size() == 0){
1275             oligosPair temp("", "");
1276                         primers[0] = temp;
1277                         primerNameVector.push_back("");                 
1278                 }
1279                 
1280                 fastaFileNames.resize(barcodeNameVector.size());
1281                 for(int i=0;i<fastaFileNames.size();i++){
1282                         fastaFileNames[i].assign(primerNameVector.size(), "");
1283                 }
1284                 qualFileNames = fastaFileNames; 
1285                 
1286                 if(allFiles){
1287                         set<string> uniqueNames; //used to cleanup outputFileNames
1288                         for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1289                                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1290                                         
1291                                         string primerName = primerNameVector[itPrimer->first];
1292                                         string barcodeName = barcodeNameVector[itBar->first];
1293                                         
1294                                         string comboGroupName = "";
1295                                         string fastaFileName = "";
1296                                         string qualFileName = "";
1297                                         string nameFileName = "";
1298                     string countFileName = "";
1299                                         
1300                                         if(primerName == ""){
1301                                                 comboGroupName = barcodeNameVector[itBar->first];
1302                                         }
1303                                         else{
1304                                                 if(barcodeName == ""){
1305                                                         comboGroupName = primerNameVector[itPrimer->first];
1306                                                 }
1307                                                 else{
1308                                                         comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1309                                                 }
1310                                         }
1311                                         
1312                                         
1313                                         ofstream temp;
1314                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + comboGroupName + ".fasta";
1315                                         if (uniqueNames.count(fastaFileName) == 0) {
1316                                                 outputNames.push_back(fastaFileName);
1317                                                 outputTypes["fasta"].push_back(fastaFileName);
1318                                                 uniqueNames.insert(fastaFileName);
1319                                         }
1320                                         
1321                                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1322                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1323                                         
1324                                         
1325                     qualFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + comboGroupName + ".qual";
1326                     if (uniqueNames.count(qualFileName) == 0) {
1327                         outputNames.push_back(qualFileName);
1328                         outputTypes["qfile"].push_back(qualFileName);
1329                     }
1330                                                 
1331                     qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1332                     m->openOutputFile(qualFileName, temp);              temp.close();
1333                                 }
1334                         }
1335                 }
1336                 
1337                 bool allBlank = true;
1338                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1339                         if (barcodeNameVector[i] != "") {
1340                                 allBlank = false;
1341                                 break;
1342                         }
1343                 }
1344                 for (int i = 0; i < primerNameVector.size(); i++) {
1345                         if (primerNameVector[i] != "") {
1346                                 allBlank = false;
1347                                 break;
1348                         }
1349                 }
1350         
1351                 if (allBlank) {
1352                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1353                         allFiles = false;
1354                         return false;
1355                 }
1356                 
1357                 return true;
1358                 
1359         }
1360         catch(exception& e) {
1361                 m->errorOut(e, "MakeContigsCommand", "getOligos");
1362                 exit(1);
1363         }
1364 }
1365 //********************************************************************/
1366 string MakeContigsCommand::reverseOligo(string oligo){
1367         try {
1368         string reverse = "";
1369         
1370         for(int i=oligo.length()-1;i>=0;i--){
1371             
1372             if(oligo[i] == 'A')         {       reverse += 'T'; }
1373             else if(oligo[i] == 'T'){   reverse += 'A'; }
1374             else if(oligo[i] == 'U'){   reverse += 'A'; }
1375             
1376             else if(oligo[i] == 'G'){   reverse += 'C'; }
1377             else if(oligo[i] == 'C'){   reverse += 'G'; }
1378             
1379             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1380             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1381             
1382             else if(oligo[i] == 'M'){   reverse += 'K'; }
1383             else if(oligo[i] == 'K'){   reverse += 'M'; }
1384             
1385             else if(oligo[i] == 'W'){   reverse += 'W'; }
1386             else if(oligo[i] == 'S'){   reverse += 'S'; }
1387             
1388             else if(oligo[i] == 'B'){   reverse += 'V'; }
1389             else if(oligo[i] == 'V'){   reverse += 'B'; }
1390             
1391             else if(oligo[i] == 'D'){   reverse += 'H'; }
1392             else if(oligo[i] == 'H'){   reverse += 'D'; }
1393             
1394             else                                                {       reverse += 'N'; }
1395         }
1396         
1397         
1398         return reverse;
1399     }
1400         catch(exception& e) {
1401                 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
1402                 exit(1);
1403         }
1404 }
1405 //**********************************************************************************************************************
1406
1407
1408
1409