]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
added sets to amova and homova commands. added oligos to make.contigs. added metadat...
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
15         CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
16         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
17                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
18                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
19 //        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
20 //              CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
21         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
22
23         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
24         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
25                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
26                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
27                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
28                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
29         CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "",false,false); parameters.push_back(pthreshold);
30                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
31                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
32                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
33                 
34                 vector<string> myArray;
35                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
36                 return myArray;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "MakeContigsCommand", "setParameters");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 string MakeContigsCommand::getHelpString(){     
45         try {
46                 string helpString = "";
47                 helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
48         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
49                 helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n";
50                 helpString += "The ffastq and rfastq parameters are required.\n";
51                 helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
52         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
53                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
54                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
55         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
56                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
57                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
58                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
59                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
60                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
61         helpString += "The threshold parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=40.\n";
62         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
63         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
64         helpString += "The make.contigs command should be in the following format: \n";
65                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
66                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
67                 return helpString;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
71                 exit(1);
72         }
73 }
74 //**********************************************************************************************************************
75 string MakeContigsCommand::getOutputFileNameTag(string type, string inputName=""){      
76         try {
77         string outputFileName = "";
78                 map<string, vector<string> >::iterator it;
79         
80         //is this a type this command creates
81         it = outputTypes.find(type);
82         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
83         else {
84             if (type == "fasta")             {   outputFileName =  "contigs.fasta";         }
85             else if (type == "qfile")        {   outputFileName =  "contigs.qual";          }
86             else if (type == "group")            {   outputFileName =  "groups";   }
87             else if (type == "mismatch")     {   outputFileName =  "contigs.mismatch";      }
88             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
89         }
90         return outputFileName;
91         }
92         catch(exception& e) {
93                 m->errorOut(e, "MakeContigsCommand", "getOutputFileNameTag");
94                 exit(1);
95         }
96 }
97 //**********************************************************************************************************************
98 MakeContigsCommand::MakeContigsCommand(){       
99         try {
100                 abort = true; calledHelp = true; 
101                 setParameters();
102                 vector<string> tempOutNames;
103                 outputTypes["fasta"] = tempOutNames;
104                 outputTypes["qfile"] = tempOutNames;
105         outputTypes["group"] = tempOutNames;
106         outputTypes["mismatch"] = tempOutNames;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
110                 exit(1);
111         }
112 }
113 //**********************************************************************************************************************
114 MakeContigsCommand::MakeContigsCommand(string option)  {
115         try {
116                 abort = false; calledHelp = false;   
117         
118                 //allow user to run help
119                 if(option == "help") { help(); abort = true; calledHelp = true; }
120                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
121                 
122                 else {
123                         vector<string> myArray = setParameters();
124                         
125                         OptionParser parser(option);
126                         map<string, string> parameters = parser.getParameters(); 
127                         
128                         ValidParameters validParameter("pairwise.seqs");
129                         map<string, string>::iterator it;
130                         
131                         //check to make sure all parameters are valid for command
132                         for (it = parameters.begin(); it != parameters.end(); it++) { 
133                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
134                         }
135                         
136                         //initialize outputTypes
137                         vector<string> tempOutNames;
138                         outputTypes["fasta"] = tempOutNames;
139                         outputTypes["qfile"] = tempOutNames;
140             outputTypes["mismatch"] = tempOutNames;
141             outputTypes["group"] = tempOutNames;
142                         
143             
144                         //if the user changes the input directory command factory will send this info to us in the output parameter 
145                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
146                         if (inputDir == "not found"){   inputDir = "";          }
147                         else { 
148                                 string path;
149                 it = parameters.find("ffastq");
150                                 //user has given a template file
151                                 if(it != parameters.end()){ 
152                                         path = m->hasPath(it->second);
153                                         //if the user has not given a path then, add inputdir. else leave path alone.
154                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
155                                 }
156                 
157                 it = parameters.find("rfastq");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
163                                 }
164                 
165                 it = parameters.find("oligos");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
171                                 }
172             }
173             
174             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
175                         if (ffastqfile == "not open") { ffastqfile = ""; abort = true; }        
176                         else if (ffastqfile == "not found") { ffastqfile = ""; abort=true;  m->mothurOut("The ffastq parameter is required.\n"); }
177                         
178                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
179                         if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }        
180                         else if (rfastqfile == "not found") { rfastqfile = ""; abort=true;  m->mothurOut("The rfastq parameter is required.\n"); }
181             
182             oligosfile = validParameter.validFile(parameters, "oligos", true);
183                         if (oligosfile == "not found")      {   oligosfile = "";        }
184                         else if(oligosfile == "not open")   {   abort = true;       } 
185                         else {   m->setOligosFile(oligosfile);          }
186             
187             //if the user changes the output directory command factory will send this info to us in the output parameter 
188                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(ffastqfile);             }
189                         
190
191                         //check for optional parameter and set defaults
192                         // ...at some point should added some additional type checking...
193                         string temp;
194                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
195                         m->mothurConvert(temp, match);  
196                         
197                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
198                         m->mothurConvert(temp, misMatch);  
199             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
200                         
201                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
202                         m->mothurConvert(temp, gapOpen);  
203             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
204                         
205                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
206                         m->mothurConvert(temp, gapExtend); 
207             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
208                         
209             temp = validParameter.validFile(parameters, "threshold", false);    if (temp == "not found"){       temp = "40";                    }
210                         m->mothurConvert(temp, threshold); 
211             if ((threshold < 0) || (threshold > 40)) { m->mothurOut("[ERROR]: threshold must be between 0 and 40.\n"); abort=true; }
212
213                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
214                         m->setProcessors(temp);
215                         m->mothurConvert(temp, processors);
216             
217             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
218                         m->mothurConvert(temp, bdiffs);
219                         
220                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
221                         m->mothurConvert(temp, pdiffs);
222             
223  //           temp = validParameter.validFile(parameters, "ldiffs", false);             if (temp == "not found") { temp = "0"; }
224 //                      m->mothurConvert(temp, ldiffs);
225             
226  //           temp = validParameter.validFile(parameters, "sdiffs", false);             if (temp == "not found") { temp = "0"; }
227 //              m->mothurConvert(temp, sdiffs);
228                         
229                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
230                         m->mothurConvert(temp, tdiffs);
231                         
232                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
233
234             temp = validParameter.validFile(parameters, "allfiles", false);             if (temp == "not found") { temp = "F"; }
235                         allFiles = m->isTrue(temp);
236                         
237                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
238                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
239         }
240                 
241         }
242         catch(exception& e) {
243                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
244                 exit(1);
245         }
246 }
247 //**********************************************************************************************************************
248 int MakeContigsCommand::execute(){
249         try {
250                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
251         
252         //read ffastq and rfastq files creating fasta and qual files.
253         //this function will create a forward and reverse, fasta and qual files for each processor.
254         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
255         int numReads = 0;
256         int start = time(NULL);
257         longestBase = 1000;
258         m->mothurOut("Reading fastq data...\n"); 
259         vector< vector<string> > files = readFastqFiles(numReads);  
260         m->mothurOut("Done.\n");
261     
262         if (m->control_pressed) { return 0; }
263         
264         vector<vector<string> > fastaFileNames;
265                 vector<vector<string> > qualFileNames;
266         createGroup = false;
267         string outputGroupFileName;
268         if(oligosfile != ""){
269                         createGroup = getOligos(fastaFileNames, qualFileNames);
270             if (createGroup) { 
271                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("group");
272                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
273             }
274                 }
275         
276         string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "trim." + getOutputFileNameTag("fasta");
277         string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "trim." + getOutputFileNameTag("qfile");
278         string outScrapFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "scrap." + getOutputFileNameTag("fasta");
279         string outScrapQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "scrap." + getOutputFileNameTag("qfile");
280
281         string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatch");
282         outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
283         outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
284         outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
285         outputNames.push_back(outScrapQualFile); outputTypes["qfile"].push_back(outScrapQualFile);
286         outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
287         
288         m->mothurOut("Making contigs...\n"); 
289         createProcesses(files, outFastaFile, outQualFile, outScrapFastaFile, outScrapQualFile, outMisMatchFile, fastaFileNames, qualFileNames);
290         m->mothurOut("Done.\n");
291         
292         //remove temp fasta and qual files
293         for (int i = 0; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }  }
294         
295         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
296         
297         if(allFiles){
298                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
299                         map<string, string>::iterator it;
300                         set<string> namesToRemove;
301                         for(int i=0;i<fastaFileNames.size();i++){
302                                 for(int j=0;j<fastaFileNames[0].size();j++){
303                                         if (fastaFileNames[i][j] != "") {
304                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
305                                                         if(m->isBlank(fastaFileNames[i][j])){
306                                                                 m->mothurRemove(fastaFileNames[i][j]);
307                                                                 namesToRemove.insert(fastaFileNames[i][j]);
308
309                                 m->mothurRemove(qualFileNames[i][j]);
310                                 namesToRemove.insert(qualFileNames[i][j]);
311                                                         }else{  
312                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
313                                                                 if (it == uniqueFastaNames.end()) {     
314                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
315                                                                 }       
316                                                         }
317                                                 }
318                                         }
319                                 }
320                         }
321                         
322                         //remove names for outputFileNames, just cleans up the output
323                         vector<string> outputNames2;
324                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
325                         outputNames = outputNames2;
326                         
327             for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
328                 ifstream in;
329                 m->openInputFile(it->first, in);
330                 
331                 ofstream out;
332                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
333                 thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); 
334                 m->openOutputFile(thisGroupName, out);
335                  
336                 while (!in.eof()){
337                     if (m->control_pressed) { break; }
338                     
339                     Sequence currSeq(in); m->gobble(in);
340                     out << currSeq.getName() << '\t' << it->second << endl;  
341                 }
342                 in.close();
343                 out.close();
344             }
345         }
346         
347         if (createGroup) {
348             ofstream outGroup;
349             m->openOutputFile(outputGroupFileName, outGroup);
350             for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
351                 outGroup << itGroup->first << '\t' << itGroup->second << endl;
352             }
353             outGroup.close();
354         }
355         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
356         
357         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
358         
359                 //output group counts
360                 m->mothurOutEndLine();
361                 int total = 0;
362                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
363                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
364             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
365                 }
366                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
367                 
368                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
369         
370         string currentFasta = "";
371                 itTypes = outputTypes.find("fasta");
372                 if (itTypes != outputTypes.end()) {
373                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
374                 }
375         
376         string currentQual = "";
377                 itTypes = outputTypes.find("qfile");
378                 if (itTypes != outputTypes.end()) {
379                         if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
380                 }
381         
382         string currentGroup = "";
383                 itTypes = outputTypes.find("group");
384                 if (itTypes != outputTypes.end()) {
385                         if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
386                 }
387                 
388         //output files created by command
389                 m->mothurOutEndLine();
390                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
391                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
392                 m->mothurOutEndLine();
393
394         return 0;
395     }
396         catch(exception& e) {
397                 m->errorOut(e, "MakeContigsCommand", "execute");
398                 exit(1);
399         }
400 }
401 //**********************************************************************************************************************
402 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputScrapFasta, string outputScrapQual, string outputMisMatches, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
403         try {
404                 int num = 0;
405                 vector<int> processIDS;
406 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
407                 int process = 0;
408                 
409                 //loop through and create all the processes you want
410                 while (process != processors-1) {
411                         int pid = fork();
412                         
413                         if (pid > 0) {
414                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
415                                 process++;
416                         }else if (pid == 0){
417                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
418                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
419                 
420                                 if(allFiles){
421                                         ofstream temp;
422                     
423                                         for(int i=0;i<tempFASTAFileNames.size();i++){
424                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
425                                                         if (tempFASTAFileNames[i][j] != "") {
426                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
427                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
428                                 
429                                 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
430                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
431                                                         }
432                                                 }
433                                         }
434                                 }
435
436                                 num = driver(files[process], 
437                              outputFasta + toString(getpid()) + ".temp", 
438                              outputQual + toString(getpid()) + ".temp", 
439                              outputScrapFasta + toString(getpid()) + ".temp", 
440                              outputScrapQual + toString(getpid()) + ".temp",
441                              outputMisMatches + toString(getpid()) + ".temp",
442                              tempFASTAFileNames,
443                              tempPrimerQualFileNames);
444                                 
445                                 //pass groupCounts to parent
446                 ofstream out;
447                 string tempFile = toString(getpid()) + ".num.temp";
448                 m->openOutputFile(tempFile, out);
449                 out << num << endl;
450                                 if(createGroup){
451                                         out << groupCounts.size() << endl;
452                                         
453                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
454                                                 out << it->first << '\t' << it->second << endl;
455                                         }
456                     
457                     out << groupMap.size() << endl;
458                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
459                                                 out << it->first << '\t' << it->second << endl;
460                                         }
461                                 }
462                 out.close();
463                                 
464                                 exit(0);
465                         }else { 
466                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
467                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
468                                 exit(0);
469                         }
470                 }
471                 
472         ofstream temp;
473                 m->openOutputFile(outputFasta, temp);           temp.close();
474                 m->openOutputFile(outputQual, temp);    temp.close();
475         m->openOutputFile(outputScrapFasta, temp);              temp.close();
476         m->openOutputFile(outputScrapQual, temp);               temp.close();
477         
478                 //do my part
479                 num = driver(files[processors-1], outputFasta, outputQual, outputScrapFasta, outputScrapQual, outputMisMatches, fastaFileNames, qualFileNames);
480                 
481                 //force parent to wait until all the processes are done
482                 for (int i=0;i<processIDS.size();i++) { 
483                         int temp = processIDS[i];
484                         wait(&temp);
485                 }
486         
487                 for (int i = 0; i < processIDS.size(); i++) {
488             ifstream in;
489             string tempFile = toString(processIDS[i]) + ".num.temp";
490             m->openInputFile(tempFile, in);
491             int tempNum;
492             in >> tempNum; num += tempNum; m->gobble(in);
493             
494                         if(createGroup){
495                                 string group;
496                                 in >> tempNum; m->gobble(in);
497                                 
498                                 if (tempNum != 0) {
499                                         for (int j = 0; j < tempNum; j++) { 
500                         int groupNum;
501                                                 in >> group >> groupNum; m->gobble(in);
502                         
503                                                 map<string, int>::iterator it = groupCounts.find(group);
504                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
505                                                 else { groupCounts[it->first] += groupNum; }
506                                         }
507                                 }
508                 in >> tempNum; m->gobble(in);
509                 if (tempNum != 0) {
510                                         for (int j = 0; j < tempNum; j++) { 
511                         string group, seqName;
512                                                 in >> seqName >> group; m->gobble(in);
513                         
514                                                 map<string, string>::iterator it = groupMap.find(seqName);
515                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
516                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
517                                         }
518                                 }
519                         }
520             in.close(); m->mothurRemove(tempFile);
521         }
522     #else
523         
524         //////////////////////////////////////////////////////////////////////////////////////////////////////
525                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
526                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
527                 //////////////////////////////////////////////////////////////////////////////////////////////////////
528                 
529                 vector<contigsData*> pDataArray; 
530                 DWORD   dwThreadIdArray[processors-1];
531                 HANDLE  hThreadArray[processors-1]; 
532                 
533                 //Create processor worker threads.
534                 for( int h=0; h<processors-1; h++ ){
535                         string extension = "";
536                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
537             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
538             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
539             
540             if(allFiles){
541                 ofstream temp;
542                 
543                 for(int i=0;i<tempFASTAFileNames.size();i++){
544                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
545                         if (tempFASTAFileNames[i][j] != "") {
546                             tempFASTAFileNames[i][j] += extension;
547                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
548                             
549                             
550                             tempPrimerQualFileNames[i][j] += extension;
551                             m->openOutputFile(tempPrimerQualFileNames[i][j], temp);             temp.close();
552                         }
553                     }
554                 }
555             }
556
557                                   
558                         contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputQual + extension), (outputScrapFasta + extension), (outputScrapQual + extension),(outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, threshold, barcodes, primers, tempFASTAFileNames, tempPrimerQualFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, h);
559                         pDataArray.push_back(tempcontig);
560             
561                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
562                 }
563         
564         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
565         vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
566
567         if(allFiles){
568             ofstream temp;
569             string extension = toString(processors-1) + ".temp";
570             
571             for(int i=0;i<tempFASTAFileNames.size();i++){
572                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
573                     if (tempFASTAFileNames[i][j] != "") {
574                         tempFASTAFileNames[i][j] += extension;
575                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
576                         
577                         
578                         tempPrimerQualFileNames[i][j] += extension;
579                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
580                     }
581                 }
582             }
583         }
584
585                 //parent do my part
586                 ofstream temp;
587                 m->openOutputFile(outputFasta, temp);           temp.close();
588                 m->openOutputFile(outputQual, temp);    temp.close();
589         m->openOutputFile(outputScrapFasta, temp);              temp.close();
590         m->openOutputFile(outputScrapQual, temp);               temp.close();
591                 
592         
593         //do my part
594                 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputQual+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputScrapQual+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames);       
595         
596                 //Wait until all threads have terminated.
597                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
598                 
599                 //Close all thread handles and free memory allocations.
600                 for(int i=0; i < pDataArray.size(); i++){
601                         num += pDataArray[i]->count;
602             for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
603                 map<string, int>::iterator it2 = groupCounts.find(it->first);
604                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
605                 else { groupCounts[it->first] += it->second; }
606             }
607             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
608                 map<string, string>::iterator it2 = groupMap.find(it->first);
609                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
610                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
611             }
612             CloseHandle(hThreadArray[i]);
613                         delete pDataArray[i];
614         }
615                                 
616     #endif      
617         
618         for (int i = 0; i < processIDS.size(); i++) {
619                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
620                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
621                         
622                         m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
623                         m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
624             
625             m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
626                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
627                         
628                         m->appendFiles((outputScrapQual + toString(processIDS[i]) + ".temp"), outputScrapQual);
629                         m->mothurRemove((outputScrapQual + toString(processIDS[i]) + ".temp"));
630             
631             m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
632                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
633             
634             if(allFiles){
635                                 for(int j=0;j<fastaFileNames.size();j++){
636                                         for(int k=0;k<fastaFileNames[j].size();k++){
637                                                 if (fastaFileNames[j][k] != "") {
638                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
639                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
640                                                         
641                             m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
642                             m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
643                                                 }
644                                         }
645                                 }
646                         }
647                 }
648                 
649                 return num;
650         }
651         catch(exception& e) {
652                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
653                 exit(1);
654         }
655 }
656 //**********************************************************************************************************************
657 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputScrapFasta, string outputScrapQual, string outputMisMatches, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames){
658     try {
659         
660         Alignment* alignment;
661         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
662                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
663         
664         int num = 0;
665         string thisffastafile = files[0];
666         string thisfqualfile = files[1];
667         string thisrfastafile = files[2];
668         string thisrqualfile = files[3];
669         
670         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
671         
672         ifstream inFFasta, inRFasta, inFQual, inRQual;
673         m->openInputFile(thisffastafile, inFFasta);
674         m->openInputFile(thisfqualfile, inFQual);
675         m->openInputFile(thisrfastafile, inRFasta);
676         m->openInputFile(thisrqualfile, inRQual);
677         
678         ofstream outFasta, outQual, outMisMatch, outScrapFasta, outScrapQual;
679         m->openOutputFile(outputFasta, outFasta);
680         m->openOutputFile(outputQual, outQual);
681         m->openOutputFile(outputScrapFasta, outScrapFasta);
682         m->openOutputFile(outputScrapQual, outScrapQual);
683         m->openOutputFile(outputMisMatches, outMisMatch);
684         outMisMatch << "Name\tLength\tMisMatches\n";
685         
686         TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
687         
688         while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
689             
690             if (m->control_pressed) { break; }
691             
692             int success = 1;
693             string trashCode = "";
694             int currentSeqsDiffs = 0;
695
696             //read seqs and quality info
697             Sequence fSeq(inFFasta); m->gobble(inFFasta);
698             Sequence rSeq(inRFasta); m->gobble(inRFasta);
699             QualityScores fQual(inFQual); m->gobble(inFQual);
700             QualityScores rQual(inRQual); m->gobble(inRQual);
701             
702             int barcodeIndex = 0;
703             int primerIndex = 0;
704             
705             if(barcodes.size() != 0){
706                 success = trimOligos.stripBarcode(fSeq, rSeq, fQual, rQual, barcodeIndex);
707                 if(success > bdiffs)            {       trashCode += 'b';       }
708                 else{ currentSeqsDiffs += success;  }
709             }
710             
711             if(primers.size() != 0){
712                 success = trimOligos.stripForward(fSeq, rSeq, fQual, rQual, primerIndex);
713                 if(success > pdiffs)            {       trashCode += 'f';       }
714                 else{ currentSeqsDiffs += success;  }
715             }
716             
717             if (currentSeqsDiffs > tdiffs)      {       trashCode += 't';   }
718             
719             //flip the reverse reads
720             rSeq.reverseComplement();
721             rQual.flipQScores();
722
723             //pairwise align
724             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
725             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
726             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
727             fSeq.setAligned(alignment->getSeqAAln());
728             rSeq.setAligned(alignment->getSeqBAln());
729             int length = fSeq.getAligned().length();
730             
731             //traverse alignments merging into one contiguous seq
732             string contig = "";
733             vector<int> contigScores; 
734             int numMismatches = 0;
735             string seq1 = fSeq.getAligned();
736             string seq2 = rSeq.getAligned();
737             vector<int> scores1 = fQual.getQualityScores();
738             vector<int> scores2 = rQual.getQualityScores();
739             
740             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
741             int overlapStart = fSeq.getStartPos();
742             int seq2Start = rSeq.getStartPos();
743             //bigger of the 2 starting positions is the location of the overlapping start
744             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
745                 overlapStart = seq2Start; 
746                 for (int i = 0; i < overlapStart; i++) {
747                     contig += seq1[i];
748                     contigScores.push_back(scores1[ABaseMap[i]]);
749                 }
750             }else { //seq1 starts later so take from 0 to overlapStart from seq2
751                 for (int i = 0; i < overlapStart; i++) {
752                     contig += seq2[i];
753                     contigScores.push_back(scores2[BBaseMap[i]]);
754                 }
755             }
756             
757             int seq1End = fSeq.getEndPos();
758             int seq2End = rSeq.getEndPos();
759             int overlapEnd = seq1End;
760             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
761             
762             for (int i = overlapStart; i < overlapEnd; i++) {
763                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
764                     contig += seq1[i];
765                     contigScores.push_back(scores1[ABaseMap[i]]);
766                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; }
767                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
768                     if (scores2[BBaseMap[i]] < threshold) { } //
769                     else {
770                         contig += seq2[i];
771                         contigScores.push_back(scores2[BBaseMap[i]]);
772                     }
773                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
774                     if (scores1[ABaseMap[i]] < threshold) { } //
775                     else {
776                         contig += seq1[i];
777                         contigScores.push_back(scores1[ABaseMap[i]]);
778                     }
779                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
780                     char c = seq1[i];
781                     contigScores.push_back(scores1[ABaseMap[i]]);
782                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; }
783                     contig += c;
784                     numMismatches++;
785                 }else { //should never get here
786                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
787                 }
788             }
789             
790             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
791                 for (int i = overlapEnd; i < length; i++) {
792                     contig += seq2[i];
793                     contigScores.push_back(scores2[BBaseMap[i]]);
794                 }
795             }else { //seq2 ends before seq1 so take from overlap to length from seq1
796                 for (int i = overlapEnd; i < length; i++) {
797                     contig += seq1[i];
798                     contigScores.push_back(scores1[ABaseMap[i]]);
799                 }
800                 
801             }
802
803             if(trashCode.length() == 0){
804                 if (createGroup) {
805                     if(barcodes.size() != 0){
806                         string thisGroup = barcodeNameVector[barcodeIndex];
807                         if (primers.size() != 0) { 
808                             if (primerNameVector[primerIndex] != "") { 
809                                 if(thisGroup != "") {
810                                     thisGroup += "." + primerNameVector[primerIndex]; 
811                                 }else {
812                                     thisGroup = primerNameVector[primerIndex]; 
813                                 }
814                             } 
815                         }
816                         
817                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
818                         
819                         groupMap[fSeq.getName()] = thisGroup; 
820                         
821                         map<string, int>::iterator it = groupCounts.find(thisGroup);
822                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
823                         else { groupCounts[it->first] ++; }
824                         
825                     }
826                 }
827                 
828                 if(allFiles){
829                     ofstream output;
830                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
831                     output << ">" << fSeq.getName() << endl << contig << endl;
832                     output.close();
833                     
834                     m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
835                     output << ">" << fSeq.getName() << endl;
836                     for (int i = 0; i < contigScores.size(); i++) { output << contigScores[i] << ' '; }
837                     output << endl;
838                     output.close();                                                     
839                 }
840                 
841                 //output
842                 outFasta << ">" << fSeq.getName() << endl << contig << endl;
843                 outQual << ">" << fSeq.getName() << endl;
844                 for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
845                 outQual << endl;
846                 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
847             }else {
848                 //output
849                 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
850                 outScrapQual << ">" << fSeq.getName() << " | " << trashCode << endl;
851                 for (int i = 0; i < contigScores.size(); i++) { outScrapQual << contigScores[i] << ' '; }
852                 outScrapQual << endl;
853             }
854             num++;
855             
856                         //report progress
857                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
858                 }
859         
860                 //report progress
861                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
862         
863         inFFasta.close();
864         inFQual.close();
865         inRFasta.close();
866         inRQual.close();
867         outFasta.close();
868         outQual.close();
869         outScrapFasta.close();
870         outScrapQual.close();
871         outMisMatch.close();
872         delete alignment;
873         
874         if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta);   m->mothurRemove(outputScrapQual); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);}
875         
876         return num;
877     }
878         catch(exception& e) {
879                 m->errorOut(e, "MakeContigsCommand", "driver");
880                 exit(1);
881         }
882 }
883 //**********************************************************************************************************************
884 vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
885     try {
886         vector< vector<string> > files;
887         
888         //maps processors number to file pointer
889         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
890         map<int, vector<ofstream*> >::iterator it;
891         
892         //create files to write to
893         for (int i = 0; i < processors; i++) {
894             vector<ofstream*> temp;
895             ofstream* outFF = new ofstream;     temp.push_back(outFF);
896             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
897             ofstream* outRF = new ofstream;     temp.push_back(outRF);
898             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
899             tempfiles[i] = temp;
900             
901             vector<string> names;
902             string ffastafilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "ffasta.temp";
903             string rfastafilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rfasta.temp";
904             string fqualfilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "fqual.temp";
905             string rqualfilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rqual.temp";
906             names.push_back(ffastafilename); names.push_back(fqualfilename);
907             names.push_back(rfastafilename); names.push_back(rqualfilename);
908             files.push_back(names);
909             
910             m->openOutputFile(ffastafilename, *outFF);
911             m->openOutputFile(rfastafilename, *outRF);
912             m->openOutputFile(fqualfilename, *outFQ);
913             m->openOutputFile(rqualfilename, *outRQ);
914         }
915         
916         if (m->control_pressed) {
917             //close files, delete ofstreams
918             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
919             //remove files
920             for (int i = 0; i < files.size(); i++) {  
921                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
922             }
923         }
924         
925         ifstream inForward;
926         m->openInputFile(ffastqfile, inForward);
927         
928         ifstream inReverse;
929         m->openInputFile(rfastqfile, inReverse);
930         
931         count = 0;
932         map<string, fastqRead> uniques;
933         map<string, fastqRead>::iterator itUniques;
934         while ((!inForward.eof()) || (!inReverse.eof())) {
935             
936             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
937             
938             //get a read from forward and reverse fastq files
939             bool ignoref, ignorer;
940             fastqRead thisFread, thisRread;
941             if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
942             else { ignoref = true; }
943             if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
944             else { ignorer = true; }
945             
946             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques);
947             
948             for (int i = 0; i < reads.size(); i++) {
949                 fastqRead fread = reads[i].forward;
950                 fastqRead rread = reads[i].reverse;
951                 
952                 if (checkReads(fread, rread)) {
953                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
954                     
955                     //if the reads are okay write to output files
956                     int process = count % processors;
957                     
958                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
959                     *(tempfiles[process][1]) << ">" << fread.name << endl;
960                     for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
961                     *(tempfiles[process][1]) << endl;
962                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
963                     *(tempfiles[process][3]) << ">" << rread.name << endl;
964                     for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
965                     *(tempfiles[process][3]) << endl;
966                     
967                     count++;
968                     
969                     //report progress
970                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
971                 }
972             }
973                 }
974                 //report progress
975                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
976         
977         if (uniques.size() != 0) {
978             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
979                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
980             }
981             m->mothurOutEndLine();
982         }
983         
984         //close files, delete ofstreams
985         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
986         inForward.close();
987         inReverse.close();
988         
989         //adjust for really large processors or really small files
990         if (count == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
991         if (count < processors) { 
992             for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
993             files.resize(count);
994             processors = count; 
995         }
996         
997         return files;
998     }
999     catch(exception& e) {
1000         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1001         exit(1);
1002     }
1003 }
1004 //**********************************************************************************************************************
1005 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques){
1006     try {
1007         vector<pairFastqRead> reads;
1008         map<string, fastqRead>::iterator itUniques;
1009             
1010         if (!ignoref && !ignorer) {
1011             if (forward.name == reverse.name) { 
1012                 pairFastqRead temp(forward, reverse);
1013                 reads.push_back(temp);
1014             }else {
1015                 //look for forward pair
1016                 itUniques = uniques.find(forward.name);
1017                 if (itUniques != uniques.end()) {  //we have the pair for this read
1018                     pairFastqRead temp(forward, itUniques->second);
1019                     reads.push_back(temp);
1020                     uniques.erase(itUniques);
1021                 }else { //save this read for later
1022                     uniques[forward.name] = forward;
1023                 }
1024                 
1025                 //look for reverse pair
1026                 itUniques = uniques.find(reverse.name);
1027                 if (itUniques != uniques.end()) {  //we have the pair for this read
1028                     pairFastqRead temp(itUniques->second, reverse);
1029                     reads.push_back(temp);
1030                     uniques.erase(itUniques);
1031                 }else { //save this read for later
1032                     uniques[reverse.name] = reverse;
1033                 }
1034             }
1035         }else if (!ignoref && ignorer) { //ignore reverse keep forward
1036             //look for forward pair
1037             itUniques = uniques.find(forward.name);
1038             if (itUniques != uniques.end()) {  //we have the pair for this read
1039                 pairFastqRead temp(forward, itUniques->second);
1040                 reads.push_back(temp);
1041                 uniques.erase(itUniques);
1042             }else { //save this read for later
1043                 uniques[forward.name] = forward;
1044             }
1045
1046         }else if (ignoref && !ignorer) { //ignore forward keep reverse
1047             //look for reverse pair
1048             itUniques = uniques.find(reverse.name);
1049             if (itUniques != uniques.end()) {  //we have the pair for this read
1050                 pairFastqRead temp(itUniques->second, reverse);
1051                 reads.push_back(temp);
1052                 uniques.erase(itUniques);
1053             }else { //save this read for later
1054                 uniques[reverse.name] = reverse;
1055             }
1056         }//else ignore both and do nothing
1057         
1058         return reads;
1059     }
1060     catch(exception& e) {
1061         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1062         exit(1);
1063     }
1064 }
1065 //**********************************************************************************************************************
1066 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1067     try {
1068         fastqRead read;
1069         
1070         ignore = false;
1071         
1072         //read sequence name
1073         string line = m->getline(in); m->gobble(in);
1074         vector<string> pieces = m->splitWhiteSpace(line);
1075         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
1076         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
1077         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1078         else { name = name.substr(1); }
1079         
1080         //read sequence
1081         string sequence = m->getline(in); m->gobble(in);
1082         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1083         
1084         //read sequence name
1085         line = m->getline(in); m->gobble(in);
1086         pieces = m->splitWhiteSpace(line);
1087         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
1088         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1089         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1090         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1091         
1092         //read quality scores
1093         string quality = m->getline(in); m->gobble(in);
1094         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1095          
1096         //sanity check sequence length and number of quality scores match
1097         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1098         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1099         
1100         vector<int> qualScores;
1101                 int controlChar = int('!');
1102                 for (int i = 0; i < quality.length(); i++) { 
1103                         int temp = int(quality[i]);
1104                         temp -= controlChar;
1105                         
1106                         qualScores.push_back(temp);
1107                 }
1108     
1109         read.name = name;
1110         read.sequence = sequence;
1111         read.scores = qualScores;
1112
1113         return read;
1114     }
1115     catch(exception& e) {
1116         m->errorOut(e, "MakeContigsCommand", "readFastq");
1117         exit(1);
1118     }
1119 }
1120 //**********************************************************************************************************************
1121 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
1122     try {
1123         bool good = true;
1124         
1125         //do sequence lengths match
1126         if (forward.sequence.length() != reverse.sequence.length()) {
1127             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ", ignoring.\n");
1128             good = false; 
1129         }
1130         
1131         //do number of qual scores match 
1132         if (forward.scores.size() != reverse.scores.size()) {
1133             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ", ignoring.\n");
1134             good = false; 
1135         }
1136
1137         return good;
1138     }
1139     catch(exception& e) {
1140         m->errorOut(e, "MakeContigsCommand", "checkReads");
1141         exit(1);
1142     }
1143 }
1144 //***************************************************************************************************************
1145 //illumina data requires paired forward and reverse data
1146 //BARCODE   atgcatgc   atgcatgc    groupName 
1147 //PRIMER   atgcatgc   atgcatgc    groupName  
1148 //PRIMER   atgcatgc   atgcatgc  
1149 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
1150         try {
1151                 ifstream in;
1152                 m->openInputFile(oligosfile, in);
1153                 
1154                 ofstream test;
1155                 
1156                 string type, foligo, roligo, group;
1157         
1158                 int indexPrimer = 0;
1159                 int indexBarcode = 0;
1160         set<string> uniquePrimers;
1161         set<string> uniqueBarcodes;
1162                 
1163                 while(!in.eof()){
1164             
1165                         in >> type; 
1166             
1167                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1168             
1169                         if(type[0] == '#'){
1170                                 while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
1171                                 m->gobble(in);
1172                         }
1173                         else{
1174                                 m->gobble(in);
1175                                 //make type case insensitive
1176                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1177                                 
1178                                 in >> foligo;
1179                 
1180                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1181                                 
1182                                 for(int i=0;i<foligo.length();i++){
1183                                         foligo[i] = toupper(foligo[i]);
1184                                         if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
1185                                 }
1186                                 
1187                                 if(type == "FORWARD"){
1188                                         m->gobble(in);
1189                                         
1190                     in >> roligo;
1191                     
1192                     for(int i=0;i<roligo.length();i++){
1193                         roligo[i] = toupper(roligo[i]);
1194                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1195                     }
1196                     //roligo = reverseOligo(roligo);
1197                     
1198                     group = "";
1199                     
1200                                         // get rest of line in case there is a primer name
1201                                         while (!in.eof())       {       
1202                                                 char c = in.get(); 
1203                                                 if (c == 10 || c == 13){        break;  }
1204                                                 else if (c == 32 || c == 9){;} //space or tab
1205                                                 else {  group += c;  }
1206                                         } 
1207                     
1208                     oligosPair newPrimer(foligo, roligo);
1209                                         
1210                                         //check for repeat barcodes
1211                     string tempPair = foligo+roligo;
1212                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1213                     else { uniquePrimers.insert(tempPair); }
1214                                         
1215                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1216                     
1217                                         primers[indexPrimer]=newPrimer; indexPrimer++;          
1218                                         primerNameVector.push_back(group);
1219                                 }else if(type == "BARCODE"){
1220                                         m->gobble(in);
1221                                         
1222                     in >> roligo;
1223                     
1224                     for(int i=0;i<roligo.length();i++){
1225                         roligo[i] = toupper(roligo[i]);
1226                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1227                     }
1228                     //roligo = reverseOligo(roligo);
1229                     
1230                     oligosPair newPair(foligo, roligo);
1231                     
1232                     group = "";
1233                     while (!in.eof())   {       
1234                                                 char c = in.get(); 
1235                                                 if (c == 10 || c == 13){        break;  }
1236                                                 else if (c == 32 || c == 9){;} //space or tab
1237                                                 else {  group += c;  }
1238                                         } 
1239                                         
1240                     if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1241                         
1242                     //check for repeat barcodes
1243                     string tempPair = foligo+roligo;
1244                     if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
1245                     else { uniqueBarcodes.insert(tempPair); }
1246                         
1247                     barcodes[indexBarcode]=newPair; indexBarcode++;
1248                                         barcodeNameVector.push_back(group);
1249                                 }else if(type == "LINKER"){
1250                                         linker.push_back(foligo);
1251                     m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
1252                                 }else if(type == "SPACER"){
1253                                         spacer.push_back(foligo);
1254                     m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
1255                                 }
1256                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
1257                         }
1258                         m->gobble(in);
1259                 }       
1260                 in.close();
1261                 
1262                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1263                 
1264                 //add in potential combos
1265                 if(barcodeNameVector.size() == 0){
1266             oligosPair temp("", "");
1267                         barcodes[0] = temp;
1268                         barcodeNameVector.push_back("");                        
1269                 }
1270                 
1271                 if(primerNameVector.size() == 0){
1272             oligosPair temp("", "");
1273                         primers[0] = temp;
1274                         primerNameVector.push_back("");                 
1275                 }
1276                 
1277                 fastaFileNames.resize(barcodeNameVector.size());
1278                 for(int i=0;i<fastaFileNames.size();i++){
1279                         fastaFileNames[i].assign(primerNameVector.size(), "");
1280                 }
1281                 qualFileNames = fastaFileNames; 
1282                 
1283                 if(allFiles){
1284                         set<string> uniqueNames; //used to cleanup outputFileNames
1285                         for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1286                                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1287                                         
1288                                         string primerName = primerNameVector[itPrimer->first];
1289                                         string barcodeName = barcodeNameVector[itBar->first];
1290                                         
1291                                         string comboGroupName = "";
1292                                         string fastaFileName = "";
1293                                         string qualFileName = "";
1294                                         string nameFileName = "";
1295                     string countFileName = "";
1296                                         
1297                                         if(primerName == ""){
1298                                                 comboGroupName = barcodeNameVector[itBar->first];
1299                                         }
1300                                         else{
1301                                                 if(barcodeName == ""){
1302                                                         comboGroupName = primerNameVector[itPrimer->first];
1303                                                 }
1304                                                 else{
1305                                                         comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1306                                                 }
1307                                         }
1308                                         
1309                                         
1310                                         ofstream temp;
1311                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + comboGroupName + ".fasta";
1312                                         if (uniqueNames.count(fastaFileName) == 0) {
1313                                                 outputNames.push_back(fastaFileName);
1314                                                 outputTypes["fasta"].push_back(fastaFileName);
1315                                                 uniqueNames.insert(fastaFileName);
1316                                         }
1317                                         
1318                                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1319                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1320                                         
1321                                         
1322                     qualFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + comboGroupName + ".qual";
1323                     if (uniqueNames.count(qualFileName) == 0) {
1324                         outputNames.push_back(qualFileName);
1325                         outputTypes["qfile"].push_back(qualFileName);
1326                     }
1327                                                 
1328                     qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1329                     m->openOutputFile(qualFileName, temp);              temp.close();
1330                                 }
1331                         }
1332                 }
1333                 
1334                 bool allBlank = true;
1335                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1336                         if (barcodeNameVector[i] != "") {
1337                                 allBlank = false;
1338                                 break;
1339                         }
1340                 }
1341                 for (int i = 0; i < primerNameVector.size(); i++) {
1342                         if (primerNameVector[i] != "") {
1343                                 allBlank = false;
1344                                 break;
1345                         }
1346                 }
1347         
1348                 if (allBlank) {
1349                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1350                         allFiles = false;
1351                         return false;
1352                 }
1353                 
1354                 return true;
1355                 
1356         }
1357         catch(exception& e) {
1358                 m->errorOut(e, "MakeContigsCommand", "getOligos");
1359                 exit(1);
1360         }
1361 }
1362 //********************************************************************/
1363 string MakeContigsCommand::reverseOligo(string oligo){
1364         try {
1365         string reverse = "";
1366         
1367         for(int i=oligo.length()-1;i>=0;i--){
1368             
1369             if(oligo[i] == 'A')         {       reverse += 'T'; }
1370             else if(oligo[i] == 'T'){   reverse += 'A'; }
1371             else if(oligo[i] == 'U'){   reverse += 'A'; }
1372             
1373             else if(oligo[i] == 'G'){   reverse += 'C'; }
1374             else if(oligo[i] == 'C'){   reverse += 'G'; }
1375             
1376             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1377             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1378             
1379             else if(oligo[i] == 'M'){   reverse += 'K'; }
1380             else if(oligo[i] == 'K'){   reverse += 'M'; }
1381             
1382             else if(oligo[i] == 'W'){   reverse += 'W'; }
1383             else if(oligo[i] == 'S'){   reverse += 'S'; }
1384             
1385             else if(oligo[i] == 'B'){   reverse += 'V'; }
1386             else if(oligo[i] == 'V'){   reverse += 'B'; }
1387             
1388             else if(oligo[i] == 'D'){   reverse += 'H'; }
1389             else if(oligo[i] == 'H'){   reverse += 'D'; }
1390             
1391             else                                                {       reverse += 'N'; }
1392         }
1393         
1394         
1395         return reverse;
1396     }
1397         catch(exception& e) {
1398                 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
1399                 exit(1);
1400         }
1401 }
1402 //**********************************************************************************************************************
1403
1404
1405
1406