]> git.donarmstrong.com Git - mothur.git/blob - chimeraperseuscommand.cpp
added SequenceCountParser class to parse the count table by group. added count parame...
[mothur.git] / chimeraperseuscommand.cpp
1 /*
2  *  chimeraperseuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/26/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraperseuscommand.h"
11 #include "deconvolutecommand.h"
12 #include "sequence.hpp"
13 #include "counttable.h"
14 #include "sequencecountparser.h"
15 //**********************************************************************************************************************
16 vector<string> ChimeraPerseusCommand::setParameters(){  
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pcutoff);
26                 CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "",false,false); parameters.push_back(palpha);
27                 CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "",false,false); parameters.push_back(pbeta);
28                         
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ChimeraPerseusCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string ChimeraPerseusCommand::getHelpString(){  
40         try {
41                 string helpString = "";
42                 helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
43                 helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
44                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
45                 helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
46         helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. \n";
47                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
48                 helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
49                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
50                 helpString += "The alpha parameter ....  The default is -5.54. \n";
51                 helpString += "The beta parameter ....  The default is 0.33. \n";
52                 helpString += "The cutoff parameter ....  The default is 0.50. \n";
53                 helpString += "The chimera.perseus command should be in the following format: \n";
54                 helpString += "chimera.perseus(fasta=yourFastaFile, name=yourNameFile) \n";
55                 helpString += "Example: chimera.perseus(fasta=AD.align, name=AD.names) \n";
56                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "ChimeraPerseusCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string ChimeraPerseusCommand::getOutputFileNameTag(string type, string inputName=""){   
66         try {
67         string outputFileName = "";
68                 map<string, vector<string> >::iterator it;
69         
70         //is this a type this command creates
71         it = outputTypes.find(type);
72         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
73         else {
74             if (type == "chimera") {  outputFileName =  "perseus.chimeras"; }
75             else if (type == "accnos") {  outputFileName =  "perseus.accnos"; }
76             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
77         }
78         return outputFileName;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "ChimeraPerseusCommand", "getOutputFileNameTag");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86 ChimeraPerseusCommand::ChimeraPerseusCommand(){ 
87         try {
88                 abort = true; calledHelp = true;
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["chimera"] = tempOutNames;
92                 outputTypes["accnos"] = tempOutNames;
93         }
94         catch(exception& e) {
95                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
96                 exit(1);
97         }
98 }
99 //***************************************************************************************************************
100 ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
101         try {
102                 abort = false; calledHelp = false; 
103         hasCount = false;
104         hasName = false;
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string,string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter("chimera.perseus");
117                         map<string,string>::iterator it;
118                         
119                         //check to make sure all parameters are valid for command
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
122                         }
123                         
124                         vector<string> tempOutNames;
125                         outputTypes["chimera"] = tempOutNames;
126                         outputTypes["accnos"] = tempOutNames;
127                         
128                         //if the user changes the input directory command factory will send this info to us in the output parameter 
129                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
130                         if (inputDir == "not found"){   inputDir = "";          }
131                         
132                         //check for required parameters
133                         fastafile = validParameter.validFile(parameters, "fasta", false);
134                         if (fastafile == "not found") {                                 
135                                 //if there is a current fasta file, use it
136                                 string filename = m->getFastaFile(); 
137                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
138                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
139                         }else { 
140                                 m->splitAtDash(fastafile, fastaFileNames);
141                                 
142                                 //go through files and make sure they are good, if not, then disregard them
143                                 for (int i = 0; i < fastaFileNames.size(); i++) {
144                                         
145                                         bool ignore = false;
146                                         if (fastaFileNames[i] == "current") { 
147                                                 fastaFileNames[i] = m->getFastaFile(); 
148                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
149                                                 else {  
150                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
151                                                         //erase from file list
152                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
153                                                         i--;
154                                                 }
155                                         }
156                                         
157                                         if (!ignore) {
158                                                 
159                                                 if (inputDir != "") {
160                                                         string path = m->hasPath(fastaFileNames[i]);
161                                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
163                                                 }
164                                                 
165                                                 int ableToOpen;
166                                                 ifstream in;
167                                                 
168                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
169                                                 
170                                                 //if you can't open it, try default location
171                                                 if (ableToOpen == 1) {
172                                                         if (m->getDefaultPath() != "") { //default path is set
173                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
174                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
175                                                                 ifstream in2;
176                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
177                                                                 in2.close();
178                                                                 fastaFileNames[i] = tryPath;
179                                                         }
180                                                 }
181                                                 
182                                                 if (ableToOpen == 1) {
183                                                         if (m->getOutputDir() != "") { //default path is set
184                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
185                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
186                                                                 ifstream in2;
187                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
188                                                                 in2.close();
189                                                                 fastaFileNames[i] = tryPath;
190                                                         }
191                                                 }
192                                                 
193                                                 in.close();
194                                                 
195                                                 if (ableToOpen == 1) { 
196                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
197                                                         //erase from file list
198                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
199                                                         i--;
200                                                 }else {
201                                                         m->setFastaFile(fastaFileNames[i]);
202                                                 }
203                                         }
204                                 }
205                                 
206                                 //make sure there is at least one valid file left
207                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
208                         }
209                         
210                         
211                         //check for required parameters
212                         namefile = validParameter.validFile(parameters, "name", false);
213                         if (namefile == "not found") { namefile = "";   }
214                         else { 
215                                 m->splitAtDash(namefile, nameFileNames);
216                                 
217                                 //go through files and make sure they are good, if not, then disregard them
218                                 for (int i = 0; i < nameFileNames.size(); i++) {
219                                         
220                                         bool ignore = false;
221                                         if (nameFileNames[i] == "current") { 
222                                                 nameFileNames[i] = m->getNameFile(); 
223                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
224                                                 else {  
225                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
226                                                         //erase from file list
227                                                         nameFileNames.erase(nameFileNames.begin()+i);
228                                                         i--;
229                                                 }
230                                         }
231                                         
232                                         if (!ignore) {
233                                                 
234                                                 if (inputDir != "") {
235                                                         string path = m->hasPath(nameFileNames[i]);
236                                                         //if the user has not given a path then, add inputdir. else leave path alone.
237                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
238                                                 }
239                                                 
240                                                 int ableToOpen;
241                                                 ifstream in;
242                                                 
243                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
244                                                 
245                                                 //if you can't open it, try default location
246                                                 if (ableToOpen == 1) {
247                                                         if (m->getDefaultPath() != "") { //default path is set
248                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
249                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
250                                                                 ifstream in2;
251                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
252                                                                 in2.close();
253                                                                 nameFileNames[i] = tryPath;
254                                                         }
255                                                 }
256                                                 
257                                                 if (ableToOpen == 1) {
258                                                         if (m->getOutputDir() != "") { //default path is set
259                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
260                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
261                                                                 ifstream in2;
262                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
263                                                                 in2.close();
264                                                                 nameFileNames[i] = tryPath;
265                                                         }
266                                                 }
267                                                 
268                                                 in.close();
269                                                 
270                                                 if (ableToOpen == 1) { 
271                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
272                                                         //erase from file list
273                                                         nameFileNames.erase(nameFileNames.begin()+i);
274                                                         i--;
275                                                 }else {
276                                                         m->setNameFile(nameFileNames[i]);
277                                                 }
278                                         }
279                                 }
280                         }
281             
282             if (nameFileNames.size() != 0) { hasName = true; }
283             
284             //check for required parameters
285             vector<string> countfileNames;
286                         countfile = validParameter.validFile(parameters, "count", false);
287                         if (countfile == "not found") { 
288                 countfile = "";  
289                         }else { 
290                                 m->splitAtDash(countfile, countfileNames);
291                                 
292                                 //go through files and make sure they are good, if not, then disregard them
293                                 for (int i = 0; i < countfileNames.size(); i++) {
294                                         
295                                         bool ignore = false;
296                                         if (countfileNames[i] == "current") { 
297                                                 countfileNames[i] = m->getCountTableFile(); 
298                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
299                                                 else {  
300                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
301                                                         //erase from file list
302                                                         countfileNames.erase(countfileNames.begin()+i);
303                                                         i--;
304                                                 }
305                                         }
306                                         
307                                         if (!ignore) {
308                                                 
309                                                 if (inputDir != "") {
310                                                         string path = m->hasPath(countfileNames[i]);
311                                                         //if the user has not given a path then, add inputdir. else leave path alone.
312                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
313                                                 }
314                                                 
315                                                 int ableToOpen;
316                                                 ifstream in;
317                                                 
318                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
319                                                 
320                                                 //if you can't open it, try default location
321                                                 if (ableToOpen == 1) {
322                                                         if (m->getDefaultPath() != "") { //default path is set
323                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
324                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
325                                                                 ifstream in2;
326                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
327                                                                 in2.close();
328                                                                 countfileNames[i] = tryPath;
329                                                         }
330                                                 }
331                                                 
332                                                 if (ableToOpen == 1) {
333                                                         if (m->getOutputDir() != "") { //default path is set
334                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
335                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
336                                                                 ifstream in2;
337                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
338                                                                 in2.close();
339                                                                 countfileNames[i] = tryPath;
340                                                         }
341                                                 }
342                                                 
343                                                 in.close();
344                                                 
345                                                 if (ableToOpen == 1) { 
346                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
347                                                         //erase from file list
348                                                         countfileNames.erase(countfileNames.begin()+i);
349                                                         i--;
350                                                 }else {
351                                                         m->setCountTableFile(countfileNames[i]);
352                                                 }
353                                         }
354                                 }
355                         }
356             
357             if (countfileNames.size() != 0) { hasCount = true; }
358             
359                         //make sure there is at least one valid file left
360             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
361             
362             if (!hasName && !hasCount) { 
363                 //if there is a current name file, use it, else look for current count file
364                                 string filename = m->getNameFile(); 
365                                 if (filename != "") { hasName = true; nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
366                                 else { 
367                     filename = m->getCountTableFile();
368                     if (filename != "") { hasCount = true; countfileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the count parameter."); m->mothurOutEndLine(); }
369                     else { m->mothurOut("[ERROR]: You must provide a count or name file."); m->mothurOutEndLine(); abort = true;  }
370                 }
371             }
372             if (!hasName && hasCount) { nameFileNames = countfileNames; }
373             
374                         if (nameFileNames.size() != fastaFileNames.size()) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
375                         
376                         bool hasGroup = true;
377                         groupfile = validParameter.validFile(parameters, "group", false);
378                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
379                         else { 
380                                 m->splitAtDash(groupfile, groupFileNames);
381                                 
382                                 //go through files and make sure they are good, if not, then disregard them
383                                 for (int i = 0; i < groupFileNames.size(); i++) {
384                                         
385                                         bool ignore = false;
386                                         if (groupFileNames[i] == "current") { 
387                                                 groupFileNames[i] = m->getGroupFile(); 
388                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
389                                                 else {  
390                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
391                                                         //erase from file list
392                                                         groupFileNames.erase(groupFileNames.begin()+i);
393                                                         i--;
394                                                 }
395                                         }
396                                         
397                                         if (!ignore) {
398                                                 
399                                                 if (inputDir != "") {
400                                                         string path = m->hasPath(groupFileNames[i]);
401                                                         //if the user has not given a path then, add inputdir. else leave path alone.
402                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
403                                                 }
404                                                 
405                                                 int ableToOpen;
406                                                 ifstream in;
407                                                 
408                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
409                                                 
410                                                 //if you can't open it, try default location
411                                                 if (ableToOpen == 1) {
412                                                         if (m->getDefaultPath() != "") { //default path is set
413                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
414                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
415                                                                 ifstream in2;
416                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
417                                                                 in2.close();
418                                                                 groupFileNames[i] = tryPath;
419                                                         }
420                                                 }
421                                                 
422                                                 if (ableToOpen == 1) {
423                                                         if (m->getOutputDir() != "") { //default path is set
424                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
425                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
426                                                                 ifstream in2;
427                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
428                                                                 in2.close();
429                                                                 groupFileNames[i] = tryPath;
430                                                         }
431                                                 }
432                                                 
433                                                 in.close();
434                                                 
435                                                 if (ableToOpen == 1) { 
436                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
437                                                         //erase from file list
438                                                         groupFileNames.erase(groupFileNames.begin()+i);
439                                                         i--;
440                                                 }else {
441                                                         m->setGroupFile(groupFileNames[i]);
442                                                 }
443                                         }
444                                 }
445                                 
446                                 //make sure there is at least one valid file left
447                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
448                         }
449                         
450                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
451                         
452             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
453                         
454                         //if the user changes the output directory command factory will send this info to us in the output parameter 
455                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
456                         
457                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
458                         m->setProcessors(temp);
459                         m->mothurConvert(temp, processors);
460                         
461                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.50";  }
462                         m->mothurConvert(temp, cutoff);
463                         
464                         temp = validParameter.validFile(parameters, "alpha", false);    if (temp == "not found"){       temp = "-5.54"; }
465                         m->mothurConvert(temp, alpha);
466                         
467                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
468                         m->mothurConvert(temp, beta);
469                 }
470         }
471         catch(exception& e) {
472                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
473                 exit(1);
474         }
475 }
476 //***************************************************************************************************************
477
478 int ChimeraPerseusCommand::execute(){
479         try{
480                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
481                 
482                                 
483                 //process each file
484                 for (int s = 0; s < fastaFileNames.size(); s++) {
485                         
486                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
487                         
488                         int start = time(NULL); 
489                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
490                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
491                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
492
493                         //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
494                         
495                         //you provided a groupfile
496                         string groupFile = "";
497                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
498                         
499                         string nameFile = "";
500                         if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
501                                 nameFile = nameFileNames[s];
502                         }else { nameFile = getNamesFile(fastaFileNames[s]); }
503                         
504                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0;     }                               
505                         
506                         int numSeqs = 0;
507                         int numChimeras = 0;
508             
509             if (hasCount) {
510                 CountTable* ct = new CountTable();
511                 ct->readTable(nameFile);
512                 
513                 if (ct->hasGroupInfo()) {
514                     cparser = new SequenceCountParser(fastaFileNames[s], *ct);
515                     
516                     vector<string> groups = cparser->getNamesOfGroups();
517                     
518                     if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        }  return 0; }
519                     
520                     //clears files
521                     ofstream out, out1, out2;
522                     m->openOutputFile(outputFileName, out); out.close(); 
523                     m->openOutputFile(accnosFileName, out1); out1.close();
524                     
525                     if(processors == 1) {       numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups);       }
526                     else                                {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
527                     
528                     if (m->control_pressed) {  delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }                               
529                     map<string, string> uniqueNames = cparser->getAllSeqsMap();
530                     numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
531                     delete cparser;
532
533                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
534                     
535                     if (m->control_pressed) {  delete ct; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;  } 
536                     
537                 }else {
538                     if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
539                     
540                     //read sequences and store sorted by frequency
541                     vector<seqData> sequences = readFiles(fastaFileNames[s], ct);
542                     
543                     if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } return 0; }
544                     
545                     numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras);   
546                 }
547                 delete ct;
548             }else {
549                 if (groupFile != "") {
550                     //Parse sequences by group
551                     parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
552                     vector<string> groups = parser->getNamesOfGroups();
553                     
554                     if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) {     m->mothurRemove(outputNames[j]);        }  return 0; }
555                     
556                     //clears files
557                     ofstream out, out1, out2;
558                     m->openOutputFile(outputFileName, out); out.close(); 
559                     m->openOutputFile(accnosFileName, out1); out1.close();
560                     
561                     if(processors == 1) {       numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups);       }
562                     else                                {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
563                     
564                     if (m->control_pressed) {  delete parser; for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        }  return 0;    }                               
565                     map<string, string> uniqueNames = parser->getAllSeqsMap();
566                     numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
567                     delete parser;
568                     
569                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
570                     
571                     if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0;  }         
572                 }else{
573                     if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
574                     
575                     //read sequences and store sorted by frequency
576                     vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
577                     
578                     if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        } return 0; }
579                     
580                     numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
581                 }
582                         }
583             
584                         if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
585                         
586                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
587                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
588                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
589                 }
590                 
591                 //set accnos file as new current accnosfile
592                 string current = "";
593                 itTypes = outputTypes.find("accnos");
594                 if (itTypes != outputTypes.end()) {
595                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
596                 }
597                 
598                 m->mothurOutEndLine();
599                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
600                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
601                 m->mothurOutEndLine();
602                 
603                 return 0;
604                 
605         }
606         catch(exception& e) {
607                 m->errorOut(e, "ChimeraPerseusCommand", "execute");
608                 exit(1);
609         }
610 }
611 //**********************************************************************************************************************
612 string ChimeraPerseusCommand::getNamesFile(string& inputFile){
613         try {
614                 string nameFile = "";
615                 
616                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
617                 
618                 //use unique.seqs to create new name and fastafile
619                 string inputString = "fasta=" + inputFile;
620                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
621                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
622                 m->mothurCalling = true;
623         
624                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
625                 uniqueCommand->execute();
626                 
627                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
628                 
629                 delete uniqueCommand;
630                 m->mothurCalling = false;
631                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
632                 
633                 nameFile = filenames["name"][0];
634                 inputFile = filenames["fasta"][0];
635                 
636                 return nameFile;
637         }
638         catch(exception& e) {
639                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
640                 exit(1);
641         }
642 }
643 //**********************************************************************************************************************
644 int ChimeraPerseusCommand::driverGroups(string outputFName, string accnos, int start, int end, vector<string> groups){
645         try {
646                 
647                 int totalSeqs = 0;
648                 int numChimeras = 0;
649                 
650                 for (int i = start; i < end; i++) {
651                         
652                         m->mothurOutEndLine(); m->mothurOut("Checking sequences from group " + groups[i] + "...");      m->mothurOutEndLine();                                  
653                         
654                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
655                         
656                         vector<seqData> sequences = loadSequences(groups[i]);
657                         
658                         if (m->control_pressed) { return 0; }
659                         
660                         int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras);
661                         totalSeqs += numSeqs;
662                         
663                         if (m->control_pressed) { return 0; }
664                         
665                         //append files
666                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
667                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
668                         
669                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
670                 }       
671                 
672                 return totalSeqs;
673                 
674         }
675         catch(exception& e) {
676                 m->errorOut(e, "ChimeraPerseusCommand", "driverGroups");
677                 exit(1);
678         }
679 }       
680 //**********************************************************************************************************************
681 vector<seqData> ChimeraPerseusCommand::loadSequences(string group){
682         try {
683         bool error = false;
684                 alignLength = 0;
685         vector<seqData> sequences;
686         if (hasCount) {
687             vector<Sequence> thisGroupsSeqs = cparser->getSeqs(group);
688             map<string, int> counts = cparser->getCountTable(group);
689             map<string, int>::iterator it;
690             
691             for (int i = 0; i < thisGroupsSeqs.size(); i++) {
692                 
693                 if (m->control_pressed) {  return sequences; }
694                 
695                 it = counts.find(thisGroupsSeqs[i].getName());
696                 if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); }
697                 else {
698                     sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second));
699                     if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
700                 }
701             }
702         }else{
703             vector<Sequence> thisGroupsSeqs = parser->getSeqs(group);
704             map<string, string> nameMap = parser->getNameMap(group);
705             map<string, string>::iterator it;
706            
707             for (int i = 0; i < thisGroupsSeqs.size(); i++) {
708                 
709                 if (m->control_pressed) {  return sequences; }
710                 
711                 it = nameMap.find(thisGroupsSeqs[i].getName());
712                 if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
713                 else {
714                     int num = m->getNumNames(it->second);
715                     sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
716                     if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
717                 }
718             }
719             
720                 }
721                 
722         if (error) { m->control_pressed = true; }
723                 //sort by frequency
724                 sort(sequences.rbegin(), sequences.rend());
725                 
726                 return sequences;
727         }
728         catch(exception& e) {
729                 m->errorOut(e, "ChimeraPerseusCommand", "loadSequences");
730                 exit(1);
731         }
732 }
733
734 //**********************************************************************************************************************
735 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
736         try {
737                 map<string, int>::iterator it;
738                 map<string, int> nameMap = m->readNames(name);
739                 
740                 //read fasta file and create sequenceData structure - checking for file mismatches
741                 vector<seqData> sequences;
742                 bool error = false;
743                 ifstream in;
744                 m->openInputFile(inputFile, in);
745                 alignLength = 0;
746         
747                 while (!in.eof()) {
748                         
749                         if (m->control_pressed) { in.close(); return sequences; }
750                         
751                         Sequence temp(in); m->gobble(in);
752                         
753                         it = nameMap.find(temp.getName());
754                         if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
755                         else {
756                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
757                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
758                         }
759                 }
760                 in.close();
761                 
762                 if (error) { m->control_pressed = true; }
763                 
764                 //sort by frequency
765                 sort(sequences.rbegin(), sequences.rend());
766                 
767                 return sequences;
768         }
769         catch(exception& e) {
770                 m->errorOut(e, "ChimeraPerseusCommand", "readFiles");
771                 exit(1);
772         }
773 }
774 //**********************************************************************************************************************
775 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){
776         try {           
777                 //read fasta file and create sequenceData structure - checking for file mismatches
778                 vector<seqData> sequences;
779                 ifstream in;
780                 m->openInputFile(inputFile, in);
781                 alignLength = 0;
782         
783                 while (!in.eof()) {
784             Sequence temp(in); m->gobble(in);
785                         
786                         int count = ct->getNumSeqs(temp.getName());
787                         if (m->control_pressed) { break; }
788                         else {
789                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count));
790                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
791                         }
792                 }
793                 in.close();
794                 
795                 //sort by frequency
796                 sort(sequences.rbegin(), sequences.rend());
797                 
798                 return sequences;
799         }
800         catch(exception& e) {
801                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
802                 exit(1);
803         }
804 }
805 //**********************************************************************************************************************
806 int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& sequences, string accnosFileName, int& numChimeras){
807         try {
808                 
809                 vector<vector<double> > correctModel(4);        //could be an option in the future to input own model matrix
810                 for(int i=0;i<4;i++){   correctModel[i].resize(4);      }
811                 
812                 correctModel[0][0] = 0.000000;  //AA
813                 correctModel[1][0] = 11.619259; //CA
814                 correctModel[2][0] = 11.694004; //TA
815                 correctModel[3][0] = 7.748623;  //GA
816                 
817                 correctModel[1][1] = 0.000000;  //CC
818                 correctModel[2][1] = 7.619657;  //TC
819                 correctModel[3][1] = 12.852562; //GC
820                 
821                 correctModel[2][2] = 0.000000;  //TT
822                 correctModel[3][2] = 10.964048; //TG
823                 
824                 correctModel[3][3] = 0.000000;  //GG
825                 
826                 for(int i=0;i<4;i++){
827                         for(int j=0;j<i;j++){
828                                 correctModel[j][i] = correctModel[i][j];
829                         }
830                 }
831                 
832                 int numSeqs = sequences.size();
833                 //int alignLength = sequences[0].sequence.size();
834                 
835                 ofstream chimeraFile;
836                 ofstream accnosFile;
837                 m->openOutputFile(chimeraFileName, chimeraFile); 
838                 m->openOutputFile(accnosFileName, accnosFile); 
839                 
840                 Perseus myPerseus;
841                 vector<vector<double> > binMatrix = myPerseus.binomial(alignLength);
842                 
843                 chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n";
844                 
845                 vector<bool> chimeras(numSeqs, 0);
846                 
847                 for(int i=0;i<numSeqs;i++){     
848                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
849     
850                         vector<bool> restricted = chimeras;
851                         
852                         vector<vector<int> > leftDiffs(numSeqs);
853                         vector<vector<int> > leftMaps(numSeqs);
854                         vector<vector<int> > rightDiffs(numSeqs);
855                         vector<vector<int> > rightMaps(numSeqs);
856                         
857                         vector<int> singleLeft, bestLeft;
858                         vector<int> singleRight, bestRight;
859                         
860                         int bestSingleIndex, bestSingleDiff;
861                         vector<pwAlign> alignments(numSeqs);
862                         
863                         int comparisons = myPerseus.getAlignments(i, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
864                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
865
866                         int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
867                         
868                         string dummyA, dummyB;
869                         
870             if (sequences[i].sequence.size() < 3) { 
871                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
872             }else if(comparisons >= 2){ 
873                                 minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
874                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
875
876                                 int minMismatchToTrimera = numeric_limits<int>::max();
877                                 int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
878                                 
879                                 if(minMismatchToChimera >= 3 && comparisons >= 3){
880                                         minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
881                                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
882                                 }
883                                 
884                                 double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
885                                 
886                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
887
888                                 string type;
889                                 string chimeraRefSeq;
890                                 
891                                 if(minMismatchToChimera - minMismatchToTrimera >= 3){
892                                         type = "trimera";
893                                         chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps);
894                                 }
895                                 else{
896                                         type = "chimera";
897                                         chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
898                                 }
899                                 ;
900                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
901                                 
902                                 double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
903                                 
904                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
905
906                                 double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq);
907                                 double loonIndex = myPerseus.calcLoonIndex(sequences[i].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
908                                 
909                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
910
911                                 chimeraFile << i << '\t' << sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
912                                 chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << sequences[leftParentBi].seqName << '\t' << sequences[rightParentBi].seqName << '\t';
913                                 chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t';
914                                 chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t';
915                                 
916                                 double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, alpha, beta);
917                                 
918                                 chimeraFile << probability << '\t';
919                                 
920                                 if(probability > cutoff){ 
921                                         chimeraFile << type << endl;
922                                         accnosFile << sequences[i].seqName << endl;
923                                         chimeras[i] = 1;
924                                         numChimeras++;
925                                 }
926                                 else{
927                                         chimeraFile << "good" << endl;
928                                 }
929                                 
930                         }
931                         else{
932                                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
933                         }
934         
935                         //report progress
936                         if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1) + "\n");           }
937                 }
938                 
939                 if((numSeqs) % 100 != 0){       m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");               }
940                 
941                 chimeraFile.close();
942                 accnosFile.close();
943                 
944                 return numSeqs;
945         }
946         catch(exception& e) {
947                 m->errorOut(e, "ChimeraPerseusCommand", "driver");
948                 exit(1);
949         }
950 }
951 /**************************************************************************************************/
952 int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) {
953         try {
954                 
955                 vector<int> processIDS;
956                 int process = 1;
957                 int num = 0;
958                 
959                 //sanity check
960                 if (groups.size() < processors) { processors = groups.size(); }
961                 
962                 //divide the groups between the processors
963                 vector<linePair> lines;
964                 int numGroupsPerProcessor = groups.size() / processors;
965                 for (int i = 0; i < processors; i++) {
966                         int startIndex =  i * numGroupsPerProcessor;
967                         int endIndex = (i+1) * numGroupsPerProcessor;
968                         if(i == (processors - 1)){      endIndex = groups.size();       }
969                         lines.push_back(linePair(startIndex, endIndex));
970                 }
971                 
972 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
973                 
974                 //loop through and create all the processes you want
975                 while (process != processors) {
976                         int pid = fork();
977                         
978                         if (pid > 0) {
979                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
980                                 process++;
981                         }else if (pid == 0){
982                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
983                                 
984                                 //pass numSeqs to parent
985                                 ofstream out;
986                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
987                                 m->openOutputFile(tempFile, out);
988                                 out << num << endl;
989                                 out.close();
990                                 
991                                 exit(0);
992                         }else { 
993                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
994                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
995                                 exit(0);
996                         }
997                 }
998                 
999                 //do my part
1000                 num = driverGroups(outputFName, accnos, lines[0].start, lines[0].end, groups);
1001                 
1002                 //force parent to wait until all the processes are done
1003                 for (int i=0;i<processIDS.size();i++) { 
1004                         int temp = processIDS[i];
1005                         wait(&temp);
1006                 }
1007                 
1008                 for (int i = 0; i < processIDS.size(); i++) {
1009                         ifstream in;
1010                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1011                         m->openInputFile(tempFile, in);
1012                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1013                         in.close(); m->mothurRemove(tempFile);
1014                 }
1015                 
1016 #else
1017                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1018                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1019                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1020                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1021                 
1022                 vector<perseusData*> pDataArray; 
1023                 DWORD   dwThreadIdArray[processors-1];
1024                 HANDLE  hThreadArray[processors-1]; 
1025                 
1026                 //Create processor worker threads.
1027                 for( int i=1; i<processors; i++ ){
1028                         // Allocate memory for thread data.
1029                         string extension = toString(i) + ".temp";
1030                         
1031                         perseusData* tempPerseus = new perseusData(hasName, hasCount, alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i);
1032                         
1033                         pDataArray.push_back(tempPerseus);
1034                         processIDS.push_back(i);
1035                         
1036                         //MyPerseusThreadFunction is in header. It must be global or static to work with the threads.
1037                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1038                         hThreadArray[i-1] = CreateThread(NULL, 0, MyPerseusThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1039                 }
1040                 
1041                 
1042                 //using the main process as a worker saves time and memory
1043                 num = driverGroups(outputFName, accnos, lines[0].start, lines[0].end, groups);
1044                 
1045                 //Wait until all threads have terminated.
1046                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1047                         
1048                 //Close all thread handles and free memory allocations.
1049                 for(int i=0; i < pDataArray.size(); i++){
1050                         num += pDataArray[i]->count;
1051                         CloseHandle(hThreadArray[i]);
1052                         delete pDataArray[i];
1053                 }
1054 #endif          
1055                 
1056                 
1057                 //append output files
1058                 for(int i=0;i<processIDS.size();i++){
1059                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1060                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1061                         
1062                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1063                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1064                 }
1065                 
1066                 return num;     
1067                 
1068         }
1069         catch(exception& e) {
1070                 m->errorOut(e, "ChimeraPerseusCommand", "createProcessesGroups");
1071                 exit(1);
1072         }
1073 }
1074 //**********************************************************************************************************************
1075 int ChimeraPerseusCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName){
1076         try {
1077                 map<string, string>::iterator itUnique;
1078                 int total = 0;
1079                 
1080                 //edit accnos file
1081                 ifstream in2; 
1082                 m->openInputFile(accnosFileName, in2);
1083                 
1084                 ofstream out2;
1085                 m->openOutputFile(accnosFileName+".temp", out2);
1086                 
1087                 string name;
1088                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
1089                 set<string>::iterator itNames;
1090                 set<string> chimerasInFile;
1091                 set<string>::iterator itChimeras;
1092                 
1093                 
1094                 while (!in2.eof()) {
1095                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
1096                         
1097                         in2 >> name; m->gobble(in2);
1098                         
1099                         //find unique name
1100                         itUnique = uniqueNames.find(name);
1101                         
1102                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1103                         else {
1104                                 itChimeras = chimerasInFile.find((itUnique->second));
1105                                 
1106                                 if (itChimeras == chimerasInFile.end()) {
1107                                         out2 << itUnique->second << endl;
1108                                         chimerasInFile.insert((itUnique->second));
1109                                         total++;
1110                                 }
1111                         }
1112                 }
1113                 in2.close();
1114                 out2.close();
1115                 
1116                 m->mothurRemove(accnosFileName);
1117                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
1118                 
1119                 //edit chimera file
1120                 ifstream in; 
1121                 m->openInputFile(outputFileName, in);
1122                 
1123                 ofstream out;
1124                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1125                 
1126                 int DiffsToBestMatch, BestMatchIndex, DiffstToChimera, IndexofLeftParent, IndexOfRightParent;
1127                 float temp1,temp2, temp3, temp4, temp5, temp6, temp7, temp8;
1128                 string index, BestMatchName, parent1, parent2, flag;
1129                 name = "";
1130                 namesInFile.clear();    
1131                 //assumptions - in file each read will always look like 
1132                 /*                                                                              
1133                  SequenceIndex  Name    DiffsToBestMatch        BestMatchIndex  BestMatchName   DiffstToChimera IndexofLeftParent       IndexOfRightParent      NameOfLeftParent        NameOfRightParent       DistanceToBestMatch     cIndex  (cIndex - singleDist)   loonIndex       MismatchesToChimera     MismatchToTrimera       ChimeraBreakPoint       LogisticProbability     TypeOfSequence
1134                  0      F01QG4L02JVBQY  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
1135                  1      F01QG4L02ICTC6  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
1136                  2      F01QG4L02JZOEC  48      0       F01QG4L02JVBQY  47      0       0       F01QG4L02JVBQY  F01QG4L02JVBQY  2.0449  2.03545 -0.00944493     0       47      2147483647      138     0       good
1137                  3      F01QG4L02G7JEC  42      0       F01QG4L02JVBQY  40      1       0       F01QG4L02ICTC6  F01QG4L02JVBQY  1.87477 1.81113 -0.0636404      5.80145 40      2147483647      25      0       good
1138                  */
1139                 
1140                 //get and print headers
1141                 BestMatchName = m->getline(in); m->gobble(in);
1142                 out << BestMatchName << endl;
1143                 
1144                 while (!in.eof()) {
1145                         
1146                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
1147                         
1148                         bool print = false;
1149                         in >> index;    m->gobble(in);
1150                         
1151                         if (index != "SequenceIndex") { //if you are not a header line, there will be a header line for each group if group file is given
1152                                 in >> name;             m->gobble(in);
1153                                 in >> DiffsToBestMatch; m->gobble(in);
1154                                 in >> BestMatchIndex; m->gobble(in);
1155                                 in >> BestMatchName; m->gobble(in);
1156                                 in >> DiffstToChimera; m->gobble(in);
1157                                 in >> IndexofLeftParent; m->gobble(in);
1158                                 in >> IndexOfRightParent; m->gobble(in);
1159                                 in >> parent1;  m->gobble(in);
1160                                 in >> parent2;  m->gobble(in);
1161                                 in >> temp1 >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> flag; m->gobble(in);
1162                                 
1163                                 //find unique name
1164                                 itUnique = uniqueNames.find(name);
1165                                 
1166                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1167                                 else {
1168                                         name = itUnique->second;
1169                                         //is this name already in the file
1170                                         itNames = namesInFile.find((name));
1171                                         
1172                                         if (itNames == namesInFile.end()) { //no not in file
1173                                                 if (flag == "good") { //are you really a no??
1174                                                         //is this sequence really not chimeric??
1175                                                         itChimeras = chimerasInFile.find(name);
1176                                                         
1177                                                         //then you really are a no so print, otherwise skip
1178                                                         if (itChimeras == chimerasInFile.end()) { print = true; }
1179                                                 }else{ print = true; }
1180                                         }
1181                                 }
1182                                 
1183                                 if (print) {
1184                                         out << index << '\t' << name  << '\t' << DiffsToBestMatch << '\t' << BestMatchIndex << '\t';
1185                                         namesInFile.insert(name);
1186                                         
1187                                         if (BestMatchName != "Null") {
1188                                                 itUnique = uniqueNames.find(BestMatchName);
1189                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find BestMatchName "+ BestMatchName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1190                                                 else {  out << itUnique->second << '\t';        }                                       
1191                                         }else { out << "Null" << '\t'; }
1192                                         
1193                                         out << DiffstToChimera << '\t' << IndexofLeftParent << '\t' << IndexOfRightParent << '\t';
1194                                         
1195                                         if (parent1 != "Null") {
1196                                                 itUnique = uniqueNames.find(parent1);
1197                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent1 "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1198                                                 else {  out << itUnique->second << '\t';        }
1199                                         }else { out << "Null" << '\t'; }
1200                                         
1201                                         if (parent1 != "Null") {
1202                                                 itUnique = uniqueNames.find(parent2);
1203                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent2 "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1204                                                 else {  out << itUnique->second << '\t';        }
1205                                         }else { out << "Null" << '\t'; }
1206                                         
1207                                         out << temp1 << '\t' << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << flag << endl;    
1208                                 }
1209                         }else { index = m->getline(in); m->gobble(in); }
1210                 }
1211                 in.close();
1212                 out.close();
1213                 
1214                 m->mothurRemove(outputFileName);
1215                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
1216                 
1217                 return total;
1218         }
1219         catch(exception& e) {
1220                 m->errorOut(e, "ChimeraPerseusCommand", "deconvoluteResults");
1221                 exit(1);
1222         }
1223 }       
1224 //**********************************************************************************************************************
1225
1226