]> git.donarmstrong.com Git - mothur.git/blob - chimeraperseuscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / chimeraperseuscommand.cpp
1 /*
2  *  chimeraperseuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/26/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraperseuscommand.h"
11 #include "deconvolutecommand.h"
12 #include "sequence.hpp"
13 #include "counttable.h"
14 #include "sequencecountparser.h"
15 //**********************************************************************************************************************
16 vector<string> ChimeraPerseusCommand::setParameters(){  
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
19                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25                 CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff);
26                 CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha);
27                 CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta);
28                         
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ChimeraPerseusCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string ChimeraPerseusCommand::getHelpString(){  
40         try {
41                 string helpString = "";
42                 helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
43                 helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
44                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
45                 helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
46         helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. \n";
47                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
48                 helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
49                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
50                 helpString += "The alpha parameter ....  The default is -5.54. \n";
51                 helpString += "The beta parameter ....  The default is 0.33. \n";
52                 helpString += "The cutoff parameter ....  The default is 0.50. \n";
53                 helpString += "The chimera.perseus command should be in the following format: \n";
54                 helpString += "chimera.perseus(fasta=yourFastaFile, name=yourNameFile) \n";
55                 helpString += "Example: chimera.perseus(fasta=AD.align, name=AD.names) \n";
56                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "ChimeraPerseusCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string ChimeraPerseusCommand::getOutputPattern(string type) {
66     try {
67         string pattern = "";
68         
69         if (type == "chimera") {  pattern = "[filename],perseus.chimeras"; } 
70         else if (type == "accnos") {  pattern = "[filename],perseus.accnos"; } 
71         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
72         
73         return pattern;
74     }
75     catch(exception& e) {
76         m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern");
77         exit(1);
78     }
79 }
80 //**********************************************************************************************************************
81 ChimeraPerseusCommand::ChimeraPerseusCommand(){ 
82         try {
83                 abort = true; calledHelp = true;
84                 setParameters();
85                 vector<string> tempOutNames;
86                 outputTypes["chimera"] = tempOutNames;
87                 outputTypes["accnos"] = tempOutNames;
88         }
89         catch(exception& e) {
90                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
91                 exit(1);
92         }
93 }
94 //***************************************************************************************************************
95 ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
96         try {
97                 abort = false; calledHelp = false; 
98         hasCount = false;
99         hasName = false;
100                 
101                 //allow user to run help
102                 if(option == "help") { help(); abort = true; calledHelp = true; }
103                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
104                 
105                 else {
106                         vector<string> myArray = setParameters();
107                         
108                         OptionParser parser(option);
109                         map<string,string> parameters = parser.getParameters();
110                         
111                         ValidParameters validParameter("chimera.perseus");
112                         map<string,string>::iterator it;
113                         
114                         //check to make sure all parameters are valid for command
115                         for (it = parameters.begin(); it != parameters.end(); it++) { 
116                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
117                         }
118                         
119                         vector<string> tempOutNames;
120                         outputTypes["chimera"] = tempOutNames;
121                         outputTypes["accnos"] = tempOutNames;
122                         
123                         //if the user changes the input directory command factory will send this info to us in the output parameter 
124                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
125                         if (inputDir == "not found"){   inputDir = "";          }
126                         
127                         //check for required parameters
128                         fastafile = validParameter.validFile(parameters, "fasta", false);
129                         if (fastafile == "not found") {                                 
130                                 //if there is a current fasta file, use it
131                                 string filename = m->getFastaFile(); 
132                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
133                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
134                         }else { 
135                                 m->splitAtDash(fastafile, fastaFileNames);
136                                 
137                                 //go through files and make sure they are good, if not, then disregard them
138                                 for (int i = 0; i < fastaFileNames.size(); i++) {
139                                         
140                                         bool ignore = false;
141                                         if (fastaFileNames[i] == "current") { 
142                                                 fastaFileNames[i] = m->getFastaFile(); 
143                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
144                                                 else {  
145                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
146                                                         //erase from file list
147                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
148                                                         i--;
149                                                 }
150                                         }
151                                         
152                                         if (!ignore) {
153                                                 
154                                                 if (inputDir != "") {
155                                                         string path = m->hasPath(fastaFileNames[i]);
156                                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
158                                                 }
159                                                 
160                                                 int ableToOpen;
161                                                 ifstream in;
162                                                 
163                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
164                                                 
165                                                 //if you can't open it, try default location
166                                                 if (ableToOpen == 1) {
167                                                         if (m->getDefaultPath() != "") { //default path is set
168                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
169                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
170                                                                 ifstream in2;
171                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
172                                                                 in2.close();
173                                                                 fastaFileNames[i] = tryPath;
174                                                         }
175                                                 }
176                                                 
177                                                 if (ableToOpen == 1) {
178                                                         if (m->getOutputDir() != "") { //default path is set
179                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
180                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
181                                                                 ifstream in2;
182                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
183                                                                 in2.close();
184                                                                 fastaFileNames[i] = tryPath;
185                                                         }
186                                                 }
187                                                 
188                                                 in.close();
189                                                 
190                                                 if (ableToOpen == 1) { 
191                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
192                                                         //erase from file list
193                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
194                                                         i--;
195                                                 }else {
196                                                         m->setFastaFile(fastaFileNames[i]);
197                                                 }
198                                         }
199                                 }
200                                 
201                                 //make sure there is at least one valid file left
202                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
203                         }
204                         
205                         
206                         //check for required parameters
207                         namefile = validParameter.validFile(parameters, "name", false);
208                         if (namefile == "not found") { namefile = "";   }
209                         else { 
210                                 m->splitAtDash(namefile, nameFileNames);
211                                 
212                                 //go through files and make sure they are good, if not, then disregard them
213                                 for (int i = 0; i < nameFileNames.size(); i++) {
214                                         
215                                         bool ignore = false;
216                                         if (nameFileNames[i] == "current") { 
217                                                 nameFileNames[i] = m->getNameFile(); 
218                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
219                                                 else {  
220                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
221                                                         //erase from file list
222                                                         nameFileNames.erase(nameFileNames.begin()+i);
223                                                         i--;
224                                                 }
225                                         }
226                                         
227                                         if (!ignore) {
228                                                 
229                                                 if (inputDir != "") {
230                                                         string path = m->hasPath(nameFileNames[i]);
231                                                         //if the user has not given a path then, add inputdir. else leave path alone.
232                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
233                                                 }
234                                                 
235                                                 int ableToOpen;
236                                                 ifstream in;
237                                                 
238                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
239                                                 
240                                                 //if you can't open it, try default location
241                                                 if (ableToOpen == 1) {
242                                                         if (m->getDefaultPath() != "") { //default path is set
243                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
244                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
245                                                                 ifstream in2;
246                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
247                                                                 in2.close();
248                                                                 nameFileNames[i] = tryPath;
249                                                         }
250                                                 }
251                                                 
252                                                 if (ableToOpen == 1) {
253                                                         if (m->getOutputDir() != "") { //default path is set
254                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
255                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
256                                                                 ifstream in2;
257                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
258                                                                 in2.close();
259                                                                 nameFileNames[i] = tryPath;
260                                                         }
261                                                 }
262                                                 
263                                                 in.close();
264                                                 
265                                                 if (ableToOpen == 1) { 
266                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
267                                                         //erase from file list
268                                                         nameFileNames.erase(nameFileNames.begin()+i);
269                                                         i--;
270                                                 }else {
271                                                         m->setNameFile(nameFileNames[i]);
272                                                 }
273                                         }
274                                 }
275                         }
276             
277             if (nameFileNames.size() != 0) { hasName = true; }
278             
279             //check for required parameters
280             vector<string> countfileNames;
281                         countfile = validParameter.validFile(parameters, "count", false);
282                         if (countfile == "not found") { 
283                 countfile = "";  
284                         }else { 
285                                 m->splitAtDash(countfile, countfileNames);
286                                 
287                                 //go through files and make sure they are good, if not, then disregard them
288                                 for (int i = 0; i < countfileNames.size(); i++) {
289                                         
290                                         bool ignore = false;
291                                         if (countfileNames[i] == "current") { 
292                                                 countfileNames[i] = m->getCountTableFile(); 
293                                                 if (countfileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
294                                                 else {  
295                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
296                                                         //erase from file list
297                                                         countfileNames.erase(countfileNames.begin()+i);
298                                                         i--;
299                                                 }
300                                         }
301                                         
302                                         if (!ignore) {
303                                                 
304                                                 if (inputDir != "") {
305                                                         string path = m->hasPath(countfileNames[i]);
306                                                         //if the user has not given a path then, add inputdir. else leave path alone.
307                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
308                                                 }
309                                                 
310                                                 int ableToOpen;
311                                                 ifstream in;
312                                                 
313                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
314                                                 
315                                                 //if you can't open it, try default location
316                                                 if (ableToOpen == 1) {
317                                                         if (m->getDefaultPath() != "") { //default path is set
318                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
319                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
320                                                                 ifstream in2;
321                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
322                                                                 in2.close();
323                                                                 countfileNames[i] = tryPath;
324                                                         }
325                                                 }
326                                                 
327                                                 if (ableToOpen == 1) {
328                                                         if (m->getOutputDir() != "") { //default path is set
329                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
330                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
331                                                                 ifstream in2;
332                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
333                                                                 in2.close();
334                                                                 countfileNames[i] = tryPath;
335                                                         }
336                                                 }
337                                                 
338                                                 in.close();
339                                                 
340                                                 if (ableToOpen == 1) { 
341                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
342                                                         //erase from file list
343                                                         countfileNames.erase(countfileNames.begin()+i);
344                                                         i--;
345                                                 }else {
346                                                         m->setCountTableFile(countfileNames[i]);
347                                                 }
348                                         }
349                                 }
350                         }
351             
352             if (countfileNames.size() != 0) { hasCount = true; }
353             
354                         //make sure there is at least one valid file left
355             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
356             
357             if (!hasName && !hasCount) { 
358                 //if there is a current name file, use it, else look for current count file
359                                 string filename = m->getNameFile(); 
360                                 if (filename != "") { hasName = true; nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
361                                 else { 
362                     filename = m->getCountTableFile();
363                     if (filename != "") { hasCount = true; countfileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the count parameter."); m->mothurOutEndLine(); }
364                     else { m->mothurOut("[ERROR]: You must provide a count or name file."); m->mothurOutEndLine(); abort = true;  }
365                 }
366             }
367             if (!hasName && hasCount) { nameFileNames = countfileNames; }
368             
369                         if (nameFileNames.size() != fastaFileNames.size()) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
370                         
371                         bool hasGroup = true;
372                         groupfile = validParameter.validFile(parameters, "group", false);
373                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
374                         else { 
375                                 m->splitAtDash(groupfile, groupFileNames);
376                                 
377                                 //go through files and make sure they are good, if not, then disregard them
378                                 for (int i = 0; i < groupFileNames.size(); i++) {
379                                         
380                                         bool ignore = false;
381                                         if (groupFileNames[i] == "current") { 
382                                                 groupFileNames[i] = m->getGroupFile(); 
383                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
384                                                 else {  
385                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
386                                                         //erase from file list
387                                                         groupFileNames.erase(groupFileNames.begin()+i);
388                                                         i--;
389                                                 }
390                                         }
391                                         
392                                         if (!ignore) {
393                                                 
394                                                 if (inputDir != "") {
395                                                         string path = m->hasPath(groupFileNames[i]);
396                                                         //if the user has not given a path then, add inputdir. else leave path alone.
397                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
398                                                 }
399                                                 
400                                                 int ableToOpen;
401                                                 ifstream in;
402                                                 
403                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
404                                                 
405                                                 //if you can't open it, try default location
406                                                 if (ableToOpen == 1) {
407                                                         if (m->getDefaultPath() != "") { //default path is set
408                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
409                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
410                                                                 ifstream in2;
411                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
412                                                                 in2.close();
413                                                                 groupFileNames[i] = tryPath;
414                                                         }
415                                                 }
416                                                 
417                                                 if (ableToOpen == 1) {
418                                                         if (m->getOutputDir() != "") { //default path is set
419                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
420                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
421                                                                 ifstream in2;
422                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
423                                                                 in2.close();
424                                                                 groupFileNames[i] = tryPath;
425                                                         }
426                                                 }
427                                                 
428                                                 in.close();
429                                                 
430                                                 if (ableToOpen == 1) { 
431                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
432                                                         //erase from file list
433                                                         groupFileNames.erase(groupFileNames.begin()+i);
434                                                         i--;
435                                                 }else {
436                                                         m->setGroupFile(groupFileNames[i]);
437                                                 }
438                                         }
439                                 }
440                                 
441                                 //make sure there is at least one valid file left
442                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
443                         }
444                         
445                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
446                         
447             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
448                         
449                         //if the user changes the output directory command factory will send this info to us in the output parameter 
450                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
451                         
452                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
453                         m->setProcessors(temp);
454                         m->mothurConvert(temp, processors);
455                         
456                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.50";  }
457                         m->mothurConvert(temp, cutoff);
458                         
459                         temp = validParameter.validFile(parameters, "alpha", false);    if (temp == "not found"){       temp = "-5.54"; }
460                         m->mothurConvert(temp, alpha);
461                         
462                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
463                         m->mothurConvert(temp, beta);
464                 }
465         }
466         catch(exception& e) {
467                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
468                 exit(1);
469         }
470 }
471 //***************************************************************************************************************
472
473 int ChimeraPerseusCommand::execute(){
474         try{
475                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
476                 
477                                 
478                 //process each file
479                 for (int s = 0; s < fastaFileNames.size(); s++) {
480                         
481                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
482                         
483                         int start = time(NULL); 
484                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it       
485                         map<string, string> variables;
486                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
487                         string outputFileName = getOutputFileName("chimera", variables);
488                         string accnosFileName = getOutputFileName("accnos", variables);
489
490                         //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
491                         
492                         //you provided a groupfile
493                         string groupFile = "";
494                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
495                         
496                         string nameFile = "";
497                         if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
498                                 nameFile = nameFileNames[s];
499                         }else { nameFile = getNamesFile(fastaFileNames[s]); }
500                         
501                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0;     }                               
502                         
503                         int numSeqs = 0;
504                         int numChimeras = 0;
505             
506             if (hasCount) {
507                 CountTable* ct = new CountTable();
508                 ct->readTable(nameFile);
509                 
510                 if (ct->hasGroupInfo()) {
511                     cparser = new SequenceCountParser(fastaFileNames[s], *ct);
512                     
513                     vector<string> groups = cparser->getNamesOfGroups();
514                     
515                     if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        }  return 0; }
516                     
517                     //clears files
518                     ofstream out, out1, out2;
519                     m->openOutputFile(outputFileName, out); out.close(); 
520                     m->openOutputFile(accnosFileName, out1); out1.close();
521                     
522                     if(processors == 1) {       numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups);       }
523                     else                                {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
524                     
525                     if (m->control_pressed) {  delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }                               
526                     map<string, string> uniqueNames = cparser->getAllSeqsMap();
527                     numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
528                     delete cparser;
529
530                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
531                     
532                     if (m->control_pressed) {  delete ct; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;  } 
533                     
534                 }else {
535                     if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
536                     
537                     //read sequences and store sorted by frequency
538                     vector<seqData> sequences = readFiles(fastaFileNames[s], ct);
539                     
540                     if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } return 0; }
541                     
542                     numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras);   
543                 }
544                 delete ct;
545             }else {
546                 if (groupFile != "") {
547                     //Parse sequences by group
548                     parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
549                     vector<string> groups = parser->getNamesOfGroups();
550                     
551                     if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) {     m->mothurRemove(outputNames[j]);        }  return 0; }
552                     
553                     //clears files
554                     ofstream out, out1, out2;
555                     m->openOutputFile(outputFileName, out); out.close(); 
556                     m->openOutputFile(accnosFileName, out1); out1.close();
557                     
558                     if(processors == 1) {       numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups);       }
559                     else                                {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
560                     
561                     if (m->control_pressed) {  delete parser; for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        }  return 0;    }                               
562                     map<string, string> uniqueNames = parser->getAllSeqsMap();
563                     numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
564                     delete parser;
565                     
566                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
567                     
568                     if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0;  }         
569                 }else{
570                     if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
571                     
572                     //read sequences and store sorted by frequency
573                     vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
574                     
575                     if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        } return 0; }
576                     
577                     numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
578                 }
579                         }
580             
581                         if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
582                         
583                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
584                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
585                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
586                 }
587                 
588                 //set accnos file as new current accnosfile
589                 string current = "";
590                 itTypes = outputTypes.find("accnos");
591                 if (itTypes != outputTypes.end()) {
592                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
593                 }
594                 
595                 m->mothurOutEndLine();
596                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
597                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
598                 m->mothurOutEndLine();
599                 
600                 return 0;
601                 
602         }
603         catch(exception& e) {
604                 m->errorOut(e, "ChimeraPerseusCommand", "execute");
605                 exit(1);
606         }
607 }
608 //**********************************************************************************************************************
609 string ChimeraPerseusCommand::getNamesFile(string& inputFile){
610         try {
611                 string nameFile = "";
612                 
613                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
614                 
615                 //use unique.seqs to create new name and fastafile
616                 string inputString = "fasta=" + inputFile;
617                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
618                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
619                 m->mothurCalling = true;
620         
621                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
622                 uniqueCommand->execute();
623                 
624                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
625                 
626                 delete uniqueCommand;
627                 m->mothurCalling = false;
628                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
629                 
630                 nameFile = filenames["name"][0];
631                 inputFile = filenames["fasta"][0];
632                 
633                 return nameFile;
634         }
635         catch(exception& e) {
636                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
637                 exit(1);
638         }
639 }
640 //**********************************************************************************************************************
641 int ChimeraPerseusCommand::driverGroups(string outputFName, string accnos, int start, int end, vector<string> groups){
642         try {
643                 
644                 int totalSeqs = 0;
645                 int numChimeras = 0;
646                 
647                 for (int i = start; i < end; i++) {
648                         
649                         m->mothurOutEndLine(); m->mothurOut("Checking sequences from group " + groups[i] + "...");      m->mothurOutEndLine();                                  
650                         
651                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
652                         
653                         vector<seqData> sequences = loadSequences(groups[i]);
654                         
655                         if (m->control_pressed) { return 0; }
656                         
657                         int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras);
658                         totalSeqs += numSeqs;
659                         
660                         if (m->control_pressed) { return 0; }
661                         
662                         //append files
663                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
664                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
665                         
666                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
667                 }       
668                 
669                 return totalSeqs;
670                 
671         }
672         catch(exception& e) {
673                 m->errorOut(e, "ChimeraPerseusCommand", "driverGroups");
674                 exit(1);
675         }
676 }       
677 //**********************************************************************************************************************
678 vector<seqData> ChimeraPerseusCommand::loadSequences(string group){
679         try {
680         bool error = false;
681                 alignLength = 0;
682         vector<seqData> sequences;
683         if (hasCount) {
684             vector<Sequence> thisGroupsSeqs = cparser->getSeqs(group);
685             map<string, int> counts = cparser->getCountTable(group);
686             map<string, int>::iterator it;
687             
688             for (int i = 0; i < thisGroupsSeqs.size(); i++) {
689                 
690                 if (m->control_pressed) {  return sequences; }
691                 
692                 it = counts.find(thisGroupsSeqs[i].getName());
693                 if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); }
694                 else {
695                     thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
696                     sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second));
697                     if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
698                 }
699             }
700         }else{
701             vector<Sequence> thisGroupsSeqs = parser->getSeqs(group);
702             map<string, string> nameMap = parser->getNameMap(group);
703             map<string, string>::iterator it;
704            
705             for (int i = 0; i < thisGroupsSeqs.size(); i++) {
706                 
707                 if (m->control_pressed) {  return sequences; }
708                 
709                 it = nameMap.find(thisGroupsSeqs[i].getName());
710                 if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
711                 else {
712                     int num = m->getNumNames(it->second);
713                     thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
714                     sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
715                     if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
716                 }
717             }
718             
719                 }
720                 
721         if (error) { m->control_pressed = true; }
722                 //sort by frequency
723                 sort(sequences.rbegin(), sequences.rend());
724                 
725                 return sequences;
726         }
727         catch(exception& e) {
728                 m->errorOut(e, "ChimeraPerseusCommand", "loadSequences");
729                 exit(1);
730         }
731 }
732
733 //**********************************************************************************************************************
734 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
735         try {
736                 map<string, int>::iterator it;
737                 map<string, int> nameMap = m->readNames(name);
738                 
739                 //read fasta file and create sequenceData structure - checking for file mismatches
740                 vector<seqData> sequences;
741                 bool error = false;
742                 ifstream in;
743                 m->openInputFile(inputFile, in);
744                 alignLength = 0;
745         
746                 while (!in.eof()) {
747                         
748                         if (m->control_pressed) { in.close(); return sequences; }
749                         
750                         Sequence temp(in); m->gobble(in);
751                         
752                         it = nameMap.find(temp.getName());
753                         if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
754                         else {
755                 temp.setAligned(removeNs(temp.getUnaligned()));
756                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
757                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
758                         }
759                 }
760                 in.close();
761                 
762                 if (error) { m->control_pressed = true; }
763                 
764                 //sort by frequency
765                 sort(sequences.rbegin(), sequences.rend());
766                 
767                 return sequences;
768         }
769         catch(exception& e) {
770                 m->errorOut(e, "ChimeraPerseusCommand", "readFiles");
771                 exit(1);
772         }
773 }
774 //**********************************************************************************************************************
775 string ChimeraPerseusCommand::removeNs(string seq){
776         try {
777         string newSeq = "";
778         for (int i = 0; i < seq.length(); i++) {
779             if (seq[i] != 'N') {  newSeq += seq[i]; }
780         }
781         return newSeq;
782     }
783         catch(exception& e) {
784                 m->errorOut(e, "ChimeraPerseusCommand", "removeNs");
785                 exit(1);
786         }
787 }
788 //**********************************************************************************************************************
789 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){
790         try {           
791                 //read fasta file and create sequenceData structure - checking for file mismatches
792                 vector<seqData> sequences;
793                 ifstream in;
794                 m->openInputFile(inputFile, in);
795                 alignLength = 0;
796         
797                 while (!in.eof()) {
798             Sequence temp(in); m->gobble(in);
799                         
800                         int count = ct->getNumSeqs(temp.getName());
801                         if (m->control_pressed) { break; }
802                         else {
803                 temp.setAligned(removeNs(temp.getUnaligned()));
804                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count));
805                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
806                         }
807                 }
808                 in.close();
809                 
810                 //sort by frequency
811                 sort(sequences.rbegin(), sequences.rend());
812                 
813                 return sequences;
814         }
815         catch(exception& e) {
816                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
817                 exit(1);
818         }
819 }
820 //**********************************************************************************************************************
821 int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& sequences, string accnosFileName, int& numChimeras){
822         try {
823                 
824                 vector<vector<double> > correctModel(4);        //could be an option in the future to input own model matrix
825                 for(int i=0;i<4;i++){   correctModel[i].resize(4);      }
826                 
827                 correctModel[0][0] = 0.000000;  //AA
828                 correctModel[1][0] = 11.619259; //CA
829                 correctModel[2][0] = 11.694004; //TA
830                 correctModel[3][0] = 7.748623;  //GA
831                 
832                 correctModel[1][1] = 0.000000;  //CC
833                 correctModel[2][1] = 7.619657;  //TC
834                 correctModel[3][1] = 12.852562; //GC
835                 
836                 correctModel[2][2] = 0.000000;  //TT
837                 correctModel[3][2] = 10.964048; //TG
838                 
839                 correctModel[3][3] = 0.000000;  //GG
840                 
841                 for(int i=0;i<4;i++){
842                         for(int j=0;j<i;j++){
843                                 correctModel[j][i] = correctModel[i][j];
844                         }
845                 }
846                 
847                 int numSeqs = sequences.size();
848                 //int alignLength = sequences[0].sequence.size();
849                 
850                 ofstream chimeraFile;
851                 ofstream accnosFile;
852                 m->openOutputFile(chimeraFileName, chimeraFile); 
853                 m->openOutputFile(accnosFileName, accnosFile); 
854                 
855                 Perseus myPerseus;
856                 vector<vector<double> > binMatrix = myPerseus.binomial(alignLength);
857                 
858                 chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n";
859                 
860                 vector<bool> chimeras(numSeqs, 0);
861                 
862                 for(int i=0;i<numSeqs;i++){     
863                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
864     
865                         vector<bool> restricted = chimeras;
866                         
867                         vector<vector<int> > leftDiffs(numSeqs);
868                         vector<vector<int> > leftMaps(numSeqs);
869                         vector<vector<int> > rightDiffs(numSeqs);
870                         vector<vector<int> > rightMaps(numSeqs);
871                         
872                         vector<int> singleLeft, bestLeft;
873                         vector<int> singleRight, bestRight;
874                         
875                         int bestSingleIndex, bestSingleDiff;
876                         vector<pwAlign> alignments(numSeqs);
877                         
878                         int comparisons = myPerseus.getAlignments(i, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
879                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
880
881                         int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
882                         
883                         string dummyA, dummyB;
884                         
885             if (sequences[i].sequence.size() < 3) { 
886                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
887             }else if(comparisons >= 2){ 
888                                 minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
889                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
890
891                                 int minMismatchToTrimera = numeric_limits<int>::max();
892                                 int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
893                                 
894                                 if(minMismatchToChimera >= 3 && comparisons >= 3){
895                                         minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
896                                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
897                                 }
898                                 
899                                 double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
900                                 
901                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
902
903                                 string type;
904                                 string chimeraRefSeq;
905                                 
906                                 if(minMismatchToChimera - minMismatchToTrimera >= 3){
907                                         type = "trimera";
908                                         chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps);
909                                 }
910                                 else{
911                                         type = "chimera";
912                                         chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
913                                 }
914
915                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
916                                 
917                                 double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
918                                 
919                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
920
921                                 double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq);
922                                 double loonIndex = myPerseus.calcLoonIndex(sequences[i].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
923                                 
924                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
925
926                                 chimeraFile << i << '\t' << sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
927                                 chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << sequences[leftParentBi].seqName << '\t' << sequences[rightParentBi].seqName << '\t';
928                                 chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t';
929                                 chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t';
930                                 
931                                 double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, alpha, beta);
932                                 
933                                 chimeraFile << probability << '\t';
934                                 
935                                 if(probability > cutoff){ 
936                                         chimeraFile << type << endl;
937                                         accnosFile << sequences[i].seqName << endl;
938                                         chimeras[i] = 1;
939                                         numChimeras++;
940                                 }
941                                 else{
942                                         chimeraFile << "good" << endl;
943                                 }
944                                 
945                         }
946                         else{
947                                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
948                         }
949         
950                         //report progress
951                         if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1) + "\n");           }
952                 }
953                 
954                 if((numSeqs) % 100 != 0){       m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");               }
955                 
956                 chimeraFile.close();
957                 accnosFile.close();
958                 
959                 return numSeqs;
960         }
961         catch(exception& e) {
962                 m->errorOut(e, "ChimeraPerseusCommand", "driver");
963                 exit(1);
964         }
965 }
966 /**************************************************************************************************/
967 int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) {
968         try {
969                 
970                 vector<int> processIDS;
971                 int process = 1;
972                 int num = 0;
973                 
974                 //sanity check
975                 if (groups.size() < processors) { processors = groups.size(); }
976                 
977                 //divide the groups between the processors
978                 vector<linePair> lines;
979                 int numGroupsPerProcessor = groups.size() / processors;
980                 for (int i = 0; i < processors; i++) {
981                         int startIndex =  i * numGroupsPerProcessor;
982                         int endIndex = (i+1) * numGroupsPerProcessor;
983                         if(i == (processors - 1)){      endIndex = groups.size();       }
984                         lines.push_back(linePair(startIndex, endIndex));
985                 }
986                 
987 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
988                 
989                 //loop through and create all the processes you want
990                 while (process != processors) {
991                         int pid = fork();
992                         
993                         if (pid > 0) {
994                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
995                                 process++;
996                         }else if (pid == 0){
997                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
998                                 
999                                 //pass numSeqs to parent
1000                                 ofstream out;
1001                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1002                                 m->openOutputFile(tempFile, out);
1003                                 out << num << endl;
1004                                 out.close();
1005                                 
1006                                 exit(0);
1007                         }else { 
1008                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1009                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1010                                 exit(0);
1011                         }
1012                 }
1013                 
1014                 //do my part
1015                 num = driverGroups(outputFName, accnos, lines[0].start, lines[0].end, groups);
1016                 
1017                 //force parent to wait until all the processes are done
1018                 for (int i=0;i<processIDS.size();i++) { 
1019                         int temp = processIDS[i];
1020                         wait(&temp);
1021                 }
1022                 
1023                 for (int i = 0; i < processIDS.size(); i++) {
1024                         ifstream in;
1025                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1026                         m->openInputFile(tempFile, in);
1027                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1028                         in.close(); m->mothurRemove(tempFile);
1029                 }
1030                 
1031 #else
1032                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1033                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1034                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1035                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1036                 
1037                 vector<perseusData*> pDataArray; 
1038                 DWORD   dwThreadIdArray[processors-1];
1039                 HANDLE  hThreadArray[processors-1]; 
1040                 
1041                 //Create processor worker threads.
1042                 for( int i=1; i<processors; i++ ){
1043                         // Allocate memory for thread data.
1044                         string extension = toString(i) + ".temp";
1045                         
1046                         perseusData* tempPerseus = new perseusData(hasName, hasCount, alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i);
1047                         
1048                         pDataArray.push_back(tempPerseus);
1049                         processIDS.push_back(i);
1050                         
1051                         //MyPerseusThreadFunction is in header. It must be global or static to work with the threads.
1052                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1053                         hThreadArray[i-1] = CreateThread(NULL, 0, MyPerseusThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1054                 }
1055                 
1056                 
1057                 //using the main process as a worker saves time and memory
1058                 num = driverGroups(outputFName, accnos, lines[0].start, lines[0].end, groups);
1059                 
1060                 //Wait until all threads have terminated.
1061                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1062                         
1063                 //Close all thread handles and free memory allocations.
1064                 for(int i=0; i < pDataArray.size(); i++){
1065                         num += pDataArray[i]->count;
1066                         CloseHandle(hThreadArray[i]);
1067                         delete pDataArray[i];
1068                 }
1069 #endif          
1070                 
1071                 
1072                 //append output files
1073                 for(int i=0;i<processIDS.size();i++){
1074                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1075                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1076                         
1077                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1078                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1079                 }
1080                 
1081                 return num;     
1082                 
1083         }
1084         catch(exception& e) {
1085                 m->errorOut(e, "ChimeraPerseusCommand", "createProcessesGroups");
1086                 exit(1);
1087         }
1088 }
1089 //**********************************************************************************************************************
1090 int ChimeraPerseusCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName){
1091         try {
1092                 map<string, string>::iterator itUnique;
1093                 int total = 0;
1094                 
1095                 //edit accnos file
1096                 ifstream in2; 
1097                 m->openInputFile(accnosFileName, in2);
1098                 
1099                 ofstream out2;
1100                 m->openOutputFile(accnosFileName+".temp", out2);
1101                 
1102                 string name;
1103                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
1104                 set<string>::iterator itNames;
1105                 set<string> chimerasInFile;
1106                 set<string>::iterator itChimeras;
1107                 
1108                 
1109                 while (!in2.eof()) {
1110                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
1111                         
1112                         in2 >> name; m->gobble(in2);
1113                         
1114                         //find unique name
1115                         itUnique = uniqueNames.find(name);
1116                         
1117                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1118                         else {
1119                                 itChimeras = chimerasInFile.find((itUnique->second));
1120                                 
1121                                 if (itChimeras == chimerasInFile.end()) {
1122                                         out2 << itUnique->second << endl;
1123                                         chimerasInFile.insert((itUnique->second));
1124                                         total++;
1125                                 }
1126                         }
1127                 }
1128                 in2.close();
1129                 out2.close();
1130                 
1131                 m->mothurRemove(accnosFileName);
1132                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
1133                 
1134                 //edit chimera file
1135                 ifstream in; 
1136                 m->openInputFile(outputFileName, in);
1137                 
1138                 ofstream out;
1139                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1140                 
1141                 int DiffsToBestMatch, BestMatchIndex, DiffstToChimera, IndexofLeftParent, IndexOfRightParent;
1142                 float temp1,temp2, temp3, temp4, temp5, temp6, temp7, temp8;
1143                 string index, BestMatchName, parent1, parent2, flag;
1144                 name = "";
1145                 namesInFile.clear();    
1146                 //assumptions - in file each read will always look like 
1147                 /*                                                                              
1148                  SequenceIndex  Name    DiffsToBestMatch        BestMatchIndex  BestMatchName   DiffstToChimera IndexofLeftParent       IndexOfRightParent      NameOfLeftParent        NameOfRightParent       DistanceToBestMatch     cIndex  (cIndex - singleDist)   loonIndex       MismatchesToChimera     MismatchToTrimera       ChimeraBreakPoint       LogisticProbability     TypeOfSequence
1149                  0      F01QG4L02JVBQY  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
1150                  1      F01QG4L02ICTC6  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
1151                  2      F01QG4L02JZOEC  48      0       F01QG4L02JVBQY  47      0       0       F01QG4L02JVBQY  F01QG4L02JVBQY  2.0449  2.03545 -0.00944493     0       47      2147483647      138     0       good
1152                  3      F01QG4L02G7JEC  42      0       F01QG4L02JVBQY  40      1       0       F01QG4L02ICTC6  F01QG4L02JVBQY  1.87477 1.81113 -0.0636404      5.80145 40      2147483647      25      0       good
1153                  */
1154                 
1155                 //get and print headers
1156                 BestMatchName = m->getline(in); m->gobble(in);
1157                 out << BestMatchName << endl;
1158                 
1159                 while (!in.eof()) {
1160                         
1161                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
1162                         
1163                         bool print = false;
1164                         in >> index;    m->gobble(in);
1165                         
1166                         if (index != "SequenceIndex") { //if you are not a header line, there will be a header line for each group if group file is given
1167                                 in >> name;             m->gobble(in);
1168                                 in >> DiffsToBestMatch; m->gobble(in);
1169                                 in >> BestMatchIndex; m->gobble(in);
1170                                 in >> BestMatchName; m->gobble(in);
1171                                 in >> DiffstToChimera; m->gobble(in);
1172                                 in >> IndexofLeftParent; m->gobble(in);
1173                                 in >> IndexOfRightParent; m->gobble(in);
1174                                 in >> parent1;  m->gobble(in);
1175                                 in >> parent2;  m->gobble(in);
1176                                 in >> temp1 >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> flag; m->gobble(in);
1177                                 
1178                                 //find unique name
1179                                 itUnique = uniqueNames.find(name);
1180                                 
1181                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1182                                 else {
1183                                         name = itUnique->second;
1184                                         //is this name already in the file
1185                                         itNames = namesInFile.find((name));
1186                                         
1187                                         if (itNames == namesInFile.end()) { //no not in file
1188                                                 if (flag == "good") { //are you really a no??
1189                                                         //is this sequence really not chimeric??
1190                                                         itChimeras = chimerasInFile.find(name);
1191                                                         
1192                                                         //then you really are a no so print, otherwise skip
1193                                                         if (itChimeras == chimerasInFile.end()) { print = true; }
1194                                                 }else{ print = true; }
1195                                         }
1196                                 }
1197                                 
1198                                 if (print) {
1199                                         out << index << '\t' << name  << '\t' << DiffsToBestMatch << '\t' << BestMatchIndex << '\t';
1200                                         namesInFile.insert(name);
1201                                         
1202                                         if (BestMatchName != "Null") {
1203                                                 itUnique = uniqueNames.find(BestMatchName);
1204                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find BestMatchName "+ BestMatchName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1205                                                 else {  out << itUnique->second << '\t';        }                                       
1206                                         }else { out << "Null" << '\t'; }
1207                                         
1208                                         out << DiffstToChimera << '\t' << IndexofLeftParent << '\t' << IndexOfRightParent << '\t';
1209                                         
1210                                         if (parent1 != "Null") {
1211                                                 itUnique = uniqueNames.find(parent1);
1212                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent1 "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1213                                                 else {  out << itUnique->second << '\t';        }
1214                                         }else { out << "Null" << '\t'; }
1215                                         
1216                                         if (parent1 != "Null") {
1217                                                 itUnique = uniqueNames.find(parent2);
1218                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent2 "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1219                                                 else {  out << itUnique->second << '\t';        }
1220                                         }else { out << "Null" << '\t'; }
1221                                         
1222                                         out << temp1 << '\t' << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << flag << endl;    
1223                                 }
1224                         }else { index = m->getline(in); m->gobble(in); }
1225                 }
1226                 in.close();
1227                 out.close();
1228                 
1229                 m->mothurRemove(outputFileName);
1230                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
1231                 
1232                 return total;
1233         }
1234         catch(exception& e) {
1235                 m->errorOut(e, "ChimeraPerseusCommand", "deconvoluteResults");
1236                 exit(1);
1237         }
1238 }       
1239 //**********************************************************************************************************************
1240
1241