]> git.donarmstrong.com Git - mothur.git/blob - chimeraperseuscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / chimeraperseuscommand.cpp
1 /*
2  *  chimeraperseuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/26/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraperseuscommand.h"
11 #include "deconvolutecommand.h"
12 #include "sequence.hpp"
13 #include "counttable.h"
14 #include "sequencecountparser.h"
15 //**********************************************************************************************************************
16 vector<string> ChimeraPerseusCommand::setParameters(){  
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
19                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
23         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
24
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27                 CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff);
28                 CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha);
29                 CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta);
30                         
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "ChimeraPerseusCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string ChimeraPerseusCommand::getHelpString(){  
42         try {
43                 string helpString = "";
44                 helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
45                 helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, dereplicate, alpha and beta.\n";
46                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
47                 helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
48         helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. \n";
49                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
50                 helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
53                 helpString += "The alpha parameter ....  The default is -5.54. \n";
54                 helpString += "The beta parameter ....  The default is 0.33. \n";
55                 helpString += "The cutoff parameter ....  The default is 0.50. \n";
56                 helpString += "The chimera.perseus command should be in the following format: \n";
57                 helpString += "chimera.perseus(fasta=yourFastaFile, name=yourNameFile) \n";
58                 helpString += "Example: chimera.perseus(fasta=AD.align, name=AD.names) \n";
59                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "ChimeraPerseusCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string ChimeraPerseusCommand::getOutputPattern(string type) {
69     try {
70         string pattern = "";
71         
72         if (type == "chimera") {  pattern = "[filename],perseus.chimeras"; } 
73         else if (type == "accnos") {  pattern = "[filename],perseus.accnos"; } 
74         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
75         
76         return pattern;
77     }
78     catch(exception& e) {
79         m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern");
80         exit(1);
81     }
82 }
83 //**********************************************************************************************************************
84 ChimeraPerseusCommand::ChimeraPerseusCommand(){ 
85         try {
86                 abort = true; calledHelp = true;
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["chimera"] = tempOutNames;
90                 outputTypes["accnos"] = tempOutNames;
91         }
92         catch(exception& e) {
93                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
94                 exit(1);
95         }
96 }
97 //***************************************************************************************************************
98 ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
99         try {
100                 abort = false; calledHelp = false; 
101         hasCount = false;
102         hasName = false;
103                 
104                 //allow user to run help
105                 if(option == "help") { help(); abort = true; calledHelp = true; }
106                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
107                 
108                 else {
109                         vector<string> myArray = setParameters();
110                         
111                         OptionParser parser(option);
112                         map<string,string> parameters = parser.getParameters();
113                         
114                         ValidParameters validParameter("chimera.perseus");
115                         map<string,string>::iterator it;
116                         
117                         //check to make sure all parameters are valid for command
118                         for (it = parameters.begin(); it != parameters.end(); it++) { 
119                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
120                         }
121                         
122                         vector<string> tempOutNames;
123                         outputTypes["chimera"] = tempOutNames;
124                         outputTypes["accnos"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         
130                         //check for required parameters
131                         fastafile = validParameter.validFile(parameters, "fasta", false);
132                         if (fastafile == "not found") {                                 
133                                 //if there is a current fasta file, use it
134                                 string filename = m->getFastaFile(); 
135                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
136                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
137                         }else { 
138                                 m->splitAtDash(fastafile, fastaFileNames);
139                                 
140                                 //go through files and make sure they are good, if not, then disregard them
141                                 for (int i = 0; i < fastaFileNames.size(); i++) {
142                                         
143                                         bool ignore = false;
144                                         if (fastaFileNames[i] == "current") { 
145                                                 fastaFileNames[i] = m->getFastaFile(); 
146                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
147                                                 else {  
148                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
149                                                         //erase from file list
150                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
151                                                         i--;
152                                                 }
153                                         }
154                                         
155                                         if (!ignore) {
156                                                 
157                                                 if (inputDir != "") {
158                                                         string path = m->hasPath(fastaFileNames[i]);
159                                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
161                                                 }
162                                                 
163                                                 int ableToOpen;
164                                                 ifstream in;
165                                                 
166                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
167                                                 
168                                                 //if you can't open it, try default location
169                                                 if (ableToOpen == 1) {
170                                                         if (m->getDefaultPath() != "") { //default path is set
171                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
172                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
173                                                                 ifstream in2;
174                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
175                                                                 in2.close();
176                                                                 fastaFileNames[i] = tryPath;
177                                                         }
178                                                 }
179                                                 
180                                                 if (ableToOpen == 1) {
181                                                         if (m->getOutputDir() != "") { //default path is set
182                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
183                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
184                                                                 ifstream in2;
185                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
186                                                                 in2.close();
187                                                                 fastaFileNames[i] = tryPath;
188                                                         }
189                                                 }
190                                                 
191                                                 in.close();
192                                                 
193                                                 if (ableToOpen == 1) { 
194                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
195                                                         //erase from file list
196                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
197                                                         i--;
198                                                 }else {
199                                                         m->setFastaFile(fastaFileNames[i]);
200                                                 }
201                                         }
202                                 }
203                                 
204                                 //make sure there is at least one valid file left
205                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
206                         }
207                         
208                         
209                         //check for required parameters
210                         namefile = validParameter.validFile(parameters, "name", false);
211                         if (namefile == "not found") { namefile = "";   }
212                         else { 
213                                 m->splitAtDash(namefile, nameFileNames);
214                                 
215                                 //go through files and make sure they are good, if not, then disregard them
216                                 for (int i = 0; i < nameFileNames.size(); i++) {
217                                         
218                                         bool ignore = false;
219                                         if (nameFileNames[i] == "current") { 
220                                                 nameFileNames[i] = m->getNameFile(); 
221                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
222                                                 else {  
223                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
224                                                         //erase from file list
225                                                         nameFileNames.erase(nameFileNames.begin()+i);
226                                                         i--;
227                                                 }
228                                         }
229                                         
230                                         if (!ignore) {
231                                                 
232                                                 if (inputDir != "") {
233                                                         string path = m->hasPath(nameFileNames[i]);
234                                                         //if the user has not given a path then, add inputdir. else leave path alone.
235                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
236                                                 }
237                                                 
238                                                 int ableToOpen;
239                                                 ifstream in;
240                                                 
241                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
242                                                 
243                                                 //if you can't open it, try default location
244                                                 if (ableToOpen == 1) {
245                                                         if (m->getDefaultPath() != "") { //default path is set
246                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
247                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
248                                                                 ifstream in2;
249                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
250                                                                 in2.close();
251                                                                 nameFileNames[i] = tryPath;
252                                                         }
253                                                 }
254                                                 
255                                                 if (ableToOpen == 1) {
256                                                         if (m->getOutputDir() != "") { //default path is set
257                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
258                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
259                                                                 ifstream in2;
260                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
261                                                                 in2.close();
262                                                                 nameFileNames[i] = tryPath;
263                                                         }
264                                                 }
265                                                 
266                                                 in.close();
267                                                 
268                                                 if (ableToOpen == 1) { 
269                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
270                                                         //erase from file list
271                                                         nameFileNames.erase(nameFileNames.begin()+i);
272                                                         i--;
273                                                 }else {
274                                                         m->setNameFile(nameFileNames[i]);
275                                                 }
276                                         }
277                                 }
278                         }
279             
280             if (nameFileNames.size() != 0) { hasName = true; }
281             
282             //check for required parameters
283             vector<string> countfileNames;
284                         countfile = validParameter.validFile(parameters, "count", false);
285                         if (countfile == "not found") { 
286                 countfile = "";  
287                         }else { 
288                                 m->splitAtDash(countfile, countfileNames);
289                                 
290                                 //go through files and make sure they are good, if not, then disregard them
291                                 for (int i = 0; i < countfileNames.size(); i++) {
292                                         
293                                         bool ignore = false;
294                                         if (countfileNames[i] == "current") { 
295                                                 countfileNames[i] = m->getCountTableFile(); 
296                                                 if (countfileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
297                                                 else {  
298                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
299                                                         //erase from file list
300                                                         countfileNames.erase(countfileNames.begin()+i);
301                                                         i--;
302                                                 }
303                                         }
304                                         
305                                         if (!ignore) {
306                                                 
307                                                 if (inputDir != "") {
308                                                         string path = m->hasPath(countfileNames[i]);
309                                                         //if the user has not given a path then, add inputdir. else leave path alone.
310                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
311                                                 }
312                                                 
313                                                 int ableToOpen;
314                                                 ifstream in;
315                                                 
316                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
317                                                 
318                                                 //if you can't open it, try default location
319                                                 if (ableToOpen == 1) {
320                                                         if (m->getDefaultPath() != "") { //default path is set
321                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
322                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
323                                                                 ifstream in2;
324                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
325                                                                 in2.close();
326                                                                 countfileNames[i] = tryPath;
327                                                         }
328                                                 }
329                                                 
330                                                 if (ableToOpen == 1) {
331                                                         if (m->getOutputDir() != "") { //default path is set
332                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
333                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
334                                                                 ifstream in2;
335                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
336                                                                 in2.close();
337                                                                 countfileNames[i] = tryPath;
338                                                         }
339                                                 }
340                                                 
341                                                 in.close();
342                                                 
343                                                 if (ableToOpen == 1) { 
344                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
345                                                         //erase from file list
346                                                         countfileNames.erase(countfileNames.begin()+i);
347                                                         i--;
348                                                 }else {
349                                                         m->setCountTableFile(countfileNames[i]);
350                                                 }
351                                         }
352                                 }
353                         }
354             
355             if (countfileNames.size() != 0) { hasCount = true; }
356             
357                         //make sure there is at least one valid file left
358             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
359             
360             if (!hasName && !hasCount) { 
361                 //if there is a current name file, use it, else look for current count file
362                                 string filename = m->getNameFile(); 
363                                 if (filename != "") { hasName = true; nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
364                                 else { 
365                     filename = m->getCountTableFile();
366                     if (filename != "") { hasCount = true; countfileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the count parameter."); m->mothurOutEndLine(); }
367                     else { m->mothurOut("[ERROR]: You must provide a count or name file."); m->mothurOutEndLine(); abort = true;  }
368                 }
369             }
370             if (!hasName && hasCount) { nameFileNames = countfileNames; }
371             
372                         if (nameFileNames.size() != fastaFileNames.size()) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
373                         
374                         bool hasGroup = true;
375                         groupfile = validParameter.validFile(parameters, "group", false);
376                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
377                         else { 
378                                 m->splitAtDash(groupfile, groupFileNames);
379                                 
380                                 //go through files and make sure they are good, if not, then disregard them
381                                 for (int i = 0; i < groupFileNames.size(); i++) {
382                                         
383                                         bool ignore = false;
384                                         if (groupFileNames[i] == "current") { 
385                                                 groupFileNames[i] = m->getGroupFile(); 
386                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
387                                                 else {  
388                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
389                                                         //erase from file list
390                                                         groupFileNames.erase(groupFileNames.begin()+i);
391                                                         i--;
392                                                 }
393                                         }
394                                         
395                                         if (!ignore) {
396                                                 
397                                                 if (inputDir != "") {
398                                                         string path = m->hasPath(groupFileNames[i]);
399                                                         //if the user has not given a path then, add inputdir. else leave path alone.
400                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
401                                                 }
402                                                 
403                                                 int ableToOpen;
404                                                 ifstream in;
405                                                 
406                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
407                                                 
408                                                 //if you can't open it, try default location
409                                                 if (ableToOpen == 1) {
410                                                         if (m->getDefaultPath() != "") { //default path is set
411                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
412                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
413                                                                 ifstream in2;
414                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
415                                                                 in2.close();
416                                                                 groupFileNames[i] = tryPath;
417                                                         }
418                                                 }
419                                                 
420                                                 if (ableToOpen == 1) {
421                                                         if (m->getOutputDir() != "") { //default path is set
422                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
423                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
424                                                                 ifstream in2;
425                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
426                                                                 in2.close();
427                                                                 groupFileNames[i] = tryPath;
428                                                         }
429                                                 }
430                                                 
431                                                 in.close();
432                                                 
433                                                 if (ableToOpen == 1) { 
434                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
435                                                         //erase from file list
436                                                         groupFileNames.erase(groupFileNames.begin()+i);
437                                                         i--;
438                                                 }else {
439                                                         m->setGroupFile(groupFileNames[i]);
440                                                 }
441                                         }
442                                 }
443                                 
444                                 //make sure there is at least one valid file left
445                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
446                         }
447                         
448                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
449                         
450             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
451                         
452                         //if the user changes the output directory command factory will send this info to us in the output parameter 
453                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
454                         
455                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
456                         m->setProcessors(temp);
457                         m->mothurConvert(temp, processors);
458                         
459                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.50";  }
460                         m->mothurConvert(temp, cutoff);
461                         
462                         temp = validParameter.validFile(parameters, "alpha", false);    if (temp == "not found"){       temp = "-5.54"; }
463                         m->mothurConvert(temp, alpha);
464                         
465                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
466                         m->mothurConvert(temp, beta);
467             
468                         temp = validParameter.validFile(parameters, "dereplicate", false);      
469                         if (temp == "not found") { 
470                                 if (groupfile != "")    {  temp = "false";                                      }
471                                 else                    {  temp = "true";       }
472                         }
473                         dups = m->isTrue(temp);
474                 }
475         }
476         catch(exception& e) {
477                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
478                 exit(1);
479         }
480 }
481 //***************************************************************************************************************
482
483 int ChimeraPerseusCommand::execute(){
484         try{
485                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
486                 
487                                 
488                 //process each file
489                 for (int s = 0; s < fastaFileNames.size(); s++) {
490                         
491                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
492                         
493                         int start = time(NULL); 
494                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it       
495                         map<string, string> variables;
496                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
497                         string outputFileName = getOutputFileName("chimera", variables);
498                         string accnosFileName = getOutputFileName("accnos", variables);
499
500                         //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
501                         
502                         //you provided a groupfile
503                         string groupFile = "";
504                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
505                         
506                         string nameFile = "";
507                         if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
508                                 nameFile = nameFileNames[s];
509                         }else { nameFile = getNamesFile(fastaFileNames[s]); }
510                         
511                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0;     }                               
512                         
513                         int numSeqs = 0;
514                         int numChimeras = 0;
515             
516             if (hasCount) {
517                 CountTable* ct = new CountTable();
518                 ct->readTable(nameFile);
519                 
520                 if (ct->hasGroupInfo()) {
521                     cparser = new SequenceCountParser(fastaFileNames[s], *ct);
522                     
523                     vector<string> groups = cparser->getNamesOfGroups();
524                     
525                     if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        }  return 0; }
526                     
527                     //clears files
528                     ofstream out, out1, out2;
529                     m->openOutputFile(outputFileName, out); out.close(); 
530                     m->openOutputFile(accnosFileName, out1); out1.close();
531                     
532                     if(processors == 1) {       numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups);       }
533                     else                                {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
534                     
535                     if (m->control_pressed) {  delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }                               
536                     map<string, string> uniqueNames = cparser->getAllSeqsMap();
537                     if (!dups) { 
538                         numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
539                     }
540                     delete cparser;
541
542                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
543                     
544                     if (m->control_pressed) {  delete ct; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;  } 
545                     
546                 }else {
547                     if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
548                     
549                     //read sequences and store sorted by frequency
550                     vector<seqData> sequences = readFiles(fastaFileNames[s], ct);
551                     
552                     if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } return 0; }
553                     
554                     numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras);   
555                 }
556                 delete ct;
557             }else {
558                 if (groupFile != "") {
559                     //Parse sequences by group
560                     parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
561                     vector<string> groups = parser->getNamesOfGroups();
562                     
563                     if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) {     m->mothurRemove(outputNames[j]);        }  return 0; }
564                     
565                     //clears files
566                     ofstream out, out1, out2;
567                     m->openOutputFile(outputFileName, out); out.close(); 
568                     m->openOutputFile(accnosFileName, out1); out1.close();
569                     
570                     if(processors == 1) {       numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups);       }
571                     else                                {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
572                     
573                     if (m->control_pressed) {  delete parser; for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        }  return 0;    }                               
574                     map<string, string> uniqueNames = parser->getAllSeqsMap();
575                     if (!dups) { 
576                         numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
577                     }
578                     delete parser;
579                     
580                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
581                     
582                     if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0;  }         
583                 }else{
584                     if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
585                     
586                     //read sequences and store sorted by frequency
587                     vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
588                     
589                     if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        } return 0; }
590                     
591                     numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
592                 }
593                         }
594             
595                         if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
596                         
597                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
598                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
599                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
600                 }
601                 
602                 //set accnos file as new current accnosfile
603                 string current = "";
604                 itTypes = outputTypes.find("accnos");
605                 if (itTypes != outputTypes.end()) {
606                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
607                 }
608                 
609                 m->mothurOutEndLine();
610                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
611                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
612                 m->mothurOutEndLine();
613                 
614                 return 0;
615                 
616         }
617         catch(exception& e) {
618                 m->errorOut(e, "ChimeraPerseusCommand", "execute");
619                 exit(1);
620         }
621 }
622 //**********************************************************************************************************************
623 string ChimeraPerseusCommand::getNamesFile(string& inputFile){
624         try {
625                 string nameFile = "";
626                 
627                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
628                 
629                 //use unique.seqs to create new name and fastafile
630                 string inputString = "fasta=" + inputFile;
631                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
632                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
633                 m->mothurCalling = true;
634         
635                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
636                 uniqueCommand->execute();
637                 
638                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
639                 
640                 delete uniqueCommand;
641                 m->mothurCalling = false;
642                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
643                 
644                 nameFile = filenames["name"][0];
645                 inputFile = filenames["fasta"][0];
646                 
647                 return nameFile;
648         }
649         catch(exception& e) {
650                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
651                 exit(1);
652         }
653 }
654 //**********************************************************************************************************************
655 int ChimeraPerseusCommand::driverGroups(string outputFName, string accnos, int start, int end, vector<string> groups){
656         try {
657                 
658                 int totalSeqs = 0;
659                 int numChimeras = 0;
660                 
661                 for (int i = start; i < end; i++) {
662                         
663                         m->mothurOutEndLine(); m->mothurOut("Checking sequences from group " + groups[i] + "...");      m->mothurOutEndLine();                                  
664                         
665                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
666                         
667                         vector<seqData> sequences = loadSequences(groups[i]);
668                         
669                         if (m->control_pressed) { return 0; }
670                         
671                         int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras);
672                         totalSeqs += numSeqs;
673                         
674                         if (m->control_pressed) { return 0; }
675                         
676                         //append files
677                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
678                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
679                         
680                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
681                 }       
682                 
683                 return totalSeqs;
684                 
685         }
686         catch(exception& e) {
687                 m->errorOut(e, "ChimeraPerseusCommand", "driverGroups");
688                 exit(1);
689         }
690 }       
691 //**********************************************************************************************************************
692 vector<seqData> ChimeraPerseusCommand::loadSequences(string group){
693         try {
694         bool error = false;
695                 alignLength = 0;
696         vector<seqData> sequences;
697         if (hasCount) {
698             vector<Sequence> thisGroupsSeqs = cparser->getSeqs(group);
699             map<string, int> counts = cparser->getCountTable(group);
700             map<string, int>::iterator it;
701             
702             for (int i = 0; i < thisGroupsSeqs.size(); i++) {
703                 
704                 if (m->control_pressed) {  return sequences; }
705                 
706                 it = counts.find(thisGroupsSeqs[i].getName());
707                 if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); }
708                 else {
709                     thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
710                     sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second));
711                     if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
712                 }
713             }
714         }else{
715             vector<Sequence> thisGroupsSeqs = parser->getSeqs(group);
716             map<string, string> nameMap = parser->getNameMap(group);
717             map<string, string>::iterator it;
718            
719             for (int i = 0; i < thisGroupsSeqs.size(); i++) {
720                 
721                 if (m->control_pressed) {  return sequences; }
722                 
723                 it = nameMap.find(thisGroupsSeqs[i].getName());
724                 if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
725                 else {
726                     int num = m->getNumNames(it->second);
727                     thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
728                     sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
729                     if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
730                 }
731             }
732             
733                 }
734                 
735         if (error) { m->control_pressed = true; }
736                 //sort by frequency
737                 sort(sequences.rbegin(), sequences.rend());
738                 
739                 return sequences;
740         }
741         catch(exception& e) {
742                 m->errorOut(e, "ChimeraPerseusCommand", "loadSequences");
743                 exit(1);
744         }
745 }
746
747 //**********************************************************************************************************************
748 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
749         try {
750                 map<string, int>::iterator it;
751                 map<string, int> nameMap = m->readNames(name);
752                 
753                 //read fasta file and create sequenceData structure - checking for file mismatches
754                 vector<seqData> sequences;
755                 bool error = false;
756                 ifstream in;
757                 m->openInputFile(inputFile, in);
758                 alignLength = 0;
759         
760                 while (!in.eof()) {
761                         
762                         if (m->control_pressed) { in.close(); return sequences; }
763                         
764                         Sequence temp(in); m->gobble(in);
765                         
766                         it = nameMap.find(temp.getName());
767                         if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
768                         else {
769                 temp.setAligned(removeNs(temp.getUnaligned()));
770                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
771                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
772                         }
773                 }
774                 in.close();
775                 
776                 if (error) { m->control_pressed = true; }
777                 
778                 //sort by frequency
779                 sort(sequences.rbegin(), sequences.rend());
780                 
781                 return sequences;
782         }
783         catch(exception& e) {
784                 m->errorOut(e, "ChimeraPerseusCommand", "readFiles");
785                 exit(1);
786         }
787 }
788 //**********************************************************************************************************************
789 string ChimeraPerseusCommand::removeNs(string seq){
790         try {
791         string newSeq = "";
792         for (int i = 0; i < seq.length(); i++) {
793             if (seq[i] != 'N') {  newSeq += seq[i]; }
794         }
795         return newSeq;
796     }
797         catch(exception& e) {
798                 m->errorOut(e, "ChimeraPerseusCommand", "removeNs");
799                 exit(1);
800         }
801 }
802 //**********************************************************************************************************************
803 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){
804         try {           
805                 //read fasta file and create sequenceData structure - checking for file mismatches
806                 vector<seqData> sequences;
807                 ifstream in;
808                 m->openInputFile(inputFile, in);
809                 alignLength = 0;
810         
811                 while (!in.eof()) {
812             Sequence temp(in); m->gobble(in);
813                         
814                         int count = ct->getNumSeqs(temp.getName());
815                         if (m->control_pressed) { break; }
816                         else {
817                 temp.setAligned(removeNs(temp.getUnaligned()));
818                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count));
819                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
820                         }
821                 }
822                 in.close();
823                 
824                 //sort by frequency
825                 sort(sequences.rbegin(), sequences.rend());
826                 
827                 return sequences;
828         }
829         catch(exception& e) {
830                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
831                 exit(1);
832         }
833 }
834 //**********************************************************************************************************************
835 int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& sequences, string accnosFileName, int& numChimeras){
836         try {
837                 
838                 vector<vector<double> > correctModel(4);        //could be an option in the future to input own model matrix
839                 for(int i=0;i<4;i++){   correctModel[i].resize(4);      }
840                 
841                 correctModel[0][0] = 0.000000;  //AA
842                 correctModel[1][0] = 11.619259; //CA
843                 correctModel[2][0] = 11.694004; //TA
844                 correctModel[3][0] = 7.748623;  //GA
845                 
846                 correctModel[1][1] = 0.000000;  //CC
847                 correctModel[2][1] = 7.619657;  //TC
848                 correctModel[3][1] = 12.852562; //GC
849                 
850                 correctModel[2][2] = 0.000000;  //TT
851                 correctModel[3][2] = 10.964048; //TG
852                 
853                 correctModel[3][3] = 0.000000;  //GG
854                 
855                 for(int i=0;i<4;i++){
856                         for(int j=0;j<i;j++){
857                                 correctModel[j][i] = correctModel[i][j];
858                         }
859                 }
860                 
861                 int numSeqs = sequences.size();
862                 //int alignLength = sequences[0].sequence.size();
863                 
864                 ofstream chimeraFile;
865                 ofstream accnosFile;
866                 m->openOutputFile(chimeraFileName, chimeraFile); 
867                 m->openOutputFile(accnosFileName, accnosFile); 
868                 
869                 Perseus myPerseus;
870                 vector<vector<double> > binMatrix = myPerseus.binomial(alignLength);
871                 
872                 chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n";
873                 
874                 vector<bool> chimeras(numSeqs, 0);
875                 
876                 for(int i=0;i<numSeqs;i++){     
877                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
878     
879                         vector<bool> restricted = chimeras;
880                         
881                         vector<vector<int> > leftDiffs(numSeqs);
882                         vector<vector<int> > leftMaps(numSeqs);
883                         vector<vector<int> > rightDiffs(numSeqs);
884                         vector<vector<int> > rightMaps(numSeqs);
885                         
886                         vector<int> singleLeft, bestLeft;
887                         vector<int> singleRight, bestRight;
888                         
889                         int bestSingleIndex, bestSingleDiff;
890                         vector<pwAlign> alignments(numSeqs);
891                         
892                         int comparisons = myPerseus.getAlignments(i, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
893                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
894
895                         int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
896                         
897                         string dummyA, dummyB;
898                         
899             if (sequences[i].sequence.size() < 3) { 
900                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
901             }else if(comparisons >= 2){ 
902                                 minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
903                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
904
905                                 int minMismatchToTrimera = numeric_limits<int>::max();
906                                 int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
907                                 
908                                 if(minMismatchToChimera >= 3 && comparisons >= 3){
909                                         minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
910                                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
911                                 }
912                                 
913                                 double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
914                                 
915                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
916
917                                 string type;
918                                 string chimeraRefSeq;
919                                 
920                                 if(minMismatchToChimera - minMismatchToTrimera >= 3){
921                                         type = "trimera";
922                                         chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps);
923                                 }
924                                 else{
925                                         type = "chimera";
926                                         chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
927                                 }
928
929                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
930                                 
931                                 double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
932                                 
933                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
934
935                                 double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq);
936                                 double loonIndex = myPerseus.calcLoonIndex(sequences[i].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
937                                 
938                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
939
940                                 chimeraFile << i << '\t' << sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
941                                 chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << sequences[leftParentBi].seqName << '\t' << sequences[rightParentBi].seqName << '\t';
942                                 chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t';
943                                 chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t';
944                                 
945                                 double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, alpha, beta);
946                                 
947                                 chimeraFile << probability << '\t';
948                                 
949                                 if(probability > cutoff){ 
950                                         chimeraFile << type << endl;
951                                         accnosFile << sequences[i].seqName << endl;
952                                         chimeras[i] = 1;
953                                         numChimeras++;
954                                 }
955                                 else{
956                                         chimeraFile << "good" << endl;
957                                 }
958                                 
959                         }
960                         else{
961                                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
962                         }
963         
964                         //report progress
965                         if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1) + "\n");           }
966                 }
967                 
968                 if((numSeqs) % 100 != 0){       m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");               }
969                 
970                 chimeraFile.close();
971                 accnosFile.close();
972                 
973                 return numSeqs;
974         }
975         catch(exception& e) {
976                 m->errorOut(e, "ChimeraPerseusCommand", "driver");
977                 exit(1);
978         }
979 }
980 /**************************************************************************************************/
981 int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) {
982         try {
983                 
984                 vector<int> processIDS;
985                 int process = 1;
986                 int num = 0;
987                 
988                 //sanity check
989                 if (groups.size() < processors) { processors = groups.size(); }
990                 
991                 //divide the groups between the processors
992                 vector<linePair> lines;
993                 int numGroupsPerProcessor = groups.size() / processors;
994                 for (int i = 0; i < processors; i++) {
995                         int startIndex =  i * numGroupsPerProcessor;
996                         int endIndex = (i+1) * numGroupsPerProcessor;
997                         if(i == (processors - 1)){      endIndex = groups.size();       }
998                         lines.push_back(linePair(startIndex, endIndex));
999                 }
1000                 
1001 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1002                 
1003                 //loop through and create all the processes you want
1004                 while (process != processors) {
1005                         int pid = fork();
1006                         
1007                         if (pid > 0) {
1008                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1009                                 process++;
1010                         }else if (pid == 0){
1011                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1012                                 
1013                                 //pass numSeqs to parent
1014                                 ofstream out;
1015                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1016                                 m->openOutputFile(tempFile, out);
1017                                 out << num << endl;
1018                                 out.close();
1019                                 
1020                                 exit(0);
1021                         }else { 
1022                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1023                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1024                                 exit(0);
1025                         }
1026                 }
1027                 
1028                 //do my part
1029                 num = driverGroups(outputFName, accnos, lines[0].start, lines[0].end, groups);
1030                 
1031                 //force parent to wait until all the processes are done
1032                 for (int i=0;i<processIDS.size();i++) { 
1033                         int temp = processIDS[i];
1034                         wait(&temp);
1035                 }
1036                 
1037                 for (int i = 0; i < processIDS.size(); i++) {
1038                         ifstream in;
1039                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1040                         m->openInputFile(tempFile, in);
1041                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1042                         in.close(); m->mothurRemove(tempFile);
1043                 }
1044                 
1045 #else
1046                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1047                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1048                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1049                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1050                 
1051                 vector<perseusData*> pDataArray; 
1052                 DWORD   dwThreadIdArray[processors-1];
1053                 HANDLE  hThreadArray[processors-1]; 
1054                 
1055                 //Create processor worker threads.
1056                 for( int i=1; i<processors; i++ ){
1057                         // Allocate memory for thread data.
1058                         string extension = toString(i) + ".temp";
1059                         
1060                         perseusData* tempPerseus = new perseusData(hasName, hasCount, alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i);
1061                         
1062                         pDataArray.push_back(tempPerseus);
1063                         processIDS.push_back(i);
1064                         
1065                         //MyPerseusThreadFunction is in header. It must be global or static to work with the threads.
1066                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1067                         hThreadArray[i-1] = CreateThread(NULL, 0, MyPerseusThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1068                 }
1069                 
1070                 
1071                 //using the main process as a worker saves time and memory
1072                 num = driverGroups(outputFName, accnos, lines[0].start, lines[0].end, groups);
1073                 
1074                 //Wait until all threads have terminated.
1075                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1076                         
1077                 //Close all thread handles and free memory allocations.
1078                 for(int i=0; i < pDataArray.size(); i++){
1079             if (pDataArray[i]->count != pDataArray[i]->end) {
1080                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
1081             }
1082                         num += pDataArray[i]->count;
1083                         CloseHandle(hThreadArray[i]);
1084                         delete pDataArray[i];
1085                 }
1086 #endif          
1087                 
1088                 
1089                 //append output files
1090                 for(int i=0;i<processIDS.size();i++){
1091                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1092                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1093                         
1094                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1095                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1096                 }
1097                 
1098                 return num;     
1099                 
1100         }
1101         catch(exception& e) {
1102                 m->errorOut(e, "ChimeraPerseusCommand", "createProcessesGroups");
1103                 exit(1);
1104         }
1105 }
1106 //**********************************************************************************************************************
1107 int ChimeraPerseusCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName){
1108         try {
1109                 map<string, string>::iterator itUnique;
1110                 int total = 0;
1111                 
1112                 //edit accnos file
1113                 ifstream in2; 
1114                 m->openInputFile(accnosFileName, in2);
1115                 
1116                 ofstream out2;
1117                 m->openOutputFile(accnosFileName+".temp", out2);
1118                 
1119                 string name;
1120                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
1121                 set<string>::iterator itNames;
1122                 set<string> chimerasInFile;
1123                 set<string>::iterator itChimeras;
1124                 
1125                 
1126                 while (!in2.eof()) {
1127                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
1128                         
1129                         in2 >> name; m->gobble(in2);
1130                         
1131                         //find unique name
1132                         itUnique = uniqueNames.find(name);
1133                         
1134                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1135                         else {
1136                                 itChimeras = chimerasInFile.find((itUnique->second));
1137                                 
1138                                 if (itChimeras == chimerasInFile.end()) {
1139                                         out2 << itUnique->second << endl;
1140                                         chimerasInFile.insert((itUnique->second));
1141                                         total++;
1142                                 }
1143                         }
1144                 }
1145                 in2.close();
1146                 out2.close();
1147                 
1148                 m->mothurRemove(accnosFileName);
1149                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
1150                 
1151                 //edit chimera file
1152                 ifstream in; 
1153                 m->openInputFile(outputFileName, in);
1154                 
1155                 ofstream out;
1156                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1157                 
1158                 int DiffsToBestMatch, BestMatchIndex, DiffstToChimera, IndexofLeftParent, IndexOfRightParent;
1159                 float temp1,temp2, temp3, temp4, temp5, temp6, temp7, temp8;
1160                 string index, BestMatchName, parent1, parent2, flag;
1161                 name = "";
1162                 namesInFile.clear();    
1163                 //assumptions - in file each read will always look like 
1164                 /*                                                                              
1165                  SequenceIndex  Name    DiffsToBestMatch        BestMatchIndex  BestMatchName   DiffstToChimera IndexofLeftParent       IndexOfRightParent      NameOfLeftParent        NameOfRightParent       DistanceToBestMatch     cIndex  (cIndex - singleDist)   loonIndex       MismatchesToChimera     MismatchToTrimera       ChimeraBreakPoint       LogisticProbability     TypeOfSequence
1166                  0      F01QG4L02JVBQY  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
1167                  1      F01QG4L02ICTC6  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
1168                  2      F01QG4L02JZOEC  48      0       F01QG4L02JVBQY  47      0       0       F01QG4L02JVBQY  F01QG4L02JVBQY  2.0449  2.03545 -0.00944493     0       47      2147483647      138     0       good
1169                  3      F01QG4L02G7JEC  42      0       F01QG4L02JVBQY  40      1       0       F01QG4L02ICTC6  F01QG4L02JVBQY  1.87477 1.81113 -0.0636404      5.80145 40      2147483647      25      0       good
1170                  */
1171                 
1172                 //get and print headers
1173                 BestMatchName = m->getline(in); m->gobble(in);
1174                 out << BestMatchName << endl;
1175                 
1176                 while (!in.eof()) {
1177                         
1178                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
1179                         
1180                         bool print = false;
1181                         in >> index;    m->gobble(in);
1182                         
1183                         if (index != "SequenceIndex") { //if you are not a header line, there will be a header line for each group if group file is given
1184                                 in >> name;             m->gobble(in);
1185                                 in >> DiffsToBestMatch; m->gobble(in);
1186                                 in >> BestMatchIndex; m->gobble(in);
1187                                 in >> BestMatchName; m->gobble(in);
1188                                 in >> DiffstToChimera; m->gobble(in);
1189                                 in >> IndexofLeftParent; m->gobble(in);
1190                                 in >> IndexOfRightParent; m->gobble(in);
1191                                 in >> parent1;  m->gobble(in);
1192                                 in >> parent2;  m->gobble(in);
1193                                 in >> temp1 >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> flag; m->gobble(in);
1194                                 
1195                                 //find unique name
1196                                 itUnique = uniqueNames.find(name);
1197                                 
1198                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1199                                 else {
1200                                         name = itUnique->second;
1201                                         //is this name already in the file
1202                                         itNames = namesInFile.find((name));
1203                                         
1204                                         if (itNames == namesInFile.end()) { //no not in file
1205                                                 if (flag == "good") { //are you really a no??
1206                                                         //is this sequence really not chimeric??
1207                                                         itChimeras = chimerasInFile.find(name);
1208                                                         
1209                                                         //then you really are a no so print, otherwise skip
1210                                                         if (itChimeras == chimerasInFile.end()) { print = true; }
1211                                                 }else{ print = true; }
1212                                         }
1213                                 }
1214                                 
1215                                 if (print) {
1216                                         out << index << '\t' << name  << '\t' << DiffsToBestMatch << '\t' << BestMatchIndex << '\t';
1217                                         namesInFile.insert(name);
1218                                         
1219                                         if (BestMatchName != "Null") {
1220                                                 itUnique = uniqueNames.find(BestMatchName);
1221                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find BestMatchName "+ BestMatchName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1222                                                 else {  out << itUnique->second << '\t';        }                                       
1223                                         }else { out << "Null" << '\t'; }
1224                                         
1225                                         out << DiffstToChimera << '\t' << IndexofLeftParent << '\t' << IndexOfRightParent << '\t';
1226                                         
1227                                         if (parent1 != "Null") {
1228                                                 itUnique = uniqueNames.find(parent1);
1229                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent1 "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1230                                                 else {  out << itUnique->second << '\t';        }
1231                                         }else { out << "Null" << '\t'; }
1232                                         
1233                                         if (parent1 != "Null") {
1234                                                 itUnique = uniqueNames.find(parent2);
1235                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent2 "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1236                                                 else {  out << itUnique->second << '\t';        }
1237                                         }else { out << "Null" << '\t'; }
1238                                         
1239                                         out << temp1 << '\t' << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << flag << endl;    
1240                                 }
1241                         }else { index = m->getline(in); m->gobble(in); }
1242                 }
1243                 in.close();
1244                 out.close();
1245                 
1246                 m->mothurRemove(outputFileName);
1247                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
1248                 
1249                 return total;
1250         }
1251         catch(exception& e) {
1252                 m->errorOut(e, "ChimeraPerseusCommand", "deconvoluteResults");
1253                 exit(1);
1254         }
1255 }       
1256 //**********************************************************************************************************************
1257
1258