]> git.donarmstrong.com Git - mothur.git/blob - chimeraperseuscommand.cpp
842a65ee22fd54c730143cbed94f91688dba8006
[mothur.git] / chimeraperseuscommand.cpp
1 /*
2  *  chimeraperseuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/26/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraperseuscommand.h"
11 #include "deconvolutecommand.h"
12 #include "sequence.hpp"
13 #include "counttable.h"
14 #include "sequencecountparser.h"
15 //**********************************************************************************************************************
16 vector<string> ChimeraPerseusCommand::setParameters(){  
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
19                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
23         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
24
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27                 CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff);
28                 CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha);
29                 CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta);
30                         
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "ChimeraPerseusCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string ChimeraPerseusCommand::getHelpString(){  
42         try {
43                 string helpString = "";
44                 helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
45                 helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, dereplicate, alpha and beta.\n";
46                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
47                 helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
48         helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed.\n";
49                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
50                 helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
53                 helpString += "The alpha parameter ....  The default is -5.54. \n";
54                 helpString += "The beta parameter ....  The default is 0.33. \n";
55                 helpString += "The cutoff parameter ....  The default is 0.50. \n";
56                 helpString += "The chimera.perseus command should be in the following format: \n";
57                 helpString += "chimera.perseus(fasta=yourFastaFile, name=yourNameFile) \n";
58                 helpString += "Example: chimera.perseus(fasta=AD.align, name=AD.names) \n";
59                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "ChimeraPerseusCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string ChimeraPerseusCommand::getOutputPattern(string type) {
69     try {
70         string pattern = "";
71         
72         if (type == "chimera") {  pattern = "[filename],perseus.chimeras"; } 
73         else if (type == "accnos") {  pattern = "[filename],perseus.accnos"; }
74         else if (type == "count") {  pattern = "[filename],perseus.pick.count_table"; }
75         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
76         
77         return pattern;
78     }
79     catch(exception& e) {
80         m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern");
81         exit(1);
82     }
83 }
84 //**********************************************************************************************************************
85 ChimeraPerseusCommand::ChimeraPerseusCommand(){ 
86         try {
87                 abort = true; calledHelp = true;
88                 setParameters();
89                 vector<string> tempOutNames;
90                 outputTypes["chimera"] = tempOutNames;
91                 outputTypes["accnos"] = tempOutNames;
92         outputTypes["count"] = tempOutNames;
93         }
94         catch(exception& e) {
95                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
96                 exit(1);
97         }
98 }
99 //***************************************************************************************************************
100 ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
101         try {
102                 abort = false; calledHelp = false; 
103         hasCount = false;
104         hasName = false;
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string,string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter("chimera.perseus");
117                         map<string,string>::iterator it;
118                         
119                         //check to make sure all parameters are valid for command
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
122                         }
123                         
124                         vector<string> tempOutNames;
125                         outputTypes["chimera"] = tempOutNames;
126                         outputTypes["accnos"] = tempOutNames;
127             outputTypes["count"] = tempOutNames;
128                         
129                         //if the user changes the input directory command factory will send this info to us in the output parameter 
130                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
131                         if (inputDir == "not found"){   inputDir = "";          }
132                         
133                         //check for required parameters
134                         fastafile = validParameter.validFile(parameters, "fasta", false);
135                         if (fastafile == "not found") {                                 
136                                 //if there is a current fasta file, use it
137                                 string filename = m->getFastaFile(); 
138                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
139                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
140                         }else { 
141                                 m->splitAtDash(fastafile, fastaFileNames);
142                                 
143                                 //go through files and make sure they are good, if not, then disregard them
144                                 for (int i = 0; i < fastaFileNames.size(); i++) {
145                                         
146                                         bool ignore = false;
147                                         if (fastaFileNames[i] == "current") { 
148                                                 fastaFileNames[i] = m->getFastaFile(); 
149                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
150                                                 else {  
151                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
152                                                         //erase from file list
153                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
154                                                         i--;
155                                                 }
156                                         }
157                                         
158                                         if (!ignore) {
159                                                 
160                                                 if (inputDir != "") {
161                                                         string path = m->hasPath(fastaFileNames[i]);
162                                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
164                                                 }
165                                                 
166                                                 int ableToOpen;
167                                                 ifstream in;
168                                                 
169                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
170                                                 
171                                                 //if you can't open it, try default location
172                                                 if (ableToOpen == 1) {
173                                                         if (m->getDefaultPath() != "") { //default path is set
174                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
175                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
176                                                                 ifstream in2;
177                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
178                                                                 in2.close();
179                                                                 fastaFileNames[i] = tryPath;
180                                                         }
181                                                 }
182                                                 
183                                                 if (ableToOpen == 1) {
184                                                         if (m->getOutputDir() != "") { //default path is set
185                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
186                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
187                                                                 ifstream in2;
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                                 in2.close();
190                                                                 fastaFileNames[i] = tryPath;
191                                                         }
192                                                 }
193                                                 
194                                                 in.close();
195                                                 
196                                                 if (ableToOpen == 1) { 
197                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
198                                                         //erase from file list
199                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
200                                                         i--;
201                                                 }else {
202                                                         m->setFastaFile(fastaFileNames[i]);
203                                                 }
204                                         }
205                                 }
206                                 
207                                 //make sure there is at least one valid file left
208                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
209                         }
210                         
211                         
212                         //check for required parameters
213                         namefile = validParameter.validFile(parameters, "name", false);
214                         if (namefile == "not found") { namefile = "";   }
215                         else { 
216                                 m->splitAtDash(namefile, nameFileNames);
217                                 
218                                 //go through files and make sure they are good, if not, then disregard them
219                                 for (int i = 0; i < nameFileNames.size(); i++) {
220                                         
221                                         bool ignore = false;
222                                         if (nameFileNames[i] == "current") { 
223                                                 nameFileNames[i] = m->getNameFile(); 
224                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
225                                                 else {  
226                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
227                                                         //erase from file list
228                                                         nameFileNames.erase(nameFileNames.begin()+i);
229                                                         i--;
230                                                 }
231                                         }
232                                         
233                                         if (!ignore) {
234                                                 
235                                                 if (inputDir != "") {
236                                                         string path = m->hasPath(nameFileNames[i]);
237                                                         //if the user has not given a path then, add inputdir. else leave path alone.
238                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
239                                                 }
240                                                 
241                                                 int ableToOpen;
242                                                 ifstream in;
243                                                 
244                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
245                                                 
246                                                 //if you can't open it, try default location
247                                                 if (ableToOpen == 1) {
248                                                         if (m->getDefaultPath() != "") { //default path is set
249                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
250                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
251                                                                 ifstream in2;
252                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
253                                                                 in2.close();
254                                                                 nameFileNames[i] = tryPath;
255                                                         }
256                                                 }
257                                                 
258                                                 if (ableToOpen == 1) {
259                                                         if (m->getOutputDir() != "") { //default path is set
260                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
261                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
262                                                                 ifstream in2;
263                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
264                                                                 in2.close();
265                                                                 nameFileNames[i] = tryPath;
266                                                         }
267                                                 }
268                                                 
269                                                 in.close();
270                                                 
271                                                 if (ableToOpen == 1) { 
272                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
273                                                         //erase from file list
274                                                         nameFileNames.erase(nameFileNames.begin()+i);
275                                                         i--;
276                                                 }else {
277                                                         m->setNameFile(nameFileNames[i]);
278                                                 }
279                                         }
280                                 }
281                         }
282             
283             if (nameFileNames.size() != 0) { hasName = true; }
284             
285             //check for required parameters
286             vector<string> countfileNames;
287                         countfile = validParameter.validFile(parameters, "count", false);
288                         if (countfile == "not found") { 
289                 countfile = "";  
290                         }else { 
291                                 m->splitAtDash(countfile, countfileNames);
292                                 
293                                 //go through files and make sure they are good, if not, then disregard them
294                                 for (int i = 0; i < countfileNames.size(); i++) {
295                                         
296                                         bool ignore = false;
297                                         if (countfileNames[i] == "current") { 
298                                                 countfileNames[i] = m->getCountTableFile(); 
299                                                 if (countfileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
300                                                 else {  
301                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
302                                                         //erase from file list
303                                                         countfileNames.erase(countfileNames.begin()+i);
304                                                         i--;
305                                                 }
306                                         }
307                                         
308                                         if (!ignore) {
309                                                 
310                                                 if (inputDir != "") {
311                                                         string path = m->hasPath(countfileNames[i]);
312                                                         //if the user has not given a path then, add inputdir. else leave path alone.
313                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
314                                                 }
315                                                 
316                                                 int ableToOpen;
317                                                 ifstream in;
318                                                 
319                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
320                                                 
321                                                 //if you can't open it, try default location
322                                                 if (ableToOpen == 1) {
323                                                         if (m->getDefaultPath() != "") { //default path is set
324                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
325                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
326                                                                 ifstream in2;
327                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
328                                                                 in2.close();
329                                                                 countfileNames[i] = tryPath;
330                                                         }
331                                                 }
332                                                 
333                                                 if (ableToOpen == 1) {
334                                                         if (m->getOutputDir() != "") { //default path is set
335                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
336                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
337                                                                 ifstream in2;
338                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
339                                                                 in2.close();
340                                                                 countfileNames[i] = tryPath;
341                                                         }
342                                                 }
343                                                 
344                                                 in.close();
345                                                 
346                                                 if (ableToOpen == 1) { 
347                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
348                                                         //erase from file list
349                                                         countfileNames.erase(countfileNames.begin()+i);
350                                                         i--;
351                                                 }else {
352                                                         m->setCountTableFile(countfileNames[i]);
353                                                 }
354                                         }
355                                 }
356                         }
357             
358             if (countfileNames.size() != 0) { hasCount = true; }
359             
360                         //make sure there is at least one valid file left
361             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
362             
363             if (!hasName && !hasCount) { 
364                 //if there is a current name file, use it, else look for current count file
365                                 string filename = m->getNameFile(); 
366                                 if (filename != "") { hasName = true; nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
367                                 else { 
368                     filename = m->getCountTableFile();
369                     if (filename != "") { hasCount = true; countfileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the count parameter."); m->mothurOutEndLine(); }
370                     else { m->mothurOut("[ERROR]: You must provide a count or name file."); m->mothurOutEndLine(); abort = true;  }
371                 }
372             }
373             if (!hasName && hasCount) { nameFileNames = countfileNames; }
374             
375                         if (nameFileNames.size() != fastaFileNames.size()) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
376                         
377                         bool hasGroup = true;
378                         groupfile = validParameter.validFile(parameters, "group", false);
379                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
380                         else { 
381                                 m->splitAtDash(groupfile, groupFileNames);
382                                 
383                                 //go through files and make sure they are good, if not, then disregard them
384                                 for (int i = 0; i < groupFileNames.size(); i++) {
385                                         
386                                         bool ignore = false;
387                                         if (groupFileNames[i] == "current") { 
388                                                 groupFileNames[i] = m->getGroupFile(); 
389                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
390                                                 else {  
391                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
392                                                         //erase from file list
393                                                         groupFileNames.erase(groupFileNames.begin()+i);
394                                                         i--;
395                                                 }
396                                         }
397                                         
398                                         if (!ignore) {
399                                                 
400                                                 if (inputDir != "") {
401                                                         string path = m->hasPath(groupFileNames[i]);
402                                                         //if the user has not given a path then, add inputdir. else leave path alone.
403                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
404                                                 }
405                                                 
406                                                 int ableToOpen;
407                                                 ifstream in;
408                                                 
409                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
410                                                 
411                                                 //if you can't open it, try default location
412                                                 if (ableToOpen == 1) {
413                                                         if (m->getDefaultPath() != "") { //default path is set
414                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
415                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
416                                                                 ifstream in2;
417                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
418                                                                 in2.close();
419                                                                 groupFileNames[i] = tryPath;
420                                                         }
421                                                 }
422                                                 
423                                                 if (ableToOpen == 1) {
424                                                         if (m->getOutputDir() != "") { //default path is set
425                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
426                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
427                                                                 ifstream in2;
428                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
429                                                                 in2.close();
430                                                                 groupFileNames[i] = tryPath;
431                                                         }
432                                                 }
433                                                 
434                                                 in.close();
435                                                 
436                                                 if (ableToOpen == 1) { 
437                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
438                                                         //erase from file list
439                                                         groupFileNames.erase(groupFileNames.begin()+i);
440                                                         i--;
441                                                 }else {
442                                                         m->setGroupFile(groupFileNames[i]);
443                                                 }
444                                         }
445                                 }
446                                 
447                                 //make sure there is at least one valid file left
448                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
449                         }
450                         
451                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
452                         
453             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
454                         
455                         //if the user changes the output directory command factory will send this info to us in the output parameter 
456                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
457                         
458                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
459                         m->setProcessors(temp);
460                         m->mothurConvert(temp, processors);
461                         
462                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.50";  }
463                         m->mothurConvert(temp, cutoff);
464                         
465                         temp = validParameter.validFile(parameters, "alpha", false);    if (temp == "not found"){       temp = "-5.54"; }
466                         m->mothurConvert(temp, alpha);
467                         
468                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
469                         m->mothurConvert(temp, beta);
470             
471                         temp = validParameter.validFile(parameters, "dereplicate", false);      
472                         if (temp == "not found") { 
473                                 if (groupfile != "")    {  temp = "false";                                      }
474                                 else                    {  temp = "true";       }
475                         }
476                         dups = m->isTrue(temp);
477                 }
478         }
479         catch(exception& e) {
480                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
481                 exit(1);
482         }
483 }
484 //***************************************************************************************************************
485
486 int ChimeraPerseusCommand::execute(){
487         try{
488                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
489                 
490                                 
491                 //process each file
492                 for (int s = 0; s < fastaFileNames.size(); s++) {
493                         
494                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
495                         
496                         int start = time(NULL); 
497                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it       
498                         map<string, string> variables;
499                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
500                         string outputFileName = getOutputFileName("chimera", variables);
501                         string accnosFileName = getOutputFileName("accnos", variables);
502             string newCountFile = "";
503
504                         //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
505                         
506                         //you provided a groupfile
507                         string groupFile = "";
508                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
509                         
510                         string nameFile = "";
511                         if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
512                                 nameFile = nameFileNames[s];
513                         }else { nameFile = getNamesFile(fastaFileNames[s]); }
514                         
515                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0;     }                               
516                         
517                         int numSeqs = 0;
518                         int numChimeras = 0;
519             
520             if (hasCount) {
521                 CountTable* ct = new CountTable();
522                 ct->readTable(nameFile);
523                 
524                 if (ct->hasGroupInfo()) {
525                     cparser = new SequenceCountParser(fastaFileNames[s], *ct);
526                     variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
527                     newCountFile = getOutputFileName("count", variables);
528                     
529                     vector<string> groups = cparser->getNamesOfGroups();
530                     
531                     if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        }  return 0; }
532                     
533                     //clears files
534                     ofstream out, out1, out2;
535                     m->openOutputFile(outputFileName, out); out.close(); 
536                     m->openOutputFile(accnosFileName, out1); out1.close();
537                     
538                     if(processors == 1) {       numSeqs = driverGroups(outputFileName, accnosFileName, newCountFile, 0, groups.size(), groups);
539                         if (dups) {
540                             CountTable c; c.readTable(nameFile);
541                             if (!m->isBlank(newCountFile)) {
542                                 ifstream in2;
543                                 m->openInputFile(newCountFile, in2);
544                                 
545                                 string name, group;
546                                 while (!in2.eof()) {
547                                     in2 >> name >> group; m->gobble(in2);
548                                     c.setAbund(name, group, 0);
549                                 }
550                                 in2.close();
551                             }
552                             m->mothurRemove(newCountFile);
553                             c.printTable(newCountFile);
554                         }
555
556                     }
557                     else                                {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, newCountFile, groups, groupFile, fastaFileNames[s], nameFile);                  }
558                     
559                     if (m->control_pressed) {  delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }                               
560                     map<string, string> uniqueNames = cparser->getAllSeqsMap();
561                     if (!dups) { 
562                         numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
563                     }else {
564                         set<string> doNotRemove;
565                         CountTable c; c.readTable(newCountFile);
566                         vector<string> namesInTable = c.getNamesOfSeqs();
567                         for (int i = 0; i < namesInTable.size(); i++) {
568                             int temp = c.getNumSeqs(namesInTable[i]);
569                             if (temp == 0) {  c.remove(namesInTable[i]);  }
570                             else { doNotRemove.insert((namesInTable[i])); }
571                         }
572                         //remove names we want to keep from accnos file.
573                         set<string> accnosNames = m->readAccnos(accnosFileName);
574                         ofstream out2;
575                         m->openOutputFile(accnosFileName, out2);
576                         for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
577                             if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
578                         }
579                         out2.close();
580                         c.printTable(newCountFile);
581                         outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
582
583                     }
584                     delete cparser;
585
586                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
587                     
588                     if (m->control_pressed) {  delete ct; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;  } 
589                     
590                 }else {
591                     if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
592                     
593                     //read sequences and store sorted by frequency
594                     vector<seqData> sequences = readFiles(fastaFileNames[s], ct);
595                     
596                     if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } return 0; }
597                     
598                     numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras);   
599                 }
600                 delete ct;
601             }else {
602                 if (groupFile != "") {
603                     //Parse sequences by group
604                     parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
605                     vector<string> groups = parser->getNamesOfGroups();
606                     
607                     if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) {     m->mothurRemove(outputNames[j]);        }  return 0; }
608                     
609                     //clears files
610                     ofstream out, out1, out2;
611                     m->openOutputFile(outputFileName, out); out.close(); 
612                     m->openOutputFile(accnosFileName, out1); out1.close();
613                     
614                     if(processors == 1) {       numSeqs = driverGroups(outputFileName, accnosFileName, "", 0, groups.size(), groups);   }
615                     else                                {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, "", groups, groupFile, fastaFileNames[s], nameFile);                    }
616                     
617                     if (m->control_pressed) {  delete parser; for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        }  return 0;    }                               
618                     map<string, string> uniqueNames = parser->getAllSeqsMap();
619                     if (!dups) { 
620                         numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
621                     }
622                     delete parser;
623                     
624                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
625                     
626                     if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0;  }         
627                 }else{
628                     if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
629                     
630                     //read sequences and store sorted by frequency
631                     vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
632                     
633                     if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        } return 0; }
634                     
635                     numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
636                 }
637                         }
638             
639                         if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
640                         
641                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
642                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
643                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
644                 }
645                 
646                 //set accnos file as new current accnosfile
647                 string current = "";
648                 itTypes = outputTypes.find("accnos");
649                 if (itTypes != outputTypes.end()) {
650                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
651                 }
652         
653         itTypes = outputTypes.find("count");
654                 if (itTypes != outputTypes.end()) {
655                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
656                 }
657                 
658                 m->mothurOutEndLine();
659                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
660                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
661                 m->mothurOutEndLine();
662                 
663                 return 0;
664                 
665         }
666         catch(exception& e) {
667                 m->errorOut(e, "ChimeraPerseusCommand", "execute");
668                 exit(1);
669         }
670 }
671 //**********************************************************************************************************************
672 string ChimeraPerseusCommand::getNamesFile(string& inputFile){
673         try {
674                 string nameFile = "";
675                 
676                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
677                 
678                 //use unique.seqs to create new name and fastafile
679                 string inputString = "fasta=" + inputFile;
680                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
681                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
682                 m->mothurCalling = true;
683         
684                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
685                 uniqueCommand->execute();
686                 
687                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
688                 
689                 delete uniqueCommand;
690                 m->mothurCalling = false;
691                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
692                 
693                 nameFile = filenames["name"][0];
694                 inputFile = filenames["fasta"][0];
695                 
696                 return nameFile;
697         }
698         catch(exception& e) {
699                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
700                 exit(1);
701         }
702 }
703 //**********************************************************************************************************************
704 int ChimeraPerseusCommand::driverGroups(string outputFName, string accnos, string countlist, int start, int end, vector<string> groups){
705         try {
706                 
707                 int totalSeqs = 0;
708                 int numChimeras = 0;
709         
710         ofstream outCountList;
711         if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
712                 
713                 for (int i = start; i < end; i++) {
714                         
715                         m->mothurOutEndLine(); m->mothurOut("Checking sequences from group " + groups[i] + "...");      m->mothurOutEndLine();                                  
716                         
717                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
718                         
719                         vector<seqData> sequences = loadSequences(groups[i]);
720                         
721                         if (m->control_pressed) { return 0; }
722                         
723                         int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras);
724                         totalSeqs += numSeqs;
725                         
726                         if (m->control_pressed) { return 0; }
727             
728             if (dups) {
729                 if (!m->isBlank(accnos+groups[i])) {
730                     ifstream in;
731                     m->openInputFile(accnos+groups[i], in);
732                     string name;
733                     if (hasCount) {
734                         while (!in.eof()) {
735                             in >> name; m->gobble(in);
736                             outCountList << name << '\t' << groups[i] << endl;
737                         }
738                         in.close();
739                     }else {
740                         map<string, string> thisnamemap = parser->getNameMap(groups[i]);
741                         map<string, string>::iterator itN;
742                         ofstream out;
743                         m->openOutputFile(accnos+groups[i]+".temp", out);
744                         while (!in.eof()) {
745                             in >> name; m->gobble(in);
746                             itN = thisnamemap.find(name);
747                             if (itN != thisnamemap.end()) {
748                                 vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
749                                 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
750                                 
751                             }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
752                         }
753                         out.close();
754                         in.close();
755                         m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
756                     }
757                     
758                 }
759             }
760                         
761                         //append files
762                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
763                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
764                         
765                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
766                 }       
767                 
768         if (hasCount && dups) { outCountList.close(); }
769         
770                 return totalSeqs;
771                 
772         }
773         catch(exception& e) {
774                 m->errorOut(e, "ChimeraPerseusCommand", "driverGroups");
775                 exit(1);
776         }
777 }       
778 //**********************************************************************************************************************
779 vector<seqData> ChimeraPerseusCommand::loadSequences(string group){
780         try {
781         bool error = false;
782                 alignLength = 0;
783         vector<seqData> sequences;
784         if (hasCount) {
785             vector<Sequence> thisGroupsSeqs = cparser->getSeqs(group);
786             map<string, int> counts = cparser->getCountTable(group);
787             map<string, int>::iterator it;
788             
789             for (int i = 0; i < thisGroupsSeqs.size(); i++) {
790                 
791                 if (m->control_pressed) {  return sequences; }
792                 
793                 it = counts.find(thisGroupsSeqs[i].getName());
794                 if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); }
795                 else {
796                     thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
797                     sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second));
798                     if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
799                 }
800             }
801         }else{
802             vector<Sequence> thisGroupsSeqs = parser->getSeqs(group);
803             map<string, string> nameMap = parser->getNameMap(group);
804             map<string, string>::iterator it;
805            
806             for (int i = 0; i < thisGroupsSeqs.size(); i++) {
807                 
808                 if (m->control_pressed) {  return sequences; }
809                 
810                 it = nameMap.find(thisGroupsSeqs[i].getName());
811                 if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
812                 else {
813                     int num = m->getNumNames(it->second);
814                     thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
815                     sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
816                     if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
817                 }
818             }
819             
820                 }
821                 
822         if (error) { m->control_pressed = true; }
823                 //sort by frequency
824                 sort(sequences.rbegin(), sequences.rend());
825                 
826                 return sequences;
827         }
828         catch(exception& e) {
829                 m->errorOut(e, "ChimeraPerseusCommand", "loadSequences");
830                 exit(1);
831         }
832 }
833
834 //**********************************************************************************************************************
835 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
836         try {
837                 map<string, int>::iterator it;
838                 map<string, int> nameMap = m->readNames(name);
839                 
840                 //read fasta file and create sequenceData structure - checking for file mismatches
841                 vector<seqData> sequences;
842                 bool error = false;
843                 ifstream in;
844                 m->openInputFile(inputFile, in);
845                 alignLength = 0;
846         
847                 while (!in.eof()) {
848                         
849                         if (m->control_pressed) { in.close(); return sequences; }
850                         
851                         Sequence temp(in); m->gobble(in);
852                         
853                         it = nameMap.find(temp.getName());
854                         if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
855                         else {
856                 temp.setAligned(removeNs(temp.getUnaligned()));
857                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
858                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
859                         }
860                 }
861                 in.close();
862                 
863                 if (error) { m->control_pressed = true; }
864                 
865                 //sort by frequency
866                 sort(sequences.rbegin(), sequences.rend());
867                 
868                 return sequences;
869         }
870         catch(exception& e) {
871                 m->errorOut(e, "ChimeraPerseusCommand", "readFiles");
872                 exit(1);
873         }
874 }
875 //**********************************************************************************************************************
876 string ChimeraPerseusCommand::removeNs(string seq){
877         try {
878         string newSeq = "";
879         for (int i = 0; i < seq.length(); i++) {
880             if (seq[i] != 'N') {  newSeq += seq[i]; }
881         }
882         return newSeq;
883     }
884         catch(exception& e) {
885                 m->errorOut(e, "ChimeraPerseusCommand", "removeNs");
886                 exit(1);
887         }
888 }
889 //**********************************************************************************************************************
890 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){
891         try {           
892                 //read fasta file and create sequenceData structure - checking for file mismatches
893                 vector<seqData> sequences;
894                 ifstream in;
895                 m->openInputFile(inputFile, in);
896                 alignLength = 0;
897         
898                 while (!in.eof()) {
899             Sequence temp(in); m->gobble(in);
900                         
901                         int count = ct->getNumSeqs(temp.getName());
902                         if (m->control_pressed) { break; }
903                         else {
904                 temp.setAligned(removeNs(temp.getUnaligned()));
905                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count));
906                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
907                         }
908                 }
909                 in.close();
910                 
911                 //sort by frequency
912                 sort(sequences.rbegin(), sequences.rend());
913                 
914                 return sequences;
915         }
916         catch(exception& e) {
917                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
918                 exit(1);
919         }
920 }
921 //**********************************************************************************************************************
922 int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& sequences, string accnosFileName, int& numChimeras){
923         try {
924                 
925                 vector<vector<double> > correctModel(4);        //could be an option in the future to input own model matrix
926                 for(int i=0;i<4;i++){   correctModel[i].resize(4);      }
927                 
928                 correctModel[0][0] = 0.000000;  //AA
929                 correctModel[1][0] = 11.619259; //CA
930                 correctModel[2][0] = 11.694004; //TA
931                 correctModel[3][0] = 7.748623;  //GA
932                 
933                 correctModel[1][1] = 0.000000;  //CC
934                 correctModel[2][1] = 7.619657;  //TC
935                 correctModel[3][1] = 12.852562; //GC
936                 
937                 correctModel[2][2] = 0.000000;  //TT
938                 correctModel[3][2] = 10.964048; //TG
939                 
940                 correctModel[3][3] = 0.000000;  //GG
941                 
942                 for(int i=0;i<4;i++){
943                         for(int j=0;j<i;j++){
944                                 correctModel[j][i] = correctModel[i][j];
945                         }
946                 }
947                 
948                 int numSeqs = sequences.size();
949                 //int alignLength = sequences[0].sequence.size();
950                 
951                 ofstream chimeraFile;
952                 ofstream accnosFile;
953                 m->openOutputFile(chimeraFileName, chimeraFile); 
954                 m->openOutputFile(accnosFileName, accnosFile); 
955                 
956                 Perseus myPerseus;
957                 vector<vector<double> > binMatrix = myPerseus.binomial(alignLength);
958                 
959                 chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n";
960                 
961                 vector<bool> chimeras(numSeqs, 0);
962                 
963                 for(int i=0;i<numSeqs;i++){     
964                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
965     
966                         vector<bool> restricted = chimeras;
967                         
968                         vector<vector<int> > leftDiffs(numSeqs);
969                         vector<vector<int> > leftMaps(numSeqs);
970                         vector<vector<int> > rightDiffs(numSeqs);
971                         vector<vector<int> > rightMaps(numSeqs);
972                         
973                         vector<int> singleLeft, bestLeft;
974                         vector<int> singleRight, bestRight;
975                         
976                         int bestSingleIndex, bestSingleDiff;
977                         vector<pwAlign> alignments(numSeqs);
978                         
979                         int comparisons = myPerseus.getAlignments(i, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
980                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
981
982                         int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
983                         
984                         string dummyA, dummyB;
985                         
986             if (sequences[i].sequence.size() < 3) { 
987                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
988             }else if(comparisons >= 2){ 
989                                 minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
990                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
991
992                                 int minMismatchToTrimera = numeric_limits<int>::max();
993                                 int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
994                                 
995                                 if(minMismatchToChimera >= 3 && comparisons >= 3){
996                                         minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
997                                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
998                                 }
999                                 
1000                                 double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
1001                                 
1002                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
1003
1004                                 string type;
1005                                 string chimeraRefSeq;
1006                                 
1007                                 if(minMismatchToChimera - minMismatchToTrimera >= 3){
1008                                         type = "trimera";
1009                                         chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps);
1010                                 }
1011                                 else{
1012                                         type = "chimera";
1013                                         chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
1014                                 }
1015
1016                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
1017                                 
1018                                 double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
1019                                 
1020                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
1021
1022                                 double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq);
1023                                 double loonIndex = myPerseus.calcLoonIndex(sequences[i].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
1024                                 
1025                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
1026
1027                                 chimeraFile << i << '\t' << sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
1028                                 chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << sequences[leftParentBi].seqName << '\t' << sequences[rightParentBi].seqName << '\t';
1029                                 chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t';
1030                                 chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t';
1031                                 
1032                                 double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, alpha, beta);
1033                                 
1034                                 chimeraFile << probability << '\t';
1035                                 
1036                                 if(probability > cutoff){ 
1037                                         chimeraFile << type << endl;
1038                                         accnosFile << sequences[i].seqName << endl;
1039                                         chimeras[i] = 1;
1040                                         numChimeras++;
1041                                 }
1042                                 else{
1043                                         chimeraFile << "good" << endl;
1044                                 }
1045                                 
1046                         }
1047                         else{
1048                                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
1049                         }
1050         
1051                         //report progress
1052                         if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1) + "\n");           }
1053                 }
1054                 
1055                 if((numSeqs) % 100 != 0){       m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");               }
1056                 
1057                 chimeraFile.close();
1058                 accnosFile.close();
1059                 
1060                 return numSeqs;
1061         }
1062         catch(exception& e) {
1063                 m->errorOut(e, "ChimeraPerseusCommand", "driver");
1064                 exit(1);
1065         }
1066 }
1067 /**************************************************************************************************/
1068 int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accnos, string newCountFile, vector<string> groups, string group, string fasta, string name) {
1069         try {
1070                 
1071                 vector<int> processIDS;
1072                 int process = 1;
1073                 int num = 0;
1074                 
1075         CountTable newCount;
1076         if (hasCount && dups) { newCount.readTable(name); }
1077         
1078                 //sanity check
1079                 if (groups.size() < processors) { processors = groups.size(); }
1080                 
1081                 //divide the groups between the processors
1082                 vector<linePair> lines;
1083                 int numGroupsPerProcessor = groups.size() / processors;
1084                 for (int i = 0; i < processors; i++) {
1085                         int startIndex =  i * numGroupsPerProcessor;
1086                         int endIndex = (i+1) * numGroupsPerProcessor;
1087                         if(i == (processors - 1)){      endIndex = groups.size();       }
1088                         lines.push_back(linePair(startIndex, endIndex));
1089                 }
1090                 
1091 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1092                 
1093                 //loop through and create all the processes you want
1094                 while (process != processors) {
1095                         int pid = fork();
1096                         
1097                         if (pid > 0) {
1098                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1099                                 process++;
1100                         }else if (pid == 0){
1101                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1102                                 
1103                                 //pass numSeqs to parent
1104                                 ofstream out;
1105                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1106                                 m->openOutputFile(tempFile, out);
1107                                 out << num << endl;
1108                                 out.close();
1109                                 
1110                                 exit(0);
1111                         }else { 
1112                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1113                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1114                                 exit(0);
1115                         }
1116                 }
1117                 
1118                 //do my part
1119                 num = driverGroups(outputFName, accnos, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1120                 
1121                 //force parent to wait until all the processes are done
1122                 for (int i=0;i<processIDS.size();i++) { 
1123                         int temp = processIDS[i];
1124                         wait(&temp);
1125                 }
1126                 
1127                 for (int i = 0; i < processIDS.size(); i++) {
1128                         ifstream in;
1129                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1130                         m->openInputFile(tempFile, in);
1131                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1132                         in.close(); m->mothurRemove(tempFile);
1133                 }
1134                 
1135 #else
1136                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1137                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1138                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1139                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1140                 
1141                 vector<perseusData*> pDataArray; 
1142                 DWORD   dwThreadIdArray[processors-1];
1143                 HANDLE  hThreadArray[processors-1]; 
1144                 
1145                 //Create processor worker threads.
1146                 for( int i=1; i<processors; i++ ){
1147                         // Allocate memory for thread data.
1148                         string extension = toString(i) + ".temp";
1149                         
1150                         perseusData* tempPerseus = new perseusData(dups, hasName, hasCount, alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension,  accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
1151                         
1152                         pDataArray.push_back(tempPerseus);
1153                         processIDS.push_back(i);
1154                         
1155                         //MyPerseusThreadFunction is in header. It must be global or static to work with the threads.
1156                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1157                         hThreadArray[i-1] = CreateThread(NULL, 0, MyPerseusThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1158                 }
1159                 
1160                 
1161                 //using the main process as a worker saves time and memory
1162                 num = driverGroups(outputFName, accnos, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1163                 
1164                 //Wait until all threads have terminated.
1165                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1166                         
1167                 //Close all thread handles and free memory allocations.
1168                 for(int i=0; i < pDataArray.size(); i++){
1169                         num += pDataArray[i]->count;
1170                         CloseHandle(hThreadArray[i]);
1171                         delete pDataArray[i];
1172                 }
1173 #endif          
1174                 //read my own
1175         if (hasCount && dups) {
1176             if (!m->isBlank(accnos + ".byCount")) {
1177                 ifstream in2;
1178                 m->openInputFile(accnos + ".byCount", in2);
1179                 
1180                 string name, group;
1181                 while (!in2.eof()) {
1182                     in2 >> name >> group; m->gobble(in2);
1183                     newCount.setAbund(name, group, 0);
1184                 }
1185                 in2.close();
1186             }
1187             m->mothurRemove(accnos + ".byCount");
1188         }
1189
1190                 
1191                 //append output files
1192                 for(int i=0;i<processIDS.size();i++){
1193                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1194                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1195                         
1196                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1197                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1198             
1199             if (hasCount && dups) {
1200                 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1201                     ifstream in2;
1202                     m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1203                     
1204                     string name, group;
1205                     while (!in2.eof()) {
1206                         in2 >> name >> group; m->gobble(in2);
1207                         newCount.setAbund(name, group, 0);
1208                     }
1209                     in2.close();
1210                 }
1211                 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1212             }
1213
1214                 }
1215                 
1216         //print new *.pick.count_table
1217         if (hasCount && dups) {  newCount.printTable(newCountFile);   }
1218
1219                 return num;     
1220                 
1221         }
1222         catch(exception& e) {
1223                 m->errorOut(e, "ChimeraPerseusCommand", "createProcessesGroups");
1224                 exit(1);
1225         }
1226 }
1227 //**********************************************************************************************************************
1228 int ChimeraPerseusCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName){
1229         try {
1230                 map<string, string>::iterator itUnique;
1231                 int total = 0;
1232                 
1233                 //edit accnos file
1234                 ifstream in2; 
1235                 m->openInputFile(accnosFileName, in2);
1236                 
1237                 ofstream out2;
1238                 m->openOutputFile(accnosFileName+".temp", out2);
1239                 
1240                 string name;
1241                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
1242                 set<string>::iterator itNames;
1243                 set<string> chimerasInFile;
1244                 set<string>::iterator itChimeras;
1245                 
1246                 
1247                 while (!in2.eof()) {
1248                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
1249                         
1250                         in2 >> name; m->gobble(in2);
1251                         
1252                         //find unique name
1253                         itUnique = uniqueNames.find(name);
1254                         
1255                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1256                         else {
1257                                 itChimeras = chimerasInFile.find((itUnique->second));
1258                                 
1259                                 if (itChimeras == chimerasInFile.end()) {
1260                                         out2 << itUnique->second << endl;
1261                                         chimerasInFile.insert((itUnique->second));
1262                                         total++;
1263                                 }
1264                         }
1265                 }
1266                 in2.close();
1267                 out2.close();
1268                 
1269                 m->mothurRemove(accnosFileName);
1270                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
1271                 
1272                 //edit chimera file
1273                 ifstream in; 
1274                 m->openInputFile(outputFileName, in);
1275                 
1276                 ofstream out;
1277                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1278                 
1279                 int DiffsToBestMatch, BestMatchIndex, DiffstToChimera, IndexofLeftParent, IndexOfRightParent;
1280                 float temp1,temp2, temp3, temp4, temp5, temp6, temp7, temp8;
1281                 string index, BestMatchName, parent1, parent2, flag;
1282                 name = "";
1283                 namesInFile.clear();    
1284                 //assumptions - in file each read will always look like 
1285                 /*                                                                              
1286                  SequenceIndex  Name    DiffsToBestMatch        BestMatchIndex  BestMatchName   DiffstToChimera IndexofLeftParent       IndexOfRightParent      NameOfLeftParent        NameOfRightParent       DistanceToBestMatch     cIndex  (cIndex - singleDist)   loonIndex       MismatchesToChimera     MismatchToTrimera       ChimeraBreakPoint       LogisticProbability     TypeOfSequence
1287                  0      F01QG4L02JVBQY  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
1288                  1      F01QG4L02ICTC6  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
1289                  2      F01QG4L02JZOEC  48      0       F01QG4L02JVBQY  47      0       0       F01QG4L02JVBQY  F01QG4L02JVBQY  2.0449  2.03545 -0.00944493     0       47      2147483647      138     0       good
1290                  3      F01QG4L02G7JEC  42      0       F01QG4L02JVBQY  40      1       0       F01QG4L02ICTC6  F01QG4L02JVBQY  1.87477 1.81113 -0.0636404      5.80145 40      2147483647      25      0       good
1291                  */
1292                 
1293                 //get and print headers
1294                 BestMatchName = m->getline(in); m->gobble(in);
1295                 out << BestMatchName << endl;
1296                 
1297                 while (!in.eof()) {
1298                         
1299                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
1300                         
1301                         bool print = false;
1302                         in >> index;    m->gobble(in);
1303                         
1304                         if (index != "SequenceIndex") { //if you are not a header line, there will be a header line for each group if group file is given
1305                                 in >> name;             m->gobble(in);
1306                                 in >> DiffsToBestMatch; m->gobble(in);
1307                                 in >> BestMatchIndex; m->gobble(in);
1308                                 in >> BestMatchName; m->gobble(in);
1309                                 in >> DiffstToChimera; m->gobble(in);
1310                                 in >> IndexofLeftParent; m->gobble(in);
1311                                 in >> IndexOfRightParent; m->gobble(in);
1312                                 in >> parent1;  m->gobble(in);
1313                                 in >> parent2;  m->gobble(in);
1314                                 in >> temp1 >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> flag; m->gobble(in);
1315                                 
1316                                 //find unique name
1317                                 itUnique = uniqueNames.find(name);
1318                                 
1319                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1320                                 else {
1321                                         name = itUnique->second;
1322                                         //is this name already in the file
1323                                         itNames = namesInFile.find((name));
1324                                         
1325                                         if (itNames == namesInFile.end()) { //no not in file
1326                                                 if (flag == "good") { //are you really a no??
1327                                                         //is this sequence really not chimeric??
1328                                                         itChimeras = chimerasInFile.find(name);
1329                                                         
1330                                                         //then you really are a no so print, otherwise skip
1331                                                         if (itChimeras == chimerasInFile.end()) { print = true; }
1332                                                 }else{ print = true; }
1333                                         }
1334                                 }
1335                                 
1336                                 if (print) {
1337                                         out << index << '\t' << name  << '\t' << DiffsToBestMatch << '\t' << BestMatchIndex << '\t';
1338                                         namesInFile.insert(name);
1339                                         
1340                                         if (BestMatchName != "Null") {
1341                                                 itUnique = uniqueNames.find(BestMatchName);
1342                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find BestMatchName "+ BestMatchName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1343                                                 else {  out << itUnique->second << '\t';        }                                       
1344                                         }else { out << "Null" << '\t'; }
1345                                         
1346                                         out << DiffstToChimera << '\t' << IndexofLeftParent << '\t' << IndexOfRightParent << '\t';
1347                                         
1348                                         if (parent1 != "Null") {
1349                                                 itUnique = uniqueNames.find(parent1);
1350                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent1 "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1351                                                 else {  out << itUnique->second << '\t';        }
1352                                         }else { out << "Null" << '\t'; }
1353                                         
1354                                         if (parent1 != "Null") {
1355                                                 itUnique = uniqueNames.find(parent2);
1356                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent2 "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1357                                                 else {  out << itUnique->second << '\t';        }
1358                                         }else { out << "Null" << '\t'; }
1359                                         
1360                                         out << temp1 << '\t' << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << flag << endl;    
1361                                 }
1362                         }else { index = m->getline(in); m->gobble(in); }
1363                 }
1364                 in.close();
1365                 out.close();
1366                 
1367                 m->mothurRemove(outputFileName);
1368                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
1369                 
1370                 return total;
1371         }
1372         catch(exception& e) {
1373                 m->errorOut(e, "ChimeraPerseusCommand", "deconvoluteResults");
1374                 exit(1);
1375         }
1376 }       
1377 //**********************************************************************************************************************
1378
1379