]> git.donarmstrong.com Git - mothur.git/blob - chimeraperseuscommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / chimeraperseuscommand.cpp
1 /*
2  *  chimeraperseuscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/26/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraperseuscommand.h"
11 #include "deconvolutecommand.h"
12 #include "sequence.hpp"
13 //**********************************************************************************************************************
14 vector<string> ChimeraPerseusCommand::setParameters(){  
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
19                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
21                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
22                 CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pcutoff);
23                 CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "",false,false); parameters.push_back(palpha);
24                 CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "",false,false); parameters.push_back(pbeta);
25                         
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "ChimeraPerseusCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string ChimeraPerseusCommand::getHelpString(){  
37         try {
38                 string helpString = "";
39                 helpString += "The chimera.perseus command reads a fastafile and namefile and outputs potentially chimeric sequences.\n";
40                 helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
41                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
42                 helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n";
43                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
44                 helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
45                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
46                 helpString += "The alpha parameter ....  The default is -5.54. \n";
47                 helpString += "The beta parameter ....  The default is 0.33. \n";
48                 helpString += "The cutoff parameter ....  The default is 0.50. \n";
49                 helpString += "The chimera.perseus command should be in the following format: \n";
50                 helpString += "chimera.perseus(fasta=yourFastaFile, name=yourNameFile) \n";
51                 helpString += "Example: chimera.perseus(fasta=AD.align, name=AD.names) \n";
52                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
53                 return helpString;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "ChimeraPerseusCommand", "getHelpString");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 string ChimeraPerseusCommand::getOutputFileNameTag(string type, string inputName=""){   
62         try {
63         string outputFileName = "";
64                 map<string, vector<string> >::iterator it;
65         
66         //is this a type this command creates
67         it = outputTypes.find(type);
68         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
69         else {
70             if (type == "chimera") {  outputFileName =  "perseus.chimeras"; }
71             else if (type == "accnos") {  outputFileName =  "perseus.accnos"; }
72             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
73         }
74         return outputFileName;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "ChimeraPerseusCommand", "getOutputFileNameTag");
78                 exit(1);
79         }
80 }
81 //**********************************************************************************************************************
82 ChimeraPerseusCommand::ChimeraPerseusCommand(){ 
83         try {
84                 abort = true; calledHelp = true;
85                 setParameters();
86                 vector<string> tempOutNames;
87                 outputTypes["chimera"] = tempOutNames;
88                 outputTypes["accnos"] = tempOutNames;
89         }
90         catch(exception& e) {
91                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
92                 exit(1);
93         }
94 }
95 //***************************************************************************************************************
96 ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
97         try {
98                 abort = false; calledHelp = false; 
99                 
100                 //allow user to run help
101                 if(option == "help") { help(); abort = true; calledHelp = true; }
102                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
103                 
104                 else {
105                         vector<string> myArray = setParameters();
106                         
107                         OptionParser parser(option);
108                         map<string,string> parameters = parser.getParameters();
109                         
110                         ValidParameters validParameter("chimera.uchime");
111                         map<string,string>::iterator it;
112                         
113                         //check to make sure all parameters are valid for command
114                         for (it = parameters.begin(); it != parameters.end(); it++) { 
115                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
116                         }
117                         
118                         vector<string> tempOutNames;
119                         outputTypes["chimera"] = tempOutNames;
120                         outputTypes["accnos"] = tempOutNames;
121                         
122                         //if the user changes the input directory command factory will send this info to us in the output parameter 
123                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
124                         if (inputDir == "not found"){   inputDir = "";          }
125                         
126                         //check for required parameters
127                         fastafile = validParameter.validFile(parameters, "fasta", false);
128                         if (fastafile == "not found") {                                 
129                                 //if there is a current fasta file, use it
130                                 string filename = m->getFastaFile(); 
131                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
132                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
133                         }else { 
134                                 m->splitAtDash(fastafile, fastaFileNames);
135                                 
136                                 //go through files and make sure they are good, if not, then disregard them
137                                 for (int i = 0; i < fastaFileNames.size(); i++) {
138                                         
139                                         bool ignore = false;
140                                         if (fastaFileNames[i] == "current") { 
141                                                 fastaFileNames[i] = m->getFastaFile(); 
142                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
143                                                 else {  
144                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
145                                                         //erase from file list
146                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
147                                                         i--;
148                                                 }
149                                         }
150                                         
151                                         if (!ignore) {
152                                                 
153                                                 if (inputDir != "") {
154                                                         string path = m->hasPath(fastaFileNames[i]);
155                                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
157                                                 }
158                                                 
159                                                 int ableToOpen;
160                                                 ifstream in;
161                                                 
162                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
163                                                 
164                                                 //if you can't open it, try default location
165                                                 if (ableToOpen == 1) {
166                                                         if (m->getDefaultPath() != "") { //default path is set
167                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
168                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
169                                                                 ifstream in2;
170                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
171                                                                 in2.close();
172                                                                 fastaFileNames[i] = tryPath;
173                                                         }
174                                                 }
175                                                 
176                                                 if (ableToOpen == 1) {
177                                                         if (m->getOutputDir() != "") { //default path is set
178                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
179                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
180                                                                 ifstream in2;
181                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
182                                                                 in2.close();
183                                                                 fastaFileNames[i] = tryPath;
184                                                         }
185                                                 }
186                                                 
187                                                 in.close();
188                                                 
189                                                 if (ableToOpen == 1) { 
190                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
191                                                         //erase from file list
192                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
193                                                         i--;
194                                                 }else {
195                                                         m->setFastaFile(fastaFileNames[i]);
196                                                 }
197                                         }
198                                 }
199                                 
200                                 //make sure there is at least one valid file left
201                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
202                         }
203                         
204                         
205                         //check for required parameters
206                         bool hasName = true;
207                         namefile = validParameter.validFile(parameters, "name", false);
208                         if (namefile == "not found") { 
209                                 //if there is a current fasta file, use it
210                                 string filename = m->getNameFile(); 
211                                 if (filename != "") { nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
212                                 else {  m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; }                                
213                                 hasName = false;
214                         }else { 
215                                 m->splitAtDash(namefile, nameFileNames);
216                                 
217                                 //go through files and make sure they are good, if not, then disregard them
218                                 for (int i = 0; i < nameFileNames.size(); i++) {
219                                         
220                                         bool ignore = false;
221                                         if (nameFileNames[i] == "current") { 
222                                                 nameFileNames[i] = m->getNameFile(); 
223                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
224                                                 else {  
225                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
226                                                         //erase from file list
227                                                         nameFileNames.erase(nameFileNames.begin()+i);
228                                                         i--;
229                                                 }
230                                         }
231                                         
232                                         if (!ignore) {
233                                                 
234                                                 if (inputDir != "") {
235                                                         string path = m->hasPath(nameFileNames[i]);
236                                                         //if the user has not given a path then, add inputdir. else leave path alone.
237                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
238                                                 }
239                                                 
240                                                 int ableToOpen;
241                                                 ifstream in;
242                                                 
243                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
244                                                 
245                                                 //if you can't open it, try default location
246                                                 if (ableToOpen == 1) {
247                                                         if (m->getDefaultPath() != "") { //default path is set
248                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
249                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
250                                                                 ifstream in2;
251                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
252                                                                 in2.close();
253                                                                 nameFileNames[i] = tryPath;
254                                                         }
255                                                 }
256                                                 
257                                                 if (ableToOpen == 1) {
258                                                         if (m->getOutputDir() != "") { //default path is set
259                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
260                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
261                                                                 ifstream in2;
262                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
263                                                                 in2.close();
264                                                                 nameFileNames[i] = tryPath;
265                                                         }
266                                                 }
267                                                 
268                                                 in.close();
269                                                 
270                                                 if (ableToOpen == 1) { 
271                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
272                                                         //erase from file list
273                                                         nameFileNames.erase(nameFileNames.begin()+i);
274                                                         i--;
275                                                 }else {
276                                                         m->setNameFile(nameFileNames[i]);
277                                                 }
278                                         }
279                                 }
280                                 
281                                 //make sure there is at least one valid file left
282                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
283                         }
284                         
285                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
286                         
287                         bool hasGroup = true;
288                         groupfile = validParameter.validFile(parameters, "group", false);
289                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
290                         else { 
291                                 m->splitAtDash(groupfile, groupFileNames);
292                                 
293                                 //go through files and make sure they are good, if not, then disregard them
294                                 for (int i = 0; i < groupFileNames.size(); i++) {
295                                         
296                                         bool ignore = false;
297                                         if (groupFileNames[i] == "current") { 
298                                                 groupFileNames[i] = m->getGroupFile(); 
299                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
300                                                 else {  
301                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
302                                                         //erase from file list
303                                                         groupFileNames.erase(groupFileNames.begin()+i);
304                                                         i--;
305                                                 }
306                                         }
307                                         
308                                         if (!ignore) {
309                                                 
310                                                 if (inputDir != "") {
311                                                         string path = m->hasPath(groupFileNames[i]);
312                                                         //if the user has not given a path then, add inputdir. else leave path alone.
313                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
314                                                 }
315                                                 
316                                                 int ableToOpen;
317                                                 ifstream in;
318                                                 
319                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
320                                                 
321                                                 //if you can't open it, try default location
322                                                 if (ableToOpen == 1) {
323                                                         if (m->getDefaultPath() != "") { //default path is set
324                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
325                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
326                                                                 ifstream in2;
327                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
328                                                                 in2.close();
329                                                                 groupFileNames[i] = tryPath;
330                                                         }
331                                                 }
332                                                 
333                                                 if (ableToOpen == 1) {
334                                                         if (m->getOutputDir() != "") { //default path is set
335                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
336                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
337                                                                 ifstream in2;
338                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
339                                                                 in2.close();
340                                                                 groupFileNames[i] = tryPath;
341                                                         }
342                                                 }
343                                                 
344                                                 in.close();
345                                                 
346                                                 if (ableToOpen == 1) { 
347                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
348                                                         //erase from file list
349                                                         groupFileNames.erase(groupFileNames.begin()+i);
350                                                         i--;
351                                                 }else {
352                                                         m->setGroupFile(groupFileNames[i]);
353                                                 }
354                                         }
355                                 }
356                                 
357                                 //make sure there is at least one valid file left
358                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
359                         }
360                         
361                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
362                         
363                         
364                         //if the user changes the output directory command factory will send this info to us in the output parameter 
365                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
366                         
367                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
368                         m->setProcessors(temp);
369                         m->mothurConvert(temp, processors);
370                         
371                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.50";  }
372                         m->mothurConvert(temp, cutoff);
373                         
374                         temp = validParameter.validFile(parameters, "alpha", false);    if (temp == "not found"){       temp = "-5.54"; }
375                         m->mothurConvert(temp, alpha);
376                         
377                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
378                         m->mothurConvert(temp, beta);
379                 }
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
383                 exit(1);
384         }
385 }
386 //***************************************************************************************************************
387
388 int ChimeraPerseusCommand::execute(){
389         try{
390                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
391                 
392                                 
393                 //process each file
394                 for (int s = 0; s < fastaFileNames.size(); s++) {
395                         
396                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
397                         
398                         int start = time(NULL); 
399                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
400                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
401                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
402
403                         //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
404                         
405                         //you provided a groupfile
406                         string groupFile = "";
407                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
408                         
409                         string nameFile = "";
410                         if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
411                                 nameFile = nameFileNames[s];
412                         }else { nameFile = getNamesFile(fastaFileNames[s]); }
413                         
414                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0;     }                               
415                         
416                         int numSeqs = 0;
417                         int numChimeras = 0;
418                         
419                         if (groupFile != "") {
420                                 //Parse sequences by group
421                                 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
422                                 vector<string> groups = parser.getNamesOfGroups();
423                                 
424                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
425                                 
426                                 //clears files
427                                 ofstream out, out1, out2;
428                                 m->openOutputFile(outputFileName, out); out.close(); 
429                                 m->openOutputFile(accnosFileName, out1); out1.close();
430                                 
431                                 if(processors == 1)     {       numSeqs = driverGroups(parser, outputFileName, accnosFileName, 0, groups.size(), groups);       }
432                                 else                            {       numSeqs = createProcessesGroups(parser, outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
433                                 
434                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
435                                 
436                                 numChimeras = deconvoluteResults(parser, outputFileName, accnosFileName);
437                                 
438                                 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
439                                 
440                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
441                                 
442                         }else{
443                                 if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
444                                 
445                                 //read sequences and store sorted by frequency
446                                 vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
447                                 
448                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
449                                 
450                                 numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
451                         }
452                         
453                         if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
454                         
455                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
456                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
457                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
458                 }
459                 
460                 //set accnos file as new current accnosfile
461                 string current = "";
462                 itTypes = outputTypes.find("accnos");
463                 if (itTypes != outputTypes.end()) {
464                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
465                 }
466                 
467                 m->mothurOutEndLine();
468                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
469                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
470                 m->mothurOutEndLine();
471                 
472                 return 0;
473                 
474         }
475         catch(exception& e) {
476                 m->errorOut(e, "ChimeraPerseusCommand", "execute");
477                 exit(1);
478         }
479 }
480 //**********************************************************************************************************************
481 string ChimeraPerseusCommand::getNamesFile(string& inputFile){
482         try {
483                 string nameFile = "";
484                 
485                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
486                 
487                 //use unique.seqs to create new name and fastafile
488                 string inputString = "fasta=" + inputFile;
489                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
490                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
491                 m->mothurCalling = true;
492         
493                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
494                 uniqueCommand->execute();
495                 
496                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
497                 
498                 delete uniqueCommand;
499                 m->mothurCalling = false;
500                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
501                 
502                 nameFile = filenames["name"][0];
503                 inputFile = filenames["fasta"][0];
504                 
505                 return nameFile;
506         }
507         catch(exception& e) {
508                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
509                 exit(1);
510         }
511 }
512 //**********************************************************************************************************************
513 int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFName, string accnos, int start, int end, vector<string> groups){
514         try {
515                 
516                 int totalSeqs = 0;
517                 int numChimeras = 0;
518                 
519                 for (int i = start; i < end; i++) {
520                         
521                         m->mothurOutEndLine(); m->mothurOut("Checking sequences from group " + groups[i] + "...");      m->mothurOutEndLine();                                  
522                         
523                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
524                         
525                         vector<seqData> sequences = loadSequences(parser, groups[i]);
526                         
527                         if (m->control_pressed) { return 0; }
528                         
529                         int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras);
530                         totalSeqs += numSeqs;
531                         
532                         if (m->control_pressed) { return 0; }
533                         
534                         //append files
535                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
536                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
537                         
538                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
539                 }       
540                 
541                 return totalSeqs;
542                 
543         }
544         catch(exception& e) {
545                 m->errorOut(e, "ChimeraPerseusCommand", "driverGroups");
546                 exit(1);
547         }
548 }       
549 //**********************************************************************************************************************
550 vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, string group){
551         try {
552                 
553                 vector<Sequence> thisGroupsSeqs = parser.getSeqs(group);
554                 map<string, string> nameMap = parser.getNameMap(group);
555                 map<string, string>::iterator it;
556                 
557                 vector<seqData> sequences;
558                 bool error = false;
559         alignLength = 0;
560                 
561                 for (int i = 0; i < thisGroupsSeqs.size(); i++) {
562                 
563                         if (m->control_pressed) {  return sequences; }
564                         
565                         it = nameMap.find(thisGroupsSeqs[i].getName());
566                         if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
567                         else {
568                                 int num = m->getNumNames(it->second);
569                                 sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
570                 if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
571                         }
572                 }
573                 
574                 if (error) { m->control_pressed = true; }
575                 
576                 //sort by frequency
577                 sort(sequences.rbegin(), sequences.rend());
578                 
579                 return sequences;
580         }
581         catch(exception& e) {
582                 m->errorOut(e, "ChimeraPerseusCommand", "loadSequences");
583                 exit(1);
584         }
585 }
586
587 //**********************************************************************************************************************
588 vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
589         try {
590                 map<string, int>::iterator it;
591                 map<string, int> nameMap = m->readNames(name);
592                 
593                 //read fasta file and create sequenceData structure - checking for file mismatches
594                 vector<seqData> sequences;
595                 bool error = false;
596                 ifstream in;
597                 m->openInputFile(inputFile, in);
598                 alignLength = 0;
599         
600                 while (!in.eof()) {
601                         
602                         if (m->control_pressed) { in.close(); return sequences; }
603                         
604                         Sequence temp(in); m->gobble(in);
605                         
606                         it = nameMap.find(temp.getName());
607                         if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
608                         else {
609                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
610                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
611                         }
612                 }
613                 in.close();
614                 
615                 if (error) { m->control_pressed = true; }
616                 
617                 //sort by frequency
618                 sort(sequences.rbegin(), sequences.rend());
619                 
620                 return sequences;
621         }
622         catch(exception& e) {
623                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
624                 exit(1);
625         }
626 }
627 //**********************************************************************************************************************
628 int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& sequences, string accnosFileName, int& numChimeras){
629         try {
630                 
631                 vector<vector<double> > correctModel(4);        //could be an option in the future to input own model matrix
632                 for(int i=0;i<4;i++){   correctModel[i].resize(4);      }
633                 
634                 correctModel[0][0] = 0.000000;  //AA
635                 correctModel[1][0] = 11.619259; //CA
636                 correctModel[2][0] = 11.694004; //TA
637                 correctModel[3][0] = 7.748623;  //GA
638                 
639                 correctModel[1][1] = 0.000000;  //CC
640                 correctModel[2][1] = 7.619657;  //TC
641                 correctModel[3][1] = 12.852562; //GC
642                 
643                 correctModel[2][2] = 0.000000;  //TT
644                 correctModel[3][2] = 10.964048; //TG
645                 
646                 correctModel[3][3] = 0.000000;  //GG
647                 
648                 for(int i=0;i<4;i++){
649                         for(int j=0;j<i;j++){
650                                 correctModel[j][i] = correctModel[i][j];
651                         }
652                 }
653                 
654                 int numSeqs = sequences.size();
655                 //int alignLength = sequences[0].sequence.size();
656                 
657                 ofstream chimeraFile;
658                 ofstream accnosFile;
659                 m->openOutputFile(chimeraFileName, chimeraFile); 
660                 m->openOutputFile(accnosFileName, accnosFile); 
661                 
662                 Perseus myPerseus;
663                 vector<vector<double> > binMatrix = myPerseus.binomial(alignLength);
664                 
665                 chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n";
666                 
667                 vector<bool> chimeras(numSeqs, 0);
668                 
669                 for(int i=0;i<numSeqs;i++){     
670                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
671     
672                         vector<bool> restricted = chimeras;
673                         
674                         vector<vector<int> > leftDiffs(numSeqs);
675                         vector<vector<int> > leftMaps(numSeqs);
676                         vector<vector<int> > rightDiffs(numSeqs);
677                         vector<vector<int> > rightMaps(numSeqs);
678                         
679                         vector<int> singleLeft, bestLeft;
680                         vector<int> singleRight, bestRight;
681                         
682                         int bestSingleIndex, bestSingleDiff;
683                         vector<pwAlign> alignments(numSeqs);
684                         
685                         int comparisons = myPerseus.getAlignments(i, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
686                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
687
688                         int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
689                         
690                         string dummyA, dummyB;
691                         
692             if (sequences[i].sequence.size() < 3) { 
693                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
694             }else if(comparisons >= 2){ 
695                                 minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
696                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
697
698                                 int minMismatchToTrimera = numeric_limits<int>::max();
699                                 int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
700                                 
701                                 if(minMismatchToChimera >= 3 && comparisons >= 3){
702                                         minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
703                                         if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
704                                 }
705                                 
706                                 double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
707                                 
708                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
709
710                                 string type;
711                                 string chimeraRefSeq;
712                                 
713                                 if(minMismatchToChimera - minMismatchToTrimera >= 3){
714                                         type = "trimera";
715                                         chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps);
716                                 }
717                                 else{
718                                         type = "chimera";
719                                         chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
720                                 }
721                                 ;
722                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
723                                 
724                                 double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
725                                 
726                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
727
728                                 double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq);
729                                 double loonIndex = myPerseus.calcLoonIndex(sequences[i].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
730                                 
731                                 if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
732
733                                 chimeraFile << i << '\t' << sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
734                                 chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << sequences[leftParentBi].seqName << '\t' << sequences[rightParentBi].seqName << '\t';
735                                 chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t';
736                                 chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t';
737                                 
738                                 double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, alpha, beta);
739                                 
740                                 chimeraFile << probability << '\t';
741                                 
742                                 if(probability > cutoff){ 
743                                         chimeraFile << type << endl;
744                                         accnosFile << sequences[i].seqName << endl;
745                                         chimeras[i] = 1;
746                                         numChimeras++;
747                                 }
748                                 else{
749                                         chimeraFile << "good" << endl;
750                                 }
751                                 
752                         }
753                         else{
754                                 chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
755                         }
756         
757                         //report progress
758                         if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1) + "\n");           }
759                 }
760                 
761                 if((numSeqs) % 100 != 0){       m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");               }
762                 
763                 chimeraFile.close();
764                 accnosFile.close();
765                 
766                 return numSeqs;
767         }
768         catch(exception& e) {
769                 m->errorOut(e, "ChimeraPerseusCommand", "driver");
770                 exit(1);
771         }
772 }
773 /**************************************************************************************************/
774 int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) {
775         try {
776                 
777                 vector<int> processIDS;
778                 int process = 1;
779                 int num = 0;
780                 
781                 //sanity check
782                 if (groups.size() < processors) { processors = groups.size(); }
783                 
784                 //divide the groups between the processors
785                 vector<linePair> lines;
786                 int numGroupsPerProcessor = groups.size() / processors;
787                 for (int i = 0; i < processors; i++) {
788                         int startIndex =  i * numGroupsPerProcessor;
789                         int endIndex = (i+1) * numGroupsPerProcessor;
790                         if(i == (processors - 1)){      endIndex = groups.size();       }
791                         lines.push_back(linePair(startIndex, endIndex));
792                 }
793                 
794 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
795                 
796                 //loop through and create all the processes you want
797                 while (process != processors) {
798                         int pid = fork();
799                         
800                         if (pid > 0) {
801                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
802                                 process++;
803                         }else if (pid == 0){
804                                 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
805                                 
806                                 //pass numSeqs to parent
807                                 ofstream out;
808                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
809                                 m->openOutputFile(tempFile, out);
810                                 out << num << endl;
811                                 out.close();
812                                 
813                                 exit(0);
814                         }else { 
815                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
816                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
817                                 exit(0);
818                         }
819                 }
820                 
821                 //do my part
822                 num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
823                 
824                 //force parent to wait until all the processes are done
825                 for (int i=0;i<processIDS.size();i++) { 
826                         int temp = processIDS[i];
827                         wait(&temp);
828                 }
829                 
830                 for (int i = 0; i < processIDS.size(); i++) {
831                         ifstream in;
832                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
833                         m->openInputFile(tempFile, in);
834                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
835                         in.close(); m->mothurRemove(tempFile);
836                 }
837                 
838 #else
839                 //////////////////////////////////////////////////////////////////////////////////////////////////////
840                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
841                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
842                 //////////////////////////////////////////////////////////////////////////////////////////////////////
843                 
844                 vector<perseusData*> pDataArray; 
845                 DWORD   dwThreadIdArray[processors-1];
846                 HANDLE  hThreadArray[processors-1]; 
847                 
848                 //Create processor worker threads.
849                 for( int i=1; i<processors; i++ ){
850                         // Allocate memory for thread data.
851                         string extension = toString(i) + ".temp";
852                         
853                         perseusData* tempPerseus = new perseusData(alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i);
854                         
855                         pDataArray.push_back(tempPerseus);
856                         processIDS.push_back(i);
857                         
858                         //MyPerseusThreadFunction is in header. It must be global or static to work with the threads.
859                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
860                         hThreadArray[i-1] = CreateThread(NULL, 0, MyPerseusThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
861                 }
862                 
863                 
864                 //using the main process as a worker saves time and memory
865                 num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
866                 
867                 //Wait until all threads have terminated.
868                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
869                         
870                 //Close all thread handles and free memory allocations.
871                 for(int i=0; i < pDataArray.size(); i++){
872                         num += pDataArray[i]->count;
873                         CloseHandle(hThreadArray[i]);
874                         delete pDataArray[i];
875                 }
876 #endif          
877                 
878                 
879                 //append output files
880                 for(int i=0;i<processIDS.size();i++){
881                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
882                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
883                         
884                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
885                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
886                 }
887                 
888                 return num;     
889                 
890         }
891         catch(exception& e) {
892                 m->errorOut(e, "ChimeraPerseusCommand", "createProcessesGroups");
893                 exit(1);
894         }
895 }
896 //**********************************************************************************************************************
897 int ChimeraPerseusCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName){
898         try {
899                 map<string, string> uniqueNames = parser.getAllSeqsMap();
900                 map<string, string>::iterator itUnique;
901                 int total = 0;
902                 
903                 //edit accnos file
904                 ifstream in2; 
905                 m->openInputFile(accnosFileName, in2);
906                 
907                 ofstream out2;
908                 m->openOutputFile(accnosFileName+".temp", out2);
909                 
910                 string name;
911                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
912                 set<string>::iterator itNames;
913                 set<string> chimerasInFile;
914                 set<string>::iterator itChimeras;
915                 
916                 
917                 while (!in2.eof()) {
918                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
919                         
920                         in2 >> name; m->gobble(in2);
921                         
922                         //find unique name
923                         itUnique = uniqueNames.find(name);
924                         
925                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
926                         else {
927                                 itChimeras = chimerasInFile.find((itUnique->second));
928                                 
929                                 if (itChimeras == chimerasInFile.end()) {
930                                         out2 << itUnique->second << endl;
931                                         chimerasInFile.insert((itUnique->second));
932                                         total++;
933                                 }
934                         }
935                 }
936                 in2.close();
937                 out2.close();
938                 
939                 m->mothurRemove(accnosFileName);
940                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
941                 
942                 //edit chimera file
943                 ifstream in; 
944                 m->openInputFile(outputFileName, in);
945                 
946                 ofstream out;
947                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
948                 
949                 int DiffsToBestMatch, BestMatchIndex, DiffstToChimera, IndexofLeftParent, IndexOfRightParent;
950                 float temp1,temp2, temp3, temp4, temp5, temp6, temp7, temp8;
951                 string index, BestMatchName, parent1, parent2, flag;
952                 name = "";
953                 namesInFile.clear();    
954                 //assumptions - in file each read will always look like 
955                 /*                                                                              
956                  SequenceIndex  Name    DiffsToBestMatch        BestMatchIndex  BestMatchName   DiffstToChimera IndexofLeftParent       IndexOfRightParent      NameOfLeftParent        NameOfRightParent       DistanceToBestMatch     cIndex  (cIndex - singleDist)   loonIndex       MismatchesToChimera     MismatchToTrimera       ChimeraBreakPoint       LogisticProbability     TypeOfSequence
957                  0      F01QG4L02JVBQY  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
958                  1      F01QG4L02ICTC6  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
959                  2      F01QG4L02JZOEC  48      0       F01QG4L02JVBQY  47      0       0       F01QG4L02JVBQY  F01QG4L02JVBQY  2.0449  2.03545 -0.00944493     0       47      2147483647      138     0       good
960                  3      F01QG4L02G7JEC  42      0       F01QG4L02JVBQY  40      1       0       F01QG4L02ICTC6  F01QG4L02JVBQY  1.87477 1.81113 -0.0636404      5.80145 40      2147483647      25      0       good
961                  */
962                 
963                 //get and print headers
964                 BestMatchName = m->getline(in); m->gobble(in);
965                 out << BestMatchName << endl;
966                 
967                 while (!in.eof()) {
968                         
969                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
970                         
971                         bool print = false;
972                         in >> index;    m->gobble(in);
973                         
974                         if (index != "SequenceIndex") { //if you are not a header line, there will be a header line for each group if group file is given
975                                 in >> name;             m->gobble(in);
976                                 in >> DiffsToBestMatch; m->gobble(in);
977                                 in >> BestMatchIndex; m->gobble(in);
978                                 in >> BestMatchName; m->gobble(in);
979                                 in >> DiffstToChimera; m->gobble(in);
980                                 in >> IndexofLeftParent; m->gobble(in);
981                                 in >> IndexOfRightParent; m->gobble(in);
982                                 in >> parent1;  m->gobble(in);
983                                 in >> parent2;  m->gobble(in);
984                                 in >> temp1 >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> flag; m->gobble(in);
985                                 
986                                 //find unique name
987                                 itUnique = uniqueNames.find(name);
988                                 
989                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
990                                 else {
991                                         name = itUnique->second;
992                                         //is this name already in the file
993                                         itNames = namesInFile.find((name));
994                                         
995                                         if (itNames == namesInFile.end()) { //no not in file
996                                                 if (flag == "good") { //are you really a no??
997                                                         //is this sequence really not chimeric??
998                                                         itChimeras = chimerasInFile.find(name);
999                                                         
1000                                                         //then you really are a no so print, otherwise skip
1001                                                         if (itChimeras == chimerasInFile.end()) { print = true; }
1002                                                 }else{ print = true; }
1003                                         }
1004                                 }
1005                                 
1006                                 if (print) {
1007                                         out << index << '\t' << name  << '\t' << DiffsToBestMatch << '\t' << BestMatchIndex << '\t';
1008                                         namesInFile.insert(name);
1009                                         
1010                                         if (BestMatchName != "Null") {
1011                                                 itUnique = uniqueNames.find(BestMatchName);
1012                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find BestMatchName "+ BestMatchName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1013                                                 else {  out << itUnique->second << '\t';        }                                       
1014                                         }else { out << "Null" << '\t'; }
1015                                         
1016                                         out << DiffstToChimera << '\t' << IndexofLeftParent << '\t' << IndexOfRightParent << '\t';
1017                                         
1018                                         if (parent1 != "Null") {
1019                                                 itUnique = uniqueNames.find(parent1);
1020                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent1 "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1021                                                 else {  out << itUnique->second << '\t';        }
1022                                         }else { out << "Null" << '\t'; }
1023                                         
1024                                         if (parent1 != "Null") {
1025                                                 itUnique = uniqueNames.find(parent2);
1026                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent2 "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1027                                                 else {  out << itUnique->second << '\t';        }
1028                                         }else { out << "Null" << '\t'; }
1029                                         
1030                                         out << temp1 << '\t' << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << flag << endl;    
1031                                 }
1032                         }else { index = m->getline(in); m->gobble(in); }
1033                 }
1034                 in.close();
1035                 out.close();
1036                 
1037                 m->mothurRemove(outputFileName);
1038                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
1039                 
1040                 return total;
1041         }
1042         catch(exception& e) {
1043                 m->errorOut(e, "ChimeraPerseusCommand", "deconvoluteResults");
1044                 exit(1);
1045         }
1046 }       
1047 //**********************************************************************************************************************
1048
1049