working on chimera.perseus. made removeConfidences function smarter. Fixed bug in...

author westcott <westcott>

Mon, 7 Nov 2011 16:22:50 +0000 (16:22 +0000)

committer westcott <westcott>

Mon, 7 Nov 2011 16:22:50 +0000 (16:22 +0000)
author westcott <westcott>
Mon, 7 Nov 2011 16:22:50 +0000 (16:22 +0000)
committer westcott <westcott>
Mon, 7 Nov 2011 16:22:50 +0000 (16:22 +0000)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj

index 2da1ed5721dac55e0f44c9a856d0dbfded11e094..a756931435626cc568867fc76399c68b64fde994 100644 (file)
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -49,6 +49,8 @@
                 A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; };
                 A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
                 A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
+               A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; };
+               A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; };
                 A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; };
                 A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; };
                 A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; };
@@ -432,6 +434,10 @@
                 A7A61F2B130062E000E05B6B /* amovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = amovacommand.h; sourceTree = "<group>"; };
                 A7A61F2C130062E000E05B6B /* amovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = amovacommand.cpp; sourceTree = "<group>"; };
                 A7AACFBA132FE008003D6C4D /* currentfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = currentfile.h; sourceTree = "<group>"; };
+               A7BF221214587886000AD524 /* myPerseus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myPerseus.cpp; sourceTree = "<group>"; };
+               A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myPerseus.h; sourceTree = "<group>"; };
+               A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraperseuscommand.h; sourceTree = "<group>"; };
+               A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraperseuscommand.cpp; sourceTree = "<group>"; };
                 A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = "<group>"; };
                 A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
                 A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
@@ -1214,6 +1220,8 @@
                                 A7E9B67E12D37EC400DA6239 /* chimeracheckcommand.cpp */,
                                 A7E9B68312D37EC400DA6239 /* chimerapintailcommand.h */,
                                 A7E9B68212D37EC400DA6239 /* chimerapintailcommand.cpp */,
+                               A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */,
+                               A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */,
                                 A7E9B68B12D37EC400DA6239 /* chimeraslayercommand.h */,
                                 A7E9B68A12D37EC400DA6239 /* chimeraslayercommand.cpp */,
                                 A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */,
@@ -1688,6 +1696,8 @@
                                 A7E9B68912D37EC400DA6239 /* chimeraslayer.h */,
                                 A7E9B74612D37EC400DA6239 /* maligner.h */,
                                 A7E9B74512D37EC400DA6239 /* maligner.cpp */,
+                               A7BF221314587886000AD524 /* myPerseus.h */,
+                               A7BF221214587886000AD524 /* myPerseus.cpp */,
                                 A7E9B79312D37EC400DA6239 /* pintail.cpp */,
                                 A7E9B79412D37EC400DA6239 /* pintail.h */,
                                 A7E9B82E12D37EC400DA6239 /* slayer.cpp */,
@@ -2155,6 +2165,8 @@
                                 A7FF19F2140FFDA500AD216D /* trimoligos.cpp in Sources */,
                                 A7F9F5CF141A5E500032F693 /* sequenceparser.cpp in Sources */,
                                 A7FFB558142CA02C004884F2 /* summarytaxcommand.cpp in Sources */,
+                               A7BF221414587886000AD524 /* myPerseus.cpp in Sources */,
+                               A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */,
                         );
                         runOnlyForDeploymentPostprocessing = 0;
                 };
diff --git a/chimeraperseuscommand.cpp b/chimeraperseuscommand.cpp

new file mode 100644 (file)

index 0000000..240ba65
--- /dev/null
+++ b/chimeraperseuscommand.cpp
@@ -0,0 +1,1024 @@
+/*
+ *  chimeraperseuscommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/26/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimeraperseuscommand.h"
+#include "deconvolutecommand.h"
+#include "sequence.hpp"
+//**********************************************************************************************************************
+vector<string> ChimeraPerseusCommand::setParameters(){ 
+       try {
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pcutoff);
+               CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "",false,false); parameters.push_back(palpha);
+               CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "",false,false); parameters.push_back(pbeta);
+                       
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string ChimeraPerseusCommand::getHelpString(){ 
+       try {
+               string helpString = "";
+               helpString += "The chimera.perseus command reads a fastafile and namefile and outputs potentially chimeric sequences.\n";
+               helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
+               helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
+               helpString += "The name parameter allows you to provide a name file associated with your fasta file. If none is given unique.seqs will be run to generate one. \n";
+               helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
+               helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
+               helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
+               helpString += "The alpha parameter ....  The default is -5.54. \n";
+               helpString += "The beta parameter ....  The default is 0.33. \n";
+               helpString += "The cutoff parameter ....  The default is 0.50. \n";
+               helpString += "The chimera.perseus command should be in the following format: \n";
+               helpString += "chimera.perseus(fasta=yourFastaFile, name=yourNameFile) \n";
+               helpString += "Example: chimera.perseus(fasta=AD.align, name=AD.names) \n";
+               helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ChimeraPerseusCommand::ChimeraPerseusCommand(){        
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["chimera"] = tempOutNames;
+               outputTypes["accnos"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
+       try {
+               abort = false; calledHelp = false; 
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter("chimera.uchime");
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       vector<string> tempOutNames;
+                       outputTypes["chimera"] = tempOutNames;
+                       outputTypes["accnos"] = tempOutNames;
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       
+                       //check for required parameters
+                       fastafile = validParameter.validFile(parameters, "fasta", false);
+                       if (fastafile == "not found") {                                 
+                               //if there is a current fasta file, use it
+                               string filename = m->getFastaFile(); 
+                               if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { 
+                               m->splitAtDash(fastafile, fastaFileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < fastaFileNames.size(); i++) {
+                                       
+                                       bool ignore = false;
+                                       if (fastaFileNames[i] == "current") { 
+                                               fastaFileNames[i] = m->getFastaFile(); 
+                                               if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       fastaFileNames.erase(fastaFileNames.begin()+i);
+                                                       i--;
+                                               }
+                                       }
+                                       
+                                       if (!ignore) {
+                                               
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(fastaFileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
+                                               }
+                                               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               
+                                               ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
+                                                               m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               fastaFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //default path is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
+                                                               m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               fastaFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               in.close();
+                                               
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       fastaFileNames.erase(fastaFileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setFastaFile(fastaFileNames[i]);
+                                               }
+                                       }
+                               }
+                               
+                               //make sure there is at least one valid file left
+                               if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
+                       }
+                       
+                       
+                       //check for required parameters
+                       bool hasName = true;
+                       namefile = validParameter.validFile(parameters, "name", false);
+                       if (namefile == "not found") { 
+                               //if there is a current fasta file, use it
+                               string filename = m->getNameFile(); 
+                               if (filename != "") { nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; }                                
+                               hasName = false;
+                       }else { 
+                               m->splitAtDash(namefile, nameFileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < nameFileNames.size(); i++) {
+                                       
+                                       bool ignore = false;
+                                       if (nameFileNames[i] == "current") { 
+                                               nameFileNames[i] = m->getNameFile(); 
+                                               if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       nameFileNames.erase(nameFileNames.begin()+i);
+                                                       i--;
+                                               }
+                                       }
+                                       
+                                       if (!ignore) {
+                                               
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(nameFileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
+                                               }
+                                               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               
+                                               ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
+                                                               m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               nameFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //default path is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
+                                                               m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               nameFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               in.close();
+                                               
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       nameFileNames.erase(nameFileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setNameFile(nameFileNames[i]);
+                                               }
+                                       }
+                               }
+                               
+                               //make sure there is at least one valid file left
+                               if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
+                       }
+                       
+                       if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
+                       
+                       bool hasGroup = true;
+                       groupfile = validParameter.validFile(parameters, "group", false);
+                       if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
+                       else { 
+                               m->splitAtDash(groupfile, groupFileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < groupFileNames.size(); i++) {
+                                       
+                                       bool ignore = false;
+                                       if (groupFileNames[i] == "current") { 
+                                               groupFileNames[i] = m->getGroupFile(); 
+                                               if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       groupFileNames.erase(groupFileNames.begin()+i);
+                                                       i--;
+                                               }
+                                       }
+                                       
+                                       if (!ignore) {
+                                               
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(groupFileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
+                                               }
+                                               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               
+                                               ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               groupFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //default path is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               groupFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               in.close();
+                                               
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       groupFileNames.erase(groupFileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setGroupFile(groupFileNames[i]);
+                                               }
+                                       }
+                               }
+                               
+                               //make sure there is at least one valid file left
+                               if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
+                       }
+                       
+                       if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
+                       
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
+                       
+                       string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       convert(temp, processors);
+                       
+                       temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.50";  }
+                       convert(temp, cutoff);
+                       
+                       temp = validParameter.validFile(parameters, "alpha", false);    if (temp == "not found"){       temp = "-5.54"; }
+                       convert(temp, alpha);
+                       
+                       temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
+                       convert(temp, beta);
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+int ChimeraPerseusCommand::execute(){
+       try{
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+               
+                               
+               //process each file
+               for (int s = 0; s < fastaFileNames.size(); s++) {
+                       
+                       m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
+                       
+                       int start = time(NULL); 
+                       if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
+                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.chimera";
+                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "perseus.accnos";
+                       //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
+                       
+                       //you provided a groupfile
+                       string groupFile = "";
+                       if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
+                       
+                       string nameFile = "";
+                       if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
+                               nameFile = nameFileNames[s];
+                       }else { nameFile = getNamesFile(fastaFileNames[s]); }
+                       
+                       if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0;     }                               
+                       
+                       int numSeqs = 0;
+                       int numChimeras = 0;
+                       
+                       if (groupFile != "") {
+                               //Parse sequences by group
+                               SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
+                               vector<string> groups = parser.getNamesOfGroups();
+                               
+                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
+                               
+                               //clears files
+                               ofstream out, out1, out2;
+                               m->openOutputFile(outputFileName, out); out.close(); 
+                               m->openOutputFile(accnosFileName, out1); out1.close();
+                               
+                               if(processors == 1)     {       numSeqs = driverGroups(parser, outputFileName, accnosFileName, 0, groups.size(), groups);       }
+                               else                            {       numSeqs = createProcessesGroups(parser, outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
+                               
+                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+                               
+                               numChimeras = deconvoluteResults(parser, outputFileName, accnosFileName);
+                               
+                               m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
+                               
+                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+                               
+                       }else{
+                               if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+                               
+                               //read sequences and store sorted by frequency
+                               vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
+                               
+                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
+                               
+                               numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
+                       }
+                       
+                       if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
+                       
+                       m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
+                       outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
+                       outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
+               }
+               
+               //set accnos file as new current accnosfile
+               string current = "";
+               itTypes = outputTypes.find("accnos");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
+               }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
+               m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string ChimeraPerseusCommand::getNamesFile(string& inputFile){
+       try {
+               string nameFile = "";
+               
+               m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
+               
+               //use unique.seqs to create new name and fastafile
+               string inputString = "fasta=" + inputFile;
+               m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+               m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
+               
+               Command* uniqueCommand = new DeconvoluteCommand(inputString);
+               uniqueCommand->execute();
+               
+               map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
+               
+               delete uniqueCommand;
+               
+               m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+               
+               nameFile = filenames["name"][0];
+               inputFile = filenames["fasta"][0];
+               
+               return nameFile;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFName, string accnos, int start, int end, vector<string> groups){
+       try {
+               
+               int totalSeqs = 0;
+               int numChimeras = 0;
+               
+               for (int i = start; i < end; i++) {
+                       
+                       m->mothurOutEndLine(); m->mothurOut("Checking sequences from group " + groups[i] + "...");      m->mothurOutEndLine();                                  
+                       
+                       int start = time(NULL);  if (m->control_pressed) {  return 0; }
+                       
+                       vector<seqData> sequences = loadSequences(parser, groups[i]);
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras);
+                       totalSeqs += numSeqs;
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       //append files
+                       m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
+                       m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
+                       
+                       m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
+               }       
+               
+               return totalSeqs;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "driverGroups");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
+vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, string group){
+       try {
+               
+               vector<Sequence> thisGroupsSeqs = parser.getSeqs(group);
+               map<string, string> nameMap = parser.getNameMap(group);
+               map<string, string>::iterator it;
+               
+               vector<seqData> sequences;
+               bool error = false;
+               
+               for (int i = 0; i < thisGroupsSeqs.size(); i++) {
+               
+                       if (m->control_pressed) {  return sequences; }
+                       
+                       it = nameMap.find(thisGroupsSeqs[i].getName());
+                       if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
+                       else {
+                               int num = m->getNumNames(it->second);
+                               sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
+                       }
+               }
+               
+               if (error) { m->control_pressed = true; }
+               
+               //sort by frequency
+               sort(sequences.rbegin(), sequences.rend());
+               
+               return sequences;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "loadSequences");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
+       try {
+               map<string, int>::iterator it;
+               map<string, int> nameMap = m->readNames(name);
+               
+               //read fasta file and create sequenceData structure - checking for file mismatches
+               vector<seqData> sequences;
+               bool error = false;
+               ifstream in;
+               m->openInputFile(inputFile, in);
+               
+               while (!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); return sequences; }
+                       
+                       Sequence temp(in); m->gobble(in);
+                       
+                       it = nameMap.find(temp.getName());
+                       if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
+                       else {
+                               sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
+                       }
+               }
+               in.close();
+               
+               if (error) { m->control_pressed = true; }
+               
+               //sort by frequency
+               sort(sequences.rbegin(), sequences.rend());
+               
+               return sequences;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& sequences, string accnosFileName, int& numChimeras){
+       try {
+               
+               vector<vector<double> > correctModel(4);        //could be an option in the future to input own model matrix
+               for(int i=0;i<4;i++){   correctModel[i].resize(4);      }
+               
+               correctModel[0][0] = 0.000000;  //AA
+               correctModel[1][0] = 11.619259; //CA
+               correctModel[2][0] = 11.694004; //TA
+               correctModel[3][0] = 7.748623;  //GA
+               
+               correctModel[1][1] = 0.000000;  //CC
+               correctModel[2][1] = 7.619657;  //TC
+               correctModel[3][1] = 12.852562; //GC
+               
+               correctModel[2][2] = 0.000000;  //TT
+               correctModel[3][2] = 10.964048; //TG
+               
+               correctModel[3][3] = 0.000000;  //GG
+               
+               for(int i=0;i<4;i++){
+                       for(int j=0;j<i;j++){
+                               correctModel[j][i] = correctModel[i][j];
+                       }
+               }
+               
+               int numSeqs = sequences.size();
+               int alignLength = sequences[0].sequence.size();
+               
+               ofstream chimeraFile;
+               ofstream accnosFile;
+               m->openOutputFile(chimeraFileName, chimeraFile); 
+               m->openOutputFile(accnosFileName, accnosFile); 
+               
+               Perseus myPerseus;
+               vector<vector<double> > binMatrix = myPerseus.binomial(alignLength);
+               
+               chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n";
+               
+               vector<bool> chimeras(numSeqs, 0);
+               
+               for(int i=0;i<numSeqs;i++){     
+                       cout << sequences[i].seqName << endl << sequences[i].sequence << endl << sequences[i].frequency << endl;
+                       if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+                       
+                       vector<bool> restricted = chimeras;
+                       
+                       vector<vector<int> > leftDiffs(numSeqs);
+                       vector<vector<int> > leftMaps(numSeqs);
+                       vector<vector<int> > rightDiffs(numSeqs);
+                       vector<vector<int> > rightMaps(numSeqs);
+                       
+                       vector<int> singleLeft, bestLeft;
+                       vector<int> singleRight, bestRight;
+                       
+                       int bestSingleIndex, bestSingleDiff;
+                       vector<pwAlign> alignments(numSeqs);
+                       
+                       int comparisons = myPerseus.getAlignments(i, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
+                       cout << "comparisons = " << comparisons << endl;
+                       if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+
+                       int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
+                       
+                       string dummyA, dummyB;
+                       
+                       if(comparisons >= 2){   
+                               minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
+                               cout << "minMismatchToChimera = " << minMismatchToChimera << endl;
+                               if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+
+                               int minMismatchToTrimera = numeric_limits<int>::max();
+                               int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
+                               
+                               if(minMismatchToChimera >= 3 && comparisons >= 3){
+                                       minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
+                                       cout << "minMismatchToTrimera = " << minMismatchToTrimera << endl;
+                                       if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+                               }
+                               
+                               double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
+                               cout << "singleDist = " << singleDist << endl;
+                               if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+
+                               string type;
+                               string chimeraRefSeq;
+                               
+                               if(minMismatchToChimera - minMismatchToTrimera >= 3){
+                                       type = "trimera";
+                                       chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps);
+                               }
+                               else{
+                                       type = "chimera";
+                                       chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
+                               }
+                               cout << "chimeraRefSeq = " << chimeraRefSeq << endl;
+                               if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+                               
+                               double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
+                               cout << "chimeraDist = " << chimeraDist << endl;
+                               if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+
+                               double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq);
+                               double loonIndex = myPerseus.calcLoonIndex(sequences[i].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
+                               cout << "loonIndex = " << loonIndex << endl;
+                               if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+
+                               chimeraFile << i << '\t' << sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
+                               chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << sequences[leftParentBi].seqName << '\t' << sequences[rightParentBi].seqName << '\t';
+                               chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t';
+                               chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t';
+                               
+                               double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, alpha, beta);
+                               
+                               chimeraFile << probability << '\t';
+                               
+                               if(probability > cutoff){ 
+                                       chimeraFile << type << endl;
+                                       accnosFile << sequences[i].seqName << endl;
+                                       chimeras[i] = 1;
+                                       numChimeras++;
+                               }
+                               else{
+                                       chimeraFile << "good" << endl;
+                               }
+                               
+                       }
+                       else{
+                               chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
+                       }
+       
+                       //report progress
+                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1) + "\n");           }
+               }
+               
+               if((numSeqs) % 100 != 0){       m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");               }
+               
+               chimeraFile.close();
+               accnosFile.close();
+               
+               return numSeqs;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "driver");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) {
+       try {
+               
+               vector<int> processIDS;
+               int process = 1;
+               int num = 0;
+               
+               //sanity check
+               if (groups.size() < processors) { processors = groups.size(); }
+               
+               //divide the groups between the processors
+               vector<linePair> lines;
+               int numGroupsPerProcessor = groups.size() / processors;
+               for (int i = 0; i < processors; i++) {
+                       int startIndex =  i * numGroupsPerProcessor;
+                       int endIndex = (i+1) * numGroupsPerProcessor;
+                       if(i == (processors - 1)){      endIndex = groups.size();       }
+                       lines.push_back(linePair(startIndex, endIndex));
+               }
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = outputFName + toString(getpid()) + ".num.temp";
+                               m->openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                               
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //do my part
+               num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
+                       m->openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); m->mothurRemove(tempFile);
+               }
+               
+#else
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<perseusData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+                       // Allocate memory for thread data.
+                       string extension = toString(i) + ".temp";
+                       
+                       perseusData* tempPerseus = new perseusData(alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i);
+                       
+                       pDataArray.push_back(tempPerseus);
+                       processIDS.push_back(i);
+                       
+                       //MyPerseusThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyPerseusThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+               }
+               
+               
+               //using the main process as a worker saves time and memory
+               num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               cout << "here" << endl; 
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+#endif         
+               
+               
+               //append output files
+               for(int i=0;i<processIDS.size();i++){
+                       m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
+                       m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
+                       
+                       m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
+                       m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
+               }
+               
+               return num;     
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "createProcessesGroups");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ChimeraPerseusCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName){
+       try {
+               map<string, string> uniqueNames = parser.getAllSeqsMap();
+               map<string, string>::iterator itUnique;
+               int total = 0;
+               
+               //edit accnos file
+               ifstream in2; 
+               m->openInputFile(accnosFileName, in2);
+               
+               ofstream out2;
+               m->openOutputFile(accnosFileName+".temp", out2);
+               
+               string name;
+               set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
+               set<string>::iterator itNames;
+               set<string> chimerasInFile;
+               set<string>::iterator itChimeras;
+               
+               
+               while (!in2.eof()) {
+                       if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
+                       
+                       in2 >> name; m->gobble(in2);
+                       
+                       //find unique name
+                       itUnique = uniqueNames.find(name);
+                       
+                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                       else {
+                               itChimeras = chimerasInFile.find((itUnique->second));
+                               
+                               if (itChimeras == chimerasInFile.end()) {
+                                       out2 << itUnique->second << endl;
+                                       chimerasInFile.insert((itUnique->second));
+                                       total++;
+                               }
+                       }
+               }
+               in2.close();
+               out2.close();
+               
+               m->mothurRemove(accnosFileName);
+               rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
+               
+               //edit chimera file
+               ifstream in; 
+               m->openInputFile(outputFileName, in);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+               
+               int DiffsToBestMatch, BestMatchIndex, DiffstToChimera, IndexofLeftParent, IndexOfRightParent;
+               float temp1,temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+               string index, BestMatchName, parent1, parent2, flag;
+               name = "";
+               namesInFile.clear();    
+               //assumptions - in file each read will always look like 
+               /*                                                                              
+                SequenceIndex  Name    DiffsToBestMatch        BestMatchIndex  BestMatchName   DiffstToChimera IndexofLeftParent       IndexOfRightParent      NameOfLeftParent        NameOfRightParent       DistanceToBestMatch     cIndex  (cIndex - singleDist)   loonIndex       MismatchesToChimera     MismatchToTrimera       ChimeraBreakPoint       LogisticProbability     TypeOfSequence
+                0      F01QG4L02JVBQY  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
+                1      F01QG4L02ICTC6  0       0       Null    0       0       0       Null    Null    0.0     0.0     0.0     0.0     0       0       0       0.0     0.0     good
+                2      F01QG4L02JZOEC  48      0       F01QG4L02JVBQY  47      0       0       F01QG4L02JVBQY  F01QG4L02JVBQY  2.0449  2.03545 -0.00944493     0       47      2147483647      138     0       good
+                3      F01QG4L02G7JEC  42      0       F01QG4L02JVBQY  40      1       0       F01QG4L02ICTC6  F01QG4L02JVBQY  1.87477 1.81113 -0.0636404      5.80145 40      2147483647      25      0       good
+                */
+               
+               //get and print headers
+               BestMatchName = m->getline(in); m->gobble(in);
+               out << BestMatchName << endl;
+               
+               while (!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
+                       
+                       bool print = false;
+                       in >> index;    m->gobble(in);
+                       
+                       if (index != "SequenceIndex") { //if you are not a header line, there will be a header line for each group if group file is given
+                               in >> name;             m->gobble(in);
+                               in >> DiffsToBestMatch; m->gobble(in);
+                               in >> BestMatchIndex; m->gobble(in);
+                               in >> BestMatchName; m->gobble(in);
+                               in >> DiffstToChimera; m->gobble(in);
+                               in >> IndexofLeftParent; m->gobble(in);
+                               in >> IndexOfRightParent; m->gobble(in);
+                               in >> parent1;  m->gobble(in);
+                               in >> parent2;  m->gobble(in);
+                               in >> temp1 >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> flag; m->gobble(in);
+                               
+                               //find unique name
+                               itUnique = uniqueNames.find(name);
+                               
+                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                               else {
+                                       name = itUnique->second;
+                                       //is this name already in the file
+                                       itNames = namesInFile.find((name));
+                                       
+                                       if (itNames == namesInFile.end()) { //no not in file
+                                               if (flag == "good") { //are you really a no??
+                                                       //is this sequence really not chimeric??
+                                                       itChimeras = chimerasInFile.find(name);
+                                                       
+                                                       //then you really are a no so print, otherwise skip
+                                                       if (itChimeras == chimerasInFile.end()) { print = true; }
+                                               }else{ print = true; }
+                                       }
+                               }
+                               
+                               if (print) {
+                                       out << index << '\t' << name  << '\t' << DiffsToBestMatch << '\t' << BestMatchIndex << '\t';
+                                       namesInFile.insert(name);
+                                       
+                                       if (BestMatchName != "Null") {
+                                               itUnique = uniqueNames.find(BestMatchName);
+                                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find BestMatchName "+ BestMatchName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                               else {  out << itUnique->second << '\t';        }                                       
+                                       }else { out << "Null" << '\t'; }
+                                       
+                                       out << DiffstToChimera << '\t' << IndexofLeftParent << '\t' << IndexOfRightParent << '\t';
+                                       
+                                       if (parent1 != "Null") {
+                                               itUnique = uniqueNames.find(parent1);
+                                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent1 "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                               else {  out << itUnique->second << '\t';        }
+                                       }else { out << "Null" << '\t'; }
+                                       
+                                       if (parent1 != "Null") {
+                                               itUnique = uniqueNames.find(parent2);
+                                               if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parent2 "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                               else {  out << itUnique->second << '\t';        }
+                                       }else { out << "Null" << '\t'; }
+                                       
+                                       out << temp1 << '\t' << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << flag << endl;    
+                               }
+                       }else { index = m->getline(in); m->gobble(in); }
+               }
+               in.close();
+               out.close();
+               
+               m->mothurRemove(outputFileName);
+               rename((outputFileName+".temp").c_str(), outputFileName.c_str());
+               
+               return total;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "deconvoluteResults");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
+
+
diff --git a/chimeraperseuscommand.h b/chimeraperseuscommand.h

new file mode 100644 (file)

index 0000000..84563ac
--- /dev/null
+++ b/chimeraperseuscommand.h
@@ -0,0 +1,325 @@
+#ifndef CHIMERAPERSEUSCOMMAND_H
+#define CHIMERAPERSEUSCOMMAND_H
+
+
+/*
+ *  chimeraperseuscommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/26/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+
+#include "mothur.h"
+#include "command.hpp"
+#include "sequenceparser.h"
+#include "myPerseus.h"
+
+/***********************************************************/
+class ChimeraPerseusCommand : public Command {
+public:
+       ChimeraPerseusCommand(string);
+       ChimeraPerseusCommand();
+       ~ChimeraPerseusCommand() {}
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "chimera.perseus";             }
+       string getCommandCategory()             { return "Sequence Processing"; }
+       string getHelpString(); 
+       string getCitation() { return "http://www.mothur.org/wiki/Chimera.perseus\n"; }
+       string getDescription()         { return "detect chimeric sequences"; }
+       
+       int execute(); 
+       void help() { m->mothurOut(getHelpString()); }          
+       
+private:
+       struct linePair {
+               int start;
+               int end;
+               linePair(int i, int j) : start(i), end(j) {}
+       };
+       
+       bool abort;
+       string fastafile, groupfile, outputDir, namefile;
+       int processors;
+       double cutoff, alpha, beta;
+       
+       vector<string> outputNames;
+       vector<string> fastaFileNames;
+       vector<string> nameFileNames;
+       vector<string> groupFileNames;
+       
+       string getNamesFile(string&);
+       int driver(string, vector<seqData>&, string, int&);
+       vector<seqData> readFiles(string, string);
+       vector<seqData> loadSequences(SequenceParser&, string);
+       int deconvoluteResults(SequenceParser&, string, string);
+       int driverGroups(SequenceParser&, string, string, int, int, vector<string>);
+       int createProcessesGroups(SequenceParser&, string, string, vector<string>, string, string, string);
+};
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct perseusData {
+       string fastafile; 
+       string namefile; 
+       string groupfile;
+       string outputFName;
+       string accnos;
+       MothurOut* m;
+       int start;
+       int end;
+       int threadID, count, numChimeras;
+       double alpha, beta, cutoff;
+       vector<string> groups;
+       
+       perseusData(){}
+       perseusData(double a, double b, double c, string o,  string f, string n, string g, string ac, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
+               alpha = a;
+               beta = b;
+               cutoff = c;
+               fastafile = f;
+               namefile = n;
+               groupfile = g;
+               outputFName = o;
+               accnos = ac;
+               m = mout;
+               start = st;
+               end = en;
+               threadID = tid;
+               groups = gr;
+               count = 0;
+               numChimeras = 0;
+       }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MyPerseusThreadFunction(LPVOID lpParam){ 
+       perseusData* pDataArray;
+       pDataArray = (perseusData*)lpParam;
+       
+       try {
+               
+               //clears files
+               ofstream out, out1, out2;
+               pDataArray->m->openOutputFile(pDataArray->outputFName, out); out.close(); 
+               pDataArray->m->openOutputFile(pDataArray->accnos, out1); out1.close();
+               
+               //parse fasta and name file by group
+               SequenceParser* parser;
+               if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile);      }
+               else                                                    { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile);                                            }
+               
+               int totalSeqs = 0;
+               int numChimeras = 0;
+               
+               for (int i = pDataArray->start; i < pDataArray->end; i++) {
+                       
+                       int start = time(NULL);  if (pDataArray->m->control_pressed) {  delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; }
+                       
+                       pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Checking sequences from group " + pDataArray->groups[i] + "...");  pDataArray->m->mothurOutEndLine();                                      
+                       
+                       //vector<seqData> sequences = loadSequences(parser, groups[i]); - same function below
+                       ////////////////////////////////////////////////////////////////////////////////////////
+                       vector<Sequence> thisGroupsSeqs = parser->getSeqs(pDataArray->groups[i]);
+                       map<string, string> nameMap = parser->getNameMap(pDataArray->groups[i]);
+                       map<string, string>::iterator it;
+                       
+                       vector<seqData> sequences;
+                       bool error = false;
+                       
+                       for (int j = 0; j < thisGroupsSeqs.size(); j++) {
+                               
+                               if (pDataArray->m->control_pressed) {  delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; }
+                               
+                               it = nameMap.find(thisGroupsSeqs[j].getName());
+                               if (it == nameMap.end()) { error = true; pDataArray->m->mothurOut("[ERROR]: " + thisGroupsSeqs[j].getName() + " is in your fasta file and not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+                               else {
+                                       int num = pDataArray->m->getNumNames(it->second);
+                                       sequences.push_back(seqData(thisGroupsSeqs[j].getName(), thisGroupsSeqs[j].getUnaligned(), num));
+                               }
+                       }
+                       
+                       if (error) { pDataArray->m->control_pressed = true; }
+                       
+                       //sort by frequency
+                       sort(sequences.rbegin(), sequences.rend());
+                       ////////////////////////////////////////////////////////////////////////////////////////
+
+                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; }
+                       
+                       //int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras); - same function below
+                       ////////////////////////////////////////////////////////////////////////////////////////
+                       string chimeraFileName = pDataArray->outputFName+pDataArray->groups[i];
+                       string accnosFileName = pDataArray->accnos+pDataArray->groups[i];
+                       
+                       vector<vector<double> > correctModel(4);        //could be an option in the future to input own model matrix
+                       for(int j=0;j<4;j++){   correctModel[j].resize(4);      }
+                       
+                       correctModel[0][0] = 0.000000;  //AA
+                       correctModel[1][0] = 11.619259; //CA
+                       correctModel[2][0] = 11.694004; //TA
+                       correctModel[3][0] = 7.748623;  //GA
+                       
+                       correctModel[1][1] = 0.000000;  //CC
+                       correctModel[2][1] = 7.619657;  //TC
+                       correctModel[3][1] = 12.852562; //GC
+                       
+                       correctModel[2][2] = 0.000000;  //TT
+                       correctModel[3][2] = 10.964048; //TG
+                       
+                       correctModel[3][3] = 0.000000;  //GG
+                       
+                       for(int k=0;k<4;k++){
+                               for(int j=0;j<k;j++){
+                                       correctModel[j][k] = correctModel[k][j];
+                               }
+                       }
+                       
+                       int numSeqs = sequences.size();
+                       int alignLength = sequences[0].sequence.size();
+                       
+                       ofstream chimeraFile;
+                       ofstream accnosFile;
+                       pDataArray->m->openOutputFile(chimeraFileName, chimeraFile); 
+                       pDataArray->m->openOutputFile(accnosFileName, accnosFile); 
+                       
+                       Perseus myPerseus;
+                       vector<vector<double> > binMatrix = myPerseus.binomial(alignLength);
+                       
+                       chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n";
+                       
+                       vector<bool> chimeras(numSeqs, 0);
+                       
+                       for(int j=0;j<numSeqs;j++){     
+                               
+                               if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
+                               
+                               vector<bool> restricted = chimeras;
+                               
+                               vector<vector<int> > leftDiffs(numSeqs);
+                               vector<vector<int> > leftMaps(numSeqs);
+                               vector<vector<int> > rightDiffs(numSeqs);
+                               vector<vector<int> > rightMaps(numSeqs);
+                               
+                               vector<int> singleLeft, bestLeft;
+                               vector<int> singleRight, bestRight;
+                               
+                               int bestSingleIndex, bestSingleDiff;
+                               vector<pwAlign> alignments(numSeqs);
+                               
+                               int comparisons = myPerseus.getAlignments(j, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
+                               
+                               if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
+                               
+                               int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
+                               
+                               string dummyA, dummyB;
+                               
+                               if(comparisons >= 2){   
+                                       minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
+                                       
+                                       if (pDataArray->m->control_pressed) { delete parser;  pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
+                                       
+                                       int minMismatchToTrimera = numeric_limits<int>::max();
+                                       int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
+                                       
+                                       if(minMismatchToChimera >= 3 && comparisons >= 3){
+                                               minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
+                                               
+                                               if (pDataArray->m->control_pressed) { delete parser;  pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
+                                       }
+                                       
+                                       double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[j].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
+                                       
+                                       if (pDataArray->m->control_pressed) { delete parser;  pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
+                                       
+                                       string type;
+                                       string chimeraRefSeq;
+                                       
+                                       if(minMismatchToChimera - minMismatchToTrimera >= 3){
+                                               type = "trimera";
+                                               chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps);
+                                       }
+                                       else{
+                                               type = "chimera";
+                                               chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
+                                       }
+                                       
+                                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
+                                       
+                                       double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[j].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
+                                       
+                                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
+                                       
+                                       double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[j].sequence, chimeraRefSeq);
+                                       double loonIndex = myPerseus.calcLoonIndex(sequences[j].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
+                                       
+                                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
+                                       
+                                       chimeraFile << j << '\t' << sequences[j].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
+                                       chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << sequences[leftParentBi].seqName << '\t' << sequences[rightParentBi].seqName << '\t';
+                                       chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t';
+                                       chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t';
+                                       
+                                       double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, pDataArray->alpha, pDataArray->beta);
+                                       
+                                       chimeraFile << probability << '\t';
+                                       
+                                       if(probability > pDataArray->cutoff){ 
+                                               chimeraFile << type << endl;
+                                               accnosFile << sequences[j].seqName << endl;
+                                               chimeras[j] = 1;
+                                               numChimeras++;
+                                       }
+                                       else{
+                                               chimeraFile << "good" << endl;
+                                       }
+                                       
+                               }
+                               else{
+                                       chimeraFile << j << '\t' << sequences[j].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
+                               }
+                               //report progress
+                               if((j+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(j+1) + "\n");               }
+                       }
+                       
+                       if((numSeqs) % 100 != 0){       pDataArray->m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");           }
+                       
+                       chimeraFile.close();
+                       accnosFile.close();
+                       ////////////////////////////////////////////////////////////////////////////////////////
+
+                       totalSeqs += numSeqs;
+                       
+                       //append files
+                       pDataArray->m->appendFiles(chimeraFileName, pDataArray->outputFName); pDataArray->m->mothurRemove(chimeraFileName);
+                       pDataArray->m->appendFiles(accnosFileName, pDataArray->accnos); pDataArray->m->mothurRemove(accnosFileName);
+                       pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + pDataArray->groups[i] + ".");        pDataArray->m->mothurOutEndLine();                                      
+                       
+                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; }
+               }       
+               
+               pDataArray->count = totalSeqs;
+               delete parser;
+               return totalSeqs;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyPerseusThreadFunction");
+               exit(1);
+       }
+} 
+/**************************************************************************************************/
+
+#endif
+
+#endif
+
+
diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp

index e08909bc89993db47ed80836a417b7020f68d068..ee7add9ebd4ff55ea37312d3194bbd4df91270e2 100644 (file)
--- a/chimerauchimecommand.cpp
+++ b/chimerauchimecommand.cpp
@@ -1514,7 +1514,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o
                                 
  #else
                 //////////////////////////////////////////////////////////////////////////////////////////////////////
-               //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
+               //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
                 //////////////////////////////////////////////////////////////////////////////////////////////////////
                 
@@ -1534,7 +1534,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o
                         pDataArray.push_back(tempUchime);
                         processIDS.push_back(i);
                         
-                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+                       //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
                 }
diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp

index 7b1a04f3658bcb0a3e919fefe34a30dedcfaa882..f36631a0590725867dffc402f74fd73af6168e20 100644 (file)
--- a/classifyotucommand.cpp
+++ b/classifyotucommand.cpp
@@ -368,7 +368,7 @@ int ClassifyOtuCommand::readTaxonomyFile() {
                         m->gobble(in);
                         
                         //are there confidence scores, if so remove them
-                       if (tax.find_first_of('(') != -1) {  removeConfidences(tax);    }
+                       if (tax.find_first_of('(') != -1) {  m->removeConfidences(tax); }
                         
                         taxMap[name] = tax;
                         
@@ -550,7 +550,7 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                         out << (i+1) << '\t' << size << '\t' << conTax << endl;
                         
                         string noConfidenceConTax = conTax;
-                       removeConfidences(noConfidenceConTax);
+                       m->removeConfidences(noConfidenceConTax);
                         
                         //add this bins taxonomy to summary
                         if (basis == "sequence") {
@@ -604,36 +604,6 @@ string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
                 exit(1);
         }
  }
-
-/**************************************************************************************************/
-void ClassifyOtuCommand::removeConfidences(string& tax) {
-       try {
-               
-               string taxon;
-               string newTax = "";
-               
-               while (tax.find_first_of(';') != -1) {
-                       //get taxon
-                       taxon = tax.substr(0,tax.find_first_of(';'));
-                       
-                       int pos = taxon.find_first_of('(');
-                       if (pos != -1) {
-                               taxon = taxon.substr(0, pos); //rip off confidence 
-                       }
-                       
-                       taxon += ";";
-                       
-                       tax = tax.substr(tax.find_first_of(';')+1, tax.length());
-                       newTax += taxon;
-               }
-               
-               tax = newTax;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifyOtuCommand", "removeConfidences");
-               exit(1);
-       }
-}
  //**********************************************************************************************************************
  
  
diff --git a/classifyotucommand.h b/classifyotucommand.h

index d1c173490ac3778e485bcb58334047f4757d6a1e..a0baf1849d92935fec3ca360a135805d3c41ddbc 100644 (file)
--- a/classifyotucommand.h
+++ b/classifyotucommand.h
@@ -46,7 +46,6 @@ private:
  
         int readNamesFile();
         int readTaxonomyFile();
-       void removeConfidences(string&);
         int process(ListVector*);
         string addUnclassifieds(string, int);
         vector<string> findConsensusTaxonomy(int, ListVector*, int&, string&);  // returns the name of the "representative" taxonomy of given bin
diff --git a/commandfactory.cpp b/commandfactory.cpp

index 2fc6875c6430bd769e3e8b56e484b5da1561a30c..69bb568e761f590e2419ee61079783679c914efa 100644 (file)
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -123,6 +123,7 @@
  #include "countgroupscommand.h"
  #include "clearmemorycommand.h"
  #include "summarytaxcommand.h"
+#include "chimeraperseuscommand.h"
  
  /*******************************************************/
  
@@ -258,6 +259,7 @@ CommandFactory::CommandFactory(){
         commands["chimera.check"]               = "MPIEnabled";
         commands["chimera.slayer"]              = "MPIEnabled";
         commands["chimera.uchime"]              = "chimera.uchime";
+       commands["chimera.perseus"]             = "chimera.perseus";
         commands["chimera.pintail"]             = "MPIEnabled";
         commands["chimera.bellerophon"] = "MPIEnabled";
         commands["screen.seqs"]                 = "MPIEnabled";
@@ -267,6 +269,7 @@ CommandFactory::CommandFactory(){
         commands["sens.spec"]                   = "sens.spec";
         commands["seq.error"]                   = "seq.error";
         commands["seq.error"]                   = "summary.tax";
+       
         commands["quit"]                                = "MPIEnabled"; 
  
  }
@@ -424,6 +427,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                 else if(commandName == "count.groups")                  {       command = new CountGroupsCommand(optionString);                         }
                 else if(commandName == "clear.memory")                  {       command = new ClearMemoryCommand(optionString);                         }
                 else if(commandName == "summary.tax")                   {       command = new SummaryTaxCommand(optionString);                          }
+               else if(commandName == "chimera.perseus")               {       command = new ChimeraPerseusCommand(optionString);                      }
                 else                                                                                    {       command = new NoCommand(optionString);                                          }
  
                 return command;
@@ -565,6 +569,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                 else if(commandName == "count.groups")                  {       pipecommand = new CountGroupsCommand(optionString);                             }
                 else if(commandName == "clear.memory")                  {       pipecommand = new ClearMemoryCommand(optionString);                             }
                 else if(commandName == "summary.tax")                   {       pipecommand = new SummaryTaxCommand(optionString);                              }
+               else if(commandName == "chimera.perseus")               {       pipecommand = new ChimeraPerseusCommand(optionString);                  }
                 else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
  
                 return pipecommand;
@@ -694,6 +699,7 @@ Command* CommandFactory::getCommand(string commandName){
                 else if(commandName == "count.groups")                  {       shellcommand = new CountGroupsCommand();                        }
                 else if(commandName == "clear.memory")                  {       shellcommand = new ClearMemoryCommand();                        }
                 else if(commandName == "summary.tax")                   {       shellcommand = new SummaryTaxCommand();                         }
+               else if(commandName == "chimera.perseus")               {       shellcommand = new ChimeraPerseusCommand();                     }
                 else                                                                                    {       shellcommand = new NoCommand();                                         }
  
                 return shellcommand;
diff --git a/getlineagecommand.cpp b/getlineagecommand.cpp

index 195c8229d4126846801c90d052c783bc2a49f9e6..b4e41e5fb60f48823528907df5804ff05c01c419 100644 (file)
--- a/getlineagecommand.cpp
+++ b/getlineagecommand.cpp
@@ -564,7 +564,8 @@ int GetLineageCommand::readTax(){
                         if (hasConPos != string::npos) {  
                                 taxonsHasConfidence[i] = true; 
                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
-                               noConfidenceTaxons[i] = removeConfidences(listOfTaxons[i]);
+                               noConfidenceTaxons[i] = listOfTaxons[i];
+                               m->removeConfidences(noConfidenceTaxons[i]);
                         }
                 }
                 
@@ -584,7 +585,8 @@ int GetLineageCommand::readTax(){
                                 if (!taxonsHasConfidence[j]) {
                                         int hasConfidences = tax.find_first_of('(');
                                         if (hasConfidences != string::npos) { 
-                                               newtax = removeConfidences(tax);
+                                               newtax = tax;
+                                               m->removeConfidences(newtax);
                                         }
                                 
                                         int pos = newtax.find(noConfidenceTaxons[j]);
@@ -613,7 +615,8 @@ int GetLineageCommand::readTax(){
                                                 string noNewTax = tax;
                                                 int hasConfidences = tax.find_first_of('(');
                                                 if (hasConfidences != string::npos) { 
-                                                       noNewTax = removeConfidences(tax);
+                                                       noNewTax = tax;
+                                                       m->removeConfidences(noNewTax);
                                                 }
                                         
                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
@@ -725,29 +728,6 @@ vector< map<string, float> > GetLineageCommand::getTaxons(string tax) {
                 exit(1);
         }
  }
-/**************************************************************************************************/
-string GetLineageCommand::removeConfidences(string tax) {
-       try {
-               
-               string taxon = "";
-               int taxLength = tax.length();
-               for(int i=0;i<taxLength;i++){
-                       if(tax[i] == ';'){
-                               taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
-                               taxon += ";";
-                       }
-                       else{
-                               taxon += tax[i];
-                       }
-               }
-                               
-               return taxon;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "GetLineageCommand", "removeConfidences");
-               exit(1);
-       }
-}
  //**********************************************************************************************************************
  //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
  int GetLineageCommand::readAlign(){
diff --git a/getlineagecommand.h b/getlineagecommand.h

index 4c9514d2226028f54d6996070d3776d66aab6122..a6770614b6dfffac32f4e57ce65f451a0046085e 100644 (file)
--- a/getlineagecommand.h
+++ b/getlineagecommand.h
@@ -44,7 +44,6 @@ class GetLineageCommand : public Command {
                 int readAlign();
                 int readList();
                 int readTax();  
-               string removeConfidences(string);
                 vector< map<string, float> > getTaxons(string);
  };
  
diff --git a/metastatscommand.cpp b/metastatscommand.cpp

index b493c962461b51df14a06ef51b579235d89ef181..975924bae580f11271c84ebe79a9897dfe71f58d 100644 (file)
--- a/metastatscommand.cpp
+++ b/metastatscommand.cpp
@@ -394,7 +394,7 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                         //get set names
                         string setA = namesOfGroupCombos[c][0]; 
                         string setB = namesOfGroupCombos[c][1];
-               
+               cout << setA << '\t' << setB << endl;
                         //get filename
                         string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
                         outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
@@ -425,7 +425,8 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                                 }
                         }
                         
-                       //for (int i = 0; i < subset.size(); i++) { cout << subset[i]->getGroup() << endl; }
+                       for (int i = 0; i < subset.size(); i++) { cout << designMap->getGroup(subset[i]->getGroup()) << endl; }
+                       cout << setACount << endl;
                         
                         if ((setACount == 0) || (setBCount == 0))  { 
                                 m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
diff --git a/mothurmetastats.cpp b/mothurmetastats.cpp

index 6a834e96be802fe416949dc8910d8790a24ddf15..9ba40502a2294d5ce42ba591c076da6fd3015979 100644 (file)
--- a/mothurmetastats.cpp
+++ b/mothurmetastats.cpp
@@ -55,10 +55,10 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                 }
                 
                 //total for first grouping
-               for (int i = 0; i < (secondGroupingStart-1); i++) { total1 += total[i]; }
+               for (int i = 0; i < secondGroupingStart; i++) { total1 += total[i]; }
                 
                 //total for second grouping
-               for (int i = (secondGroupingStart-1); i < column; i++) { total2 += total[i]; }
+               for (int i = secondGroupingStart; i < column; i++) { total2 += total[i]; }
                 
                 //Creates the ratios by first finding the minimum of totals
                 double min = total[0];
@@ -105,15 +105,15 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                 if (m->control_pressed) { return 1; }
                 
                 // Start the calculations.
-               if ( (column == 2) || ((secondGroupingStart-1) < 8) || ((column-secondGroupingStart+1) < 8) ){ 
+               if ( (column == 2) || (secondGroupingStart < 8) || ((column-secondGroupingStart) < 8) ){ 
                         
                         vector<double> fish;    fish.resize(row, 0.0);
                         vector<double> fish2;   fish2.resize(row, 0.0);
                         
                         for(int i = 0; i < row; i++){
                                 
-                               for(int j = 0; j < (secondGroupingStart-1); j++)                { fish[i] += data[i][j];        }
-                               for(int j = (secondGroupingStart-1); j < column; j++)   { fish2[i] += data[i][j];       }
+                               for(int j = 0; j < secondGroupingStart; j++)            { fish[i] += data[i][j];        }
+                               for(int j = secondGroupingStart; j < column; j++)       { fish2[i] += data[i][j];       }
                                 
                                 //vector<double> tempData; tempData.resize(4, 0.0);
                                 double f11, f12, f21, f22;
@@ -148,12 +148,12 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                         
                         for(int i = 0; i < row; i++){
                                 
-                               for(int j = 0; j < (secondGroupingStart-1); j++){       sparse[i] += data[i][j]; }
-                               if(sparse[i] < (double)(secondGroupingStart-1)){        c++; }
+                               for(int j = 0; j < secondGroupingStart; j++)    {       sparse[i] += data[i][j];        }
+                               if(sparse[i] < (double)secondGroupingStart)             {       c++;                                            }
                                 
                                 // ?<= for col
-                               for(int j = (secondGroupingStart-1); j < column; j++){  sparse2[i] += data[i][j]; }
-                               if( (sparse2[i] < (double)(column-secondGroupingStart+1))) { c++; }
+                               for(int j = secondGroupingStart; j < column; j++)               {  sparse2[i] += data[i][j]; }
+                               if( (sparse2[i] < (double)(column-secondGroupingStart)))        { c++;                                           }
                                 
                                 if (c == 2) {
                                         c=0;
@@ -190,11 +190,11 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                 for (int j = 0; j < row; j++){
                         if (m->control_pressed) { return 1; }
                         
-                       for (int i = 1; i <= (secondGroupingStart-1); i++){ temp[j][0] += data[j][i-1]; }
-                       temp[j][0] /= (double)(secondGroupingStart-1);
+                       for (int i = 0; i < secondGroupingStart; i++){ temp[j][0] += data[j][i]; }
+                       temp[j][0] /= (double)secondGroupingStart;
                         
-                       for(int i = secondGroupingStart; i <= column; i++){ temp[j][1] += data[j][i-1]; }
-                       temp[j][1] /= (double)(column-secondGroupingStart+1);
+                       for(int i = secondGroupingStart; i < column; i++){ temp[j][1] += data[j][i]; }
+                       temp[j][1] /= (double)(column-secondGroupingStart);
                 }
                 
                 for(int i = 0; i < row; i++){
@@ -280,14 +280,14 @@ int MothurMetastats::start(vector<double>& Imatrix, int secondGroupingStart, vec
                         storage[i][0]=C1[i][0];
                         C1[i][1]=tool[i+row+row]; // var group 1
                         storage[i][1]=C1[i][1];
-                       C1[i][2]=C1[i][1]/(secondGroupingStart-1);
+                       C1[i][2]=C1[i][1]/(secondGroupingStart);
                         storage[i][2]=sqrt(C1[i][2]);
                         
                         C2[i][0]=tool[i+row]; // mean group 2
                         storage[i][4]=C2[i][0];    
                         C2[i][1]=tool[i+row+row+row]; // var group 2 
                         storage[i][5]=C2[i][1];        
-                       C2[i][2]=C2[i][1]/(column-secondGroupingStart+1);
+                       C2[i][2]=C2[i][1]/(column-secondGroupingStart);
                         storage[i][6]=sqrt(C2[i][2]);   
                 }
                 
@@ -314,7 +314,7 @@ int MothurMetastats::meanvar(vector<double>& pmatrix, int secondGroupingStart, v
                 vector<double> var;             var.resize(row, 0.0);
                 vector<double> var2;    var2.resize(row, 0.0);
                 
-               double a = secondGroupingStart-1;
+               double a = secondGroupingStart;
                 double b = column - a;
                 int m = a * row;
                 int n = row * column;
@@ -459,11 +459,11 @@ int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupi
                         if (m->control_pressed) { return 0; }
                         C1[i][0]=tool[i];
                         C1[i][1]=tool[i+row+row];
-                       C1[i][2]=C1[i][1]/(secondGroupingStart-1);
+                       C1[i][2]=C1[i][1]/(secondGroupingStart);
                         
                         C2[i][0]=tool[i+row];
                         C2[i][1]=tool[i+row+row+row]; // var group 2 
-                       C2[i][2]=C2[i][1]/(column-secondGroupingStart+1);
+                       C2[i][2]=C2[i][1]/(column-secondGroupingStart);
                 }
                 
                 for (int i = 0; i < row; i++){
diff --git a/mothurout.cpp b/mothurout.cpp

index 90d6f37434d7cb4df231817be6d1759df80b45bb..32f3b85819bb05206cb789d4e41e70d02fcd2e65 100644 (file)
--- a/mothurout.cpp
+++ b/mothurout.cpp
@@ -2020,6 +2020,46 @@ bool MothurOut::isContainingOnlyDigits(string input) {
         }
  }
  /**************************************************************************************************/
+int MothurOut::removeConfidences(string& tax) {
+       try {
+               
+               string taxon;
+               string newTax = "";
+               
+               while (tax.find_first_of(';') != -1) {
+                       
+                       if (control_pressed) { return 0; }
+                       
+                       //get taxon
+                       taxon = tax.substr(0,tax.find_first_of(';'));
+       
+                       int pos = taxon.find_last_of('(');
+                       if (pos != -1) {
+                               //is it a number?
+                               int pos2 = taxon.find_last_of(')');
+                               if (pos2 != -1) {
+                                       string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
+                                       if (isContainingOnlyDigits(confidenceScore)) {
+                                               taxon = taxon.substr(0, pos); //rip off confidence 
+                                       }
+                               }
+                       }
+                       taxon += ";";
+                       
+                       tax = tax.substr(tax.find_first_of(';')+1, tax.length());
+                       newTax += taxon;
+               }
+               
+               tax = newTax;
+               
+               return 0;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "removeConfidences");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
  
  
  
diff --git a/mothurout.h b/mothurout.h

index 0c45a30387b3164f8aceed5ffbd92b9982aa49a6..785263b4a11474ba939df4c97ae5f94ce6206d5a 100644 (file)
--- a/mothurout.h
+++ b/mothurout.h
@@ -84,6 +84,7 @@ class MothurOut {
                 int readNames(string, map<string, vector<string> >&);
                 int readNames(string, vector<seqPriorityNode>&, map<string, string>&);
                 void mothurRemove(string);
+       
                 
                 //searchs and checks
                 bool checkReleaseVersion(ifstream&, string);
@@ -105,6 +106,7 @@ class MothurOut {
                 void splitAtDash(string&, set<string>&);
                 void splitAtDash(string&, vector<string>&);
                 void splitAtChar(string&, vector<string>&, char);
+               int removeConfidences(string&);
                 
                 //math operation
                 int factorial(int num);
diff --git a/myPerseus.cpp b/myPerseus.cpp

new file mode 100644 (file)

index 0000000..b342393
--- /dev/null
+++ b/myPerseus.cpp
@@ -0,0 +1,1073 @@
+/*
+ *  myPerseus.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 9/5/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "myPerseus.h"
+
+/**************************************************************************************************/
+int PERSEUSMAXINT = numeric_limits<int>::max();
+/**************************************************************************************************/
+
+vector<vector<double> > Perseus::binomial(int maxOrder){
+       try {
+               vector<vector<double> > binomial(maxOrder+1);
+               
+               for(int i=0;i<=maxOrder;i++){
+                       binomial[i].resize(maxOrder+1);
+                       binomial[i][0]=1;
+                       binomial[0][i]=0;
+               }
+               binomial[0][0]=1;
+               
+               binomial[1][0]=1;
+               binomial[1][1]=1;
+               
+               for(int i=2;i<=maxOrder;i++){
+                       binomial[1][i]=0;
+               }
+               
+               for(int i=2;i<=maxOrder;i++){
+                       for(int j=1;j<=maxOrder;j++){
+                               if(i==j){       binomial[i][j]=1;                                                                       }
+                               if(j>i) {       binomial[i][j]=0;                                                                       }
+                               else    {       binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j];     }
+                       }
+               }
+               
+               return binomial;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "binomial");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+double Perseus::basicPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, pwModel model){
+       try {
+               double GAP = model.GAP_OPEN;
+               double MATCH = model.MATCH;
+               double MISMATCH = model.MISMATCH;
+               
+               int queryLength = query.size();
+               int refLength = reference.size();
+               
+               vector<vector<double> > alignMatrix(queryLength + 1);
+               vector<vector<char> > alignMoves(queryLength + 1);
+               
+               for(int i=0;i<=queryLength;i++){
+                       if (m->control_pressed) { return 0; }
+                       alignMatrix[i].resize(refLength + 1, 0);
+                       alignMoves[i].resize(refLength + 1, 'x');
+               }
+               
+               for(int i=0;i<=queryLength;i++){
+                       if (m->control_pressed) { return 0; }
+                       alignMatrix[i][0] = GAP * i;
+                       alignMoves[i][0] = 'u';
+               }
+               
+               for(int i=0;i<=refLength;i++){
+                       if (m->control_pressed) { return 0; }
+                       alignMatrix[0][i] = GAP * i;
+                       alignMoves[0][i] = 'l';
+               }
+               
+               for(int i=1;i<=queryLength;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       for(int j=1;j<=refLength;j++){
+                               
+                               double nogapScore;              
+                               if(query[i-1] == reference[j-1]){       nogapScore = alignMatrix[i-1][j-1] + MATCH;             }
+                               else                                                    {       nogapScore = alignMatrix[i-1][j-1] + MISMATCH;  }
+                               
+                               double leftScore;
+                               if(i == queryLength)                    {       leftScore = alignMatrix[i][j-1];                                }
+                               else                                                    {       leftScore = alignMatrix[i][j-1] + GAP;                  }
+                               
+                               
+                               double upScore;
+                               if(j == refLength)                              {       upScore = alignMatrix[i-1][j];                                  }
+                               else                                                    {       upScore = alignMatrix[i-1][j] + GAP;                    }
+                               
+                               if(nogapScore > leftScore){
+                                       if(nogapScore > upScore){
+                                               alignMoves[i][j] = 'd';
+                                               alignMatrix[i][j] = nogapScore;
+                                       }
+                                       else{
+                                               alignMoves[i][j] = 'u';
+                                               alignMatrix[i][j] = upScore;
+                                       }
+                               }
+                               else{
+                                       if(leftScore > upScore){
+                                               alignMoves[i][j] = 'l';
+                                               alignMatrix[i][j] = leftScore;
+                                       }
+                                       else{
+                                               alignMoves[i][j] = 'u';
+                                               alignMatrix[i][j] = upScore;
+                                       }
+                               }
+                       }
+               }
+               
+               int i = queryLength;
+               int j = refLength;
+               
+               qAlign = "";
+               rAlign = "";
+                       
+               int diffs = 0;
+               int length = 0;
+               
+               while(i > 0 && j > 0){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(alignMoves[i][j] == 'd'){
+                               qAlign = query[i-1] + qAlign;
+                               rAlign = reference[j-1] + rAlign;
+
+                               if(query[i-1] != reference[j-1]){       diffs++;        }
+                               length++;
+                               
+                               i--;
+                               j--;
+                       }
+                       else if(alignMoves[i][j] == 'u'){
+                               qAlign = query[i-1] + qAlign;
+                               
+                               if(j != refLength)      {       rAlign = '-' + rAlign;  diffs++;        length++;       }
+                               else                            {       rAlign = '.' + rAlign;  }
+                               i--;
+                       }
+                       else if(alignMoves[i][j] == 'l'){
+                               rAlign = reference[j-1] + rAlign;
+                               
+                               if(i != queryLength){   qAlign = '-' + qAlign;  diffs++;        length++;       }
+                               else                            {       qAlign = '.' + qAlign;  }
+                               j--;
+                       }
+               }
+               
+               while(i>0){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       rAlign = '.' + rAlign;
+                       qAlign = query[i-1] + qAlign;
+                       i--;
+               }
+               
+               while(j>0){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       rAlign = reference[j-1] + rAlign;
+                       qAlign = '.' + qAlign;
+                       j--;
+               }
+               
+               
+
+               return double(diffs)/double(length);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "basicPairwiseAlignSeqs");
+               exit(1);
+       }
+       
+}
+/**************************************************************************************************/
+int Perseus::getDiffs(string qAlign, string rAlign, vector<int>& leftDiffs, vector<int>& leftMap, vector<int>& rightDiffs, vector<int>& rightMap){
+       try {
+               int alignLength = qAlign.length();
+               
+               int lDiffs = 0;
+               int lCount = 0;
+               for(int l=0;l<alignLength;l++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(qAlign[l] == '-'){
+                               lDiffs++;               
+                       }
+                       else if(qAlign[l] != '.'){
+                               
+                               if(rAlign[l] == '-'){
+                                       lDiffs++;
+                               }
+                               else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){
+                                       lDiffs++;
+                               }
+                               leftDiffs[lCount] = lDiffs;
+                               leftMap[lCount] = l;
+                               
+                               lCount++;
+                       }                       
+               }
+               
+               int rDiffs = 0;
+               int rCount = 0;
+               for(int l=alignLength-1;l>=0;l--){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(qAlign[l] == '-'){
+                               rDiffs++;               
+                       }
+                       else if(qAlign[l] != '.'){
+                               
+                               if(rAlign[l] == '-'){
+                                       rDiffs++;
+                               }
+                               else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){
+                                       rDiffs++;
+                               }
+                               
+                               rightDiffs[rCount] = rDiffs;
+                               rightMap[rCount] = l;
+                               rCount++;
+                               
+                       }
+                       
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "getDiffs");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int Perseus::getLastMatch(char direction, vector<vector<char> >& alignMoves, int i, int j, string& seqA, string& seqB){
+       try {
+               char nullReturn = -1;
+               
+               while(i>=1 && j>=1){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(direction == 'd'){
+                               if(seqA[i-1] == seqB[j-1])      {       return seqA[i-1];       }
+                               else                                            {       return nullReturn;      }
+                       }
+                       
+                       else if(direction == 'l')               {       j--;                            }
+                       else                                                    {       i--;                            }
+                       
+                       direction = alignMoves[i][j];
+               }
+               
+               return nullReturn;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "getLastMatch");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+int Perseus::toInt(char b){
+       try {
+               if(b == 'A')            {       return 0;       }
+               else if(b == 'C')       {       return 1;       }
+               else if(b == 'T')       {       return 2;       }
+               else if(b == 'G')       {       return 3;       }
+               else { m->mothurOut("[ERROR]: " + toString(b) + " is not ATGC."); m->mothurOutEndLine(); return -1; }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "toInt");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+double Perseus::modeledPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, vector<vector<double> >& correctMatrix){
+       try {
+               int queryLength = query.size();
+               int refLength = reference.size();
+               
+               vector<vector<double> > alignMatrix(queryLength + 1);
+               vector<vector<char> > alignMoves(queryLength + 1);
+               
+               for(int i=0;i<=queryLength;i++){
+                       if (m->control_pressed) { return 0; }
+                       alignMatrix[i].resize(refLength + 1, 0);
+                       alignMoves[i].resize(refLength + 1, 'x');
+               }
+               
+               for(int i=0;i<=queryLength;i++){
+                       if (m->control_pressed) { return 0; }
+                       alignMatrix[i][0] = 15.0 * i;
+                       alignMoves[i][0] = 'u';
+               }
+               
+               for(int i=0;i<=refLength;i++){
+                       if (m->control_pressed) { return 0; }
+                       alignMatrix[0][i] = 15.0 * i;
+                       alignMoves[0][i] = 'l';
+               }
+               
+               for(int i=1;i<=queryLength;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       for(int j=1;j<=refLength;j++){
+                               
+                               double nogap;           
+                               nogap = alignMatrix[i-1][j-1] + correctMatrix[toInt(query[i-1])][toInt(reference[j-1])];                        
+                               
+                               double gap;
+                               
+                               double left;
+                               if(i == queryLength){ //terminal gap
+                                       left = alignMatrix[i][j-1];
+                               }
+                               else{
+                                       if(reference[j-1] == getLastMatch('l', alignMoves, i, j, query, reference)){
+                                               gap = 4.0;
+                                       }
+                                       else{
+                                               gap = 15.0;
+                                       }
+                                       
+                                       left = alignMatrix[i][j-1] + gap;
+                               }
+                               
+                               double up;
+                               if(j == refLength){ //terminal gap
+                                       up = alignMatrix[i-1][j];
+                               }
+                               else{
+                                       
+                                       if(query[i-1] == getLastMatch('u', alignMoves, i, j, query, reference)){
+                                               gap = 4.0;
+                                       }
+                                       else{
+                                               gap = 15.0;
+                                       }
+                                       
+                                       up = alignMatrix[i-1][j] + gap;
+                               }
+                               
+                               
+                               if(nogap < left){
+                                       if(nogap < up){
+                                               alignMoves[i][j] = 'd';
+                                               alignMatrix[i][j] = nogap;
+                                       }
+                                       else{
+                                               alignMoves[i][j] = 'u';
+                                               alignMatrix[i][j] = up;
+                                       }
+                               }
+                               else{
+                                       if(left < up){
+                                               alignMoves[i][j] = 'l';
+                                               alignMatrix[i][j] = left;
+                                       }
+                                       else{
+                                               alignMoves[i][j] = 'u';
+                                               alignMatrix[i][j] = up;
+                                       }
+                               }
+                       }
+               }
+
+               int i = queryLength;
+               int j = refLength;
+               
+               int alignLength = 0;
+               
+               while(i > 0 && j > 0){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(alignMoves[i][j] == 'd'){
+                               qAlign = query[i-1] + qAlign;
+                               rAlign = reference[j-1] + rAlign;
+                               alignLength++;
+                               i--;
+                               j--;
+                       }
+                       else if(alignMoves[i][j] == 'u'){
+                               if(j != refLength){
+                                       qAlign = query[i-1] + qAlign;
+                                       rAlign = '-' + rAlign;
+                                       alignLength++;
+                               }
+                               
+                               i--;
+                       }
+                       else if(alignMoves[i][j] == 'l'){
+                               if(i != queryLength){
+                                       qAlign = '-' + qAlign;
+                                       rAlign = reference[j-1] + rAlign;
+                                       alignLength++;                          
+                               }
+                               
+                               j--;
+                       }
+               }
+
+               return alignMatrix[queryLength][refLength] / (double)alignLength;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "modeledPairwiseAlignSeqs");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+int Perseus::getAlignments(int curSequenceIndex, vector<seqData> sequences, vector<pwAlign>& alignments, vector<vector<int> >& leftDiffs, vector<vector<int> >& leftMaps, vector<vector<int> >& rightDiffs, vector<vector<int> >& rightMaps, int& bestRefSeq, int& bestRefDiff, vector<bool>& restricted){
+       try {
+               int numSeqs = sequences.size();
+               //int bestSequenceMismatch = PERSEUSMAXINT;
+
+               string curSequence = sequences[curSequenceIndex].sequence;
+               int curFrequency = sequences[curSequenceIndex].frequency; 
+
+               bestRefSeq = -1;
+               
+               int bestIndex = -1;
+               int bestDiffs = PERSEUSMAXINT;
+               int comparisons = 0;
+                       
+               pwModel model(0, -1, -1.5);
+               
+               for(int i=0;i<numSeqs;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(i != curSequenceIndex && restricted[i] != 1 && sequences[i].frequency >= 2 * curFrequency){
+                               string refSequence = sequences[i].sequence;
+                               
+                               leftDiffs[i].assign(curSequence.length(), 0);
+                               leftMaps[i].assign(curSequence.length(), 0);
+                               rightDiffs[i].assign(curSequence.length(), 0);
+                               rightMaps[i].assign(curSequence.length(), 0);
+                               
+                               basicPairwiseAlignSeqs(curSequence, refSequence, alignments[i].query, alignments[i].reference, model);
+                               
+
+                               getDiffs(alignments[i].query, alignments[i].reference, leftDiffs[i], leftMaps[i], rightDiffs[i], rightMaps[i]);
+                               
+                               int diffs = rightDiffs[i][curSequence.length()-1];                                                      
+
+                               if(diffs < bestDiffs){
+                                       bestDiffs = diffs;
+                                       bestIndex = i;
+                               }
+                               comparisons++;
+                               restricted[i] = 0;
+                       }
+                       else{
+                               restricted[i] = 1;
+                       }
+               }
+
+               bestRefSeq = bestIndex;
+               bestRefDiff = bestDiffs;
+               
+               return comparisons;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "getAlignments");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int Perseus::getChimera(vector<seqData> sequences,
+                          vector<vector<int> >& leftDiffs, 
+                          vector<vector<int> >& rightDiffs,
+                          int& leftParent, 
+                          int& rightParent, 
+                          int& breakPoint,
+                          vector<int>& singleLeft, 
+                          vector<int>& bestLeft, 
+                          vector<int>& singleRight, 
+                          vector<int>& bestRight, 
+                          vector<bool> restricted){
+       try {
+               int numRefSeqs = restricted.size();
+               int seqLength = leftDiffs[0].size();
+               
+               singleLeft.resize(seqLength, PERSEUSMAXINT);
+               bestLeft.resize(seqLength, -1);
+
+               for(int l=0;l<seqLength;l++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       for(int i=0;i<numRefSeqs;i++){
+                               
+                               if(restricted[i] == 0){
+                                       if(leftDiffs[i][l] < singleLeft[l] && sequences[i].frequency || (leftDiffs[i][l] == singleLeft[l] && sequences[i].frequency > sequences[bestLeft[l]].frequency)){
+                                               singleLeft[l] = leftDiffs[i][l];
+                                               bestLeft[l] = i;
+                                       }
+                               }
+                       }
+               }
+               
+               singleRight.resize(seqLength, PERSEUSMAXINT);
+               bestRight.resize(seqLength, -1);
+               
+               for(int l=0;l<seqLength;l++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       for(int i=0;i<numRefSeqs;i++){
+                               
+                               if(restricted[i] == 0){
+                                       if(rightDiffs[i][l] < singleRight[l] && sequences[i].frequency || (rightDiffs[i][l] == singleRight[l] && sequences[i].frequency > sequences[bestRight[l]].frequency)){
+                                               singleRight[l] = rightDiffs[i][l];
+                                               bestRight[l] = i;
+                                       }
+                               }
+                       }
+               }
+
+               
+               
+               int bestChimeraMismatches = PERSEUSMAXINT;
+               leftParent = -1;
+               rightParent = -1;
+               breakPoint = -1;
+               
+               for(int l=0;l<seqLength;l++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       int chimera = singleLeft[l] + singleRight[seqLength - l - 2];
+                       if(chimera < bestChimeraMismatches){
+                               bestChimeraMismatches = chimera;
+                               breakPoint = l;
+                               leftParent = bestLeft[l];
+                               rightParent = bestRight[seqLength - l - 2];
+                       }
+               }
+               
+               return bestChimeraMismatches;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "getChimera");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+string Perseus::stitchBimera(vector<pwAlign>& alignments, int leftParent, int rightParent, int breakPoint, vector<vector<int> >& leftMaps, vector<vector<int> >& rightMaps){
+       try {
+               int breakLeft = leftMaps[leftParent][breakPoint];
+               int breakRight = rightMaps[rightParent][rightMaps[rightParent].size() - breakPoint - 2];
+               
+               string left = alignments[leftParent].reference;
+               string right = alignments[rightParent].reference;
+               string chimera = "";
+               
+               for(int i=0;i<=breakLeft;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(left[i] != '-' && left[i] != '.'){
+                               chimera += left[i];
+                       }
+               }
+               
+
+               for(int i=breakRight;i<right.length();i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(right[i] != '-' && right[i] != '.'){
+                               chimera += right[i];
+                       }
+               }
+               
+               return chimera;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "stitchBimera");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int Perseus::getTrimera(vector<seqData>& sequences,
+                          vector<vector<int> >& leftDiffs,
+                          int& leftParent,
+                          int& middleParent,
+                          int& rightParent,
+                          int& breakPointA,
+                          int& breakPointB,
+                          vector<int>& singleLeft,
+                          vector<int>& bestLeft, 
+                          vector<int>& singleRight,
+                          vector<int>& bestRight,
+                          vector<bool> restricted){
+       try {
+               int numRefSeqs = leftDiffs.size();
+               int alignLength = leftDiffs[0].size();
+               int bestTrimeraMismatches = PERSEUSMAXINT;
+               
+               leftParent = -1;
+               middleParent = -1;
+               rightParent = -1;
+               
+               breakPointA = -1;
+               breakPointB = -1;
+               
+               vector<vector<int> > minDelta(alignLength);
+               vector<vector<int> > minDeltaSeq(alignLength);
+               
+               for(int i=0;i<alignLength;i++){
+                       if (m->control_pressed) { return 0; }
+                       minDelta[i].assign(alignLength, PERSEUSMAXINT);
+                       minDeltaSeq[i].assign(alignLength, -1);
+               }
+               
+               for(int x=0;x<alignLength;x++){
+                       for(int y=x;y<alignLength-1;y++){
+                               for(int i=0;i<numRefSeqs;i++){
+                                       
+                                       if (m->control_pressed) { return 0; }
+                                       
+                                       if(restricted[i] == 0){
+                                               int delta = leftDiffs[i][y] - leftDiffs[i][x];
+
+                                               if(delta < minDelta[x][y] || delta == minDelta[x][y] && sequences[i].frequency > sequences[minDeltaSeq[x][y]].frequency){
+                                                       minDelta[x][y] = delta;
+                                                       minDeltaSeq[x][y] = i;                                  
+                                               }                               
+                                       }
+                               }
+                               minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
+                               
+                               if(minDelta[x][y] < bestTrimeraMismatches){
+                                       bestTrimeraMismatches = minDelta[x][y];
+                                       
+                                       breakPointA = x;
+                                       breakPointB = y;
+                                       
+                                       leftParent = bestLeft[x];
+                                       middleParent = minDeltaSeq[x][y];
+                                       rightParent = bestRight[alignLength - y - 2];                           
+                               }
+                       }               
+               }
+               
+               return bestTrimeraMismatches;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "getTrimera");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+string Perseus::stitchTrimera(vector<pwAlign> alignments, int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB, vector<vector<int> >& leftMaps, vector<vector<int> >& rightMaps){
+       try {
+               int p1SplitPoint = leftMaps[leftParent][breakPointA];
+               int p2SplitPoint = leftMaps[middleParent][breakPointB];
+               int p3SplitPoint = rightMaps[rightParent][rightMaps[rightParent].size() - breakPointB - 2];
+               
+               string chimeraRefSeq;
+               for(int i=0;i<=p1SplitPoint;i++){
+                       if (m->control_pressed) { return chimeraRefSeq; }
+                       if(alignments[leftParent].reference[i] != '-' && alignments[leftParent].reference[i] != '.'){
+                               chimeraRefSeq += alignments[leftParent].reference[i];
+                       }
+               }
+               
+               for(int i=p1SplitPoint+1;i<=p2SplitPoint;i++){
+                       if (m->control_pressed) { return chimeraRefSeq; }
+                       if(alignments[middleParent].reference[i] != '-' && alignments[middleParent].reference[i] != '.'){
+                               chimeraRefSeq += alignments[middleParent].reference[i];
+                       }
+               }
+               
+               for(int i=p3SplitPoint;i<alignments[rightParent].reference.length();i++){
+                       if (m->control_pressed) { return chimeraRefSeq; }
+                       if(alignments[rightParent].reference[i] != '-' && alignments[rightParent].reference[i] != '.'){
+                               chimeraRefSeq += alignments[rightParent].reference[i];
+                       }
+               }
+
+               return chimeraRefSeq;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "stitchTrimera");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+int Perseus::threeWayAlign(string query, string parent1, string parent2, string& qAlign, string& aAlign, string& bAlign){
+       try {
+               pwModel model(1.0, -1.0, -5.0);
+               
+               string qL, rL;
+               string qR, rR;
+
+               basicPairwiseAlignSeqs(query, parent1, qL, rL, model);  
+               basicPairwiseAlignSeqs(query, parent2, qR, rR, model);
+
+               int lLength = qL.length();
+               int rLength = qR.length();
+               
+               string qLNew, rLNew;
+               string qRNew, rRNew;
+               
+               int lIndex = 0;
+               int rIndex = 0;
+               
+               while(lIndex<lLength || rIndex<rLength){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(qL[lIndex] == qR[rIndex]){
+                               qLNew += qL[lIndex];
+                               rLNew += rL[lIndex];
+                               lIndex++;
+
+                               qRNew += qR[rIndex];
+                               rRNew += rR[rIndex];
+                               rIndex++;
+                       }
+                       else if(qL[lIndex] == '-' || qL[lIndex] == '.'){
+                               //insert a gap into the right sequences
+                               qLNew += qL[lIndex];
+                               rLNew += rL[lIndex];
+                               lIndex++;
+                               
+                               if(rIndex != rLength){
+                                       qRNew += '-';
+                                       rRNew += '-';
+                               }
+                               else{
+                                       qRNew += '.';
+                                       rRNew += '.';
+                               }
+                       }
+                       else if(qR[rIndex] == '-' || qR[rIndex] == '.'){
+                               //insert a gap into the left sequences
+                               qRNew += qR[rIndex];
+                               rRNew += rR[rIndex];
+                               rIndex++;
+                               
+
+                               if(lIndex != lLength){
+                                       qLNew += '-';
+                                       rLNew += '-';
+                               }
+                               else{
+                                       qLNew += '.';
+                                       rLNew += '.';
+                               }
+                               
+                       }
+               }
+               
+               qAlign = qLNew;
+               aAlign = rLNew;
+               bAlign = rRNew;
+               
+               bool qStart = 0;
+               bool aStart = 0;
+               bool bStart = 0;
+               
+               for(int i=0;i<qAlign.length();i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(qStart == 0){
+                               if(qAlign[i] == '-')    {       qAlign[i] = '.';        }
+                               else                                    {       qStart = 1;                     }
+                       }
+                       if(aStart == 0){
+                               if(aAlign[i] == '-')    {       aAlign[i] = '.';        }
+                               else                                    {       aStart = 1;                     }
+                       }
+                       if(bStart == 0){
+                               if(bAlign[i] == '-')    {       bAlign[i] = '.';        }
+                               else                                    {       bStart = 1;                     }
+                       }
+                       if(aStart == 1 && bStart == 1 && qStart == 1){
+                               break;
+                       }
+               }
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "threeWayAlign");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+double Perseus::calcLoonIndex(string query, string parent1, string parent2, int breakPoint, vector<vector<double> >& binMatrix){
+       try {
+               string queryAln, leftParentAln, rightParentAln;
+               threeWayAlign(query, parent1, parent2, queryAln, leftParentAln, rightParentAln);
+               
+               int alignLength = queryAln.length();
+
+               int endPos = alignLength;
+               for(int i=alignLength-1;i>=0; i--){
+                       if(queryAln[i] != '.' && leftParentAln[i] != '.' && rightParentAln[i] != '.'){
+                               endPos = i + 1;
+                               break;
+                       }
+               }
+               
+               int diffToLeftCount = 0;
+               vector<int> diffToLeftMap(alignLength, 0);
+               
+               int diffToRightCount = 0;
+               vector<int> diffToRightMap(alignLength, 0);
+               
+               for(int i=0;i<endPos;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(queryAln[i] != leftParentAln[i]){
+                               diffToLeftMap[diffToLeftCount] = i;
+                               diffToLeftCount++;
+                       }
+                       
+                       if(queryAln[i] != rightParentAln[i]){
+                               diffToRightMap[diffToRightCount] = i;
+                               diffToRightCount++;
+                       }
+               }
+               
+               
+               
+               diffToLeftMap[diffToLeftCount] = endPos;
+               diffToRightMap[diffToRightCount] = endPos;
+               
+               int indexL = 0;
+               int indexR = 0;
+               int indexS = 0;
+               
+               vector<int> diffs;
+               vector<int> splits;
+               
+               splits.push_back(-1);
+               diffs.push_back(diffToRightCount);
+               indexS++;
+                       
+               while(indexL < diffToLeftCount || indexR < diffToRightCount){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(diffToLeftMap[indexL] <= diffToRightMap[indexR]){
+                               diffs.push_back(diffs[indexS - 1] + 1);
+                               splits.push_back(diffToLeftMap[indexL]);
+                               
+                               indexL++;
+                               indexS++;                       
+                       }
+                       else if(diffToLeftMap[indexL] > diffToRightMap[indexR]) {
+                               diffs.push_back(diffs[indexS - 1] - 1);
+                               splits.push_back(diffToRightMap[indexR]);
+                               
+                               indexR++;
+                               indexS++;                       
+                       }
+               }
+               
+               int minDiff = PERSEUSMAXINT;
+               int minIndex = -1;
+               for(int i=0;i<indexS;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if(diffs[i] < minDiff){
+                               minDiff = diffs[i];
+                               minIndex = i;
+                       }
+               }
+               
+               int splitPos = endPos;
+               if(minIndex < indexS - 1){
+                       splitPos = (splits[minIndex]+splits[minIndex+1]) / 2;
+               }
+               
+               int diffToChimera = 0;
+               int leftDiffToP1 = 0;
+               int rightDiffToP1 = 0;
+               int leftDiffToP2 = 0;
+               int rightDiffToP2 = 0;
+               
+               for(int i=0;i<endPos;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       char bQuery = queryAln[i];
+                       char bP1 = leftParentAln[i];
+                       char bP2 = rightParentAln[i];
+                       
+                       char bConsensus = bQuery;
+                       if(bP1 == bP2){ bConsensus = bP1;       }
+                       
+                       if(bConsensus != bQuery){
+                               diffToChimera++;
+                       }
+                       
+                       if(bConsensus != bP1){
+                               if(i <= splitPos){
+                                       leftDiffToP1++;
+                               }
+                               else{
+                                       rightDiffToP1++;                                
+                               }
+                       }
+                       if(bConsensus != bP2){
+                               if(i <= splitPos){
+                                       leftDiffToP2++;
+                               }
+                               else{
+                                       rightDiffToP2++;                                
+                               }
+                       }
+               }               
+               
+
+               int diffToClosestParent, diffToFurtherParent;
+               int xA, xB, yA, yB;
+               double aFraction, bFraction;
+               
+               if(diffToLeftCount <= diffToRightCount){        //if parent 1 is closer
+
+                       diffToClosestParent = leftDiffToP1 + rightDiffToP1;
+                       xA = leftDiffToP1;
+                       xB = rightDiffToP1;
+                       
+                       diffToFurtherParent = leftDiffToP2 + rightDiffToP2;
+                       yA = leftDiffToP2;
+                       yB = rightDiffToP2;
+                       
+                       aFraction = double(splitPos + 1)/(double) endPos;
+                       bFraction = 1 - aFraction;
+                       
+               }
+               else{                                                                                           //if parent 2 is closer
+
+                       diffToClosestParent = leftDiffToP2 + rightDiffToP2;
+                       xA = rightDiffToP2;
+                       xB = leftDiffToP2;
+                       
+                       diffToFurtherParent = leftDiffToP1 + rightDiffToP1;
+                       yA = rightDiffToP1;
+                       yB = leftDiffToP1;
+                       
+                       bFraction = double(splitPos + 1)/(double) endPos;
+                       aFraction = 1 - bFraction;
+
+               }
+               
+               double loonIndex = 0;
+               
+               int totalDifference = diffToClosestParent + diffToChimera;
+               
+               if(totalDifference > 0){
+                       double prob = 0;
+                       
+                       for(int i=diffToClosestParent;i<=totalDifference;i++){
+                               prob += binMatrix[totalDifference][i] * pow(0.50, i) * pow(0.50, totalDifference - i);
+                       }
+                       loonIndex += -log(prob);
+               }
+               
+               if(diffToFurtherParent > 0){
+                       double prob = 0;
+                       
+                       for(int i=yA;i<=diffToFurtherParent;i++){
+                               prob += binMatrix[diffToFurtherParent][i] * pow(aFraction, i) * pow(1-aFraction, diffToFurtherParent - i);
+                       }
+                       loonIndex += -log(prob);
+               }
+               
+               if(diffToClosestParent > 0){
+                       double prob = 0;
+                       
+                       for(int i=xB;i<=diffToClosestParent;i++){
+                               prob += binMatrix[diffToClosestParent][i] * pow(bFraction, i) * pow(1-bFraction, diffToClosestParent - i);
+                       }
+                       loonIndex += -log(prob);
+               }
+               
+               return loonIndex;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "calcLoonIndex");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+double Perseus::calcBestDistance(string query, string reference){
+       try {
+               int alignLength = query.length();
+               int mismatch = 0;
+               int counter = 0;
+               
+               for(int i=0;i<alignLength;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       if((query[i] != '.' || reference[i] != '.') && (query[i] != '-' && reference[i] != '-')){
+                               if(query[i] != reference[i]){   mismatch++;     }
+                               counter++;                      
+                       }
+               }
+               
+               return (double)mismatch / (double)counter;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "calcBestDistance");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+double Perseus::classifyChimera(double singleDist, double cIndex, double loonIndex, double alpha, double beta){
+       try {
+               double difference = cIndex - singleDist;        //y
+               double probability;
+               
+               if(cIndex >= 0.15 || difference > 0.00){
+                       probability = 0.0000;
+               }
+               else{
+                       probability = 1.0 / (1.0 + exp(-(alpha + beta * loonIndex)));
+               }
+               
+               return probability;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Perseus", "classifyChimera");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
diff --git a/myPerseus.h b/myPerseus.h

new file mode 100644 (file)

index 0000000..93ef8ae
--- /dev/null
+++ b/myPerseus.h
@@ -0,0 +1,85 @@
+#ifndef MOTHURPERSEUS
+#define MOTHURPERSEUS
+
+/*
+ *  myPerseus.h
+ *  
+ *
+ *  Created by Pat Schloss on 9/5/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+
+#include "mothurout.h"
+
+/**************************************************************************************************/
+struct seqData {
+       
+       seqData(string name, string seq, int freq) : seqName(name), sequence(seq), frequency(freq) {    }
+       
+       bool operator<( seqData const& rhs ) const {
+               
+               bool verdict = 0;
+               
+               if(frequency < rhs.frequency){
+                       verdict = 1;
+               }
+               else if(frequency == rhs.frequency){
+                       verdict = (seqName > rhs.seqName);
+               }
+               
+               return verdict; 
+       }
+       
+       string seqName;
+       string sequence;
+       int frequency;
+};
+/**************************************************************************************************/
+struct pwModel {
+       pwModel(double m, double mm, double g): MATCH(m), MISMATCH(mm), GAP_OPEN(g) {;}
+       double MATCH;
+       double MISMATCH;
+       double GAP_OPEN;        
+};
+/**************************************************************************************************/
+struct pwAlign {
+       pwAlign(): query(""), reference(""){}
+       pwAlign(string q, string r): query(q), reference(r){}
+       string query;
+       string reference;
+       
+};
+/**************************************************************************************************/
+class Perseus {
+       
+public:
+       Perseus() { m = MothurOut::getInstance(); }
+       ~Perseus() {}
+       
+       vector<vector<double> > binomial(int);
+       double modeledPairwiseAlignSeqs(string, string, string&, string&, vector<vector<double> >&);
+       int getAlignments(int, vector<seqData>, vector<pwAlign>&, vector<vector<int> >& , vector<vector<int> >&, vector<vector<int> >&, vector<vector<int> >&, int&, int&, vector<bool>&);
+       int getChimera(vector<seqData>,vector<vector<int> >&, vector<vector<int> >&,int&, int&, int&,vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<bool>);
+       string stitchBimera(vector<pwAlign>&, int, int, int, vector<vector<int> >&, vector<vector<int> >&);
+       int getTrimera(vector<seqData>&, vector<vector<int> >&, int&, int&, int&, int&, int&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<bool>);
+       string stitchTrimera(vector<pwAlign>, int, int, int, int, int, vector<vector<int> >&, vector<vector<int> >&);
+       double calcLoonIndex(string, string, string, int, vector<vector<double> >&);
+       double classifyChimera(double, double, double, double, double);
+       
+private:
+       MothurOut* m;
+       int toInt(char);
+       double basicPairwiseAlignSeqs(string, string, string&, string&, pwModel);
+       int getDiffs(string, string, vector<int>&, vector<int>&, vector<int>&, vector<int>&);
+       int getLastMatch(char, vector<vector<char> >&, int, int, string&, string&);
+       int threeWayAlign(string, string, string, string&, string&, string&);
+       double calcBestDistance(string, string);
+
+       
+};
+/**************************************************************************************************/
+#endif
+
+
diff --git a/phylosummary.cpp b/phylosummary.cpp

index b645bac794777a1050de116407fa834370e7940e..22e8e75ce7047e00b135483f16b566eb506a71fb 100644 (file)
--- a/phylosummary.cpp
+++ b/phylosummary.cpp
@@ -131,7 +131,7 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                 int level = 0;
                 
                 //are there confidence scores, if so remove them
-               if (seqTaxonomy.find_first_of('(') != -1) {  removeConfidences(seqTaxonomy);    }
+               if (seqTaxonomy.find_first_of('(') != -1) {  m->removeConfidences(seqTaxonomy); }
                 
                 while (seqTaxonomy != "") {
                         
@@ -229,7 +229,7 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
                 int level = 0;
                 
                 //are there confidence scores, if so remove them
-               if (seqTaxonomy.find_first_of('(') != -1) {  removeConfidences(seqTaxonomy);    }
+               if (seqTaxonomy.find_first_of('(') != -1) {  m->removeConfidences(seqTaxonomy); }
                 
                 while (seqTaxonomy != "") {
                         
@@ -496,35 +496,6 @@ void PhyloSummary::readTreeStruct(ifstream& in){
         }
  }
  /**************************************************************************************************/
-void PhyloSummary::removeConfidences(string& tax) {
-       try {
-               
-               string taxon;
-               string newTax = "";
-               
-               while (tax.find_first_of(';') != -1) {
-                       //get taxon
-                       taxon = tax.substr(0,tax.find_first_of(';'));
-                       
-                       int pos = taxon.find_first_of('(');
-                       if (pos != -1) {
-                               taxon = taxon.substr(0, pos); //rip off confidence 
-                       }
-                       
-                       taxon += ";";
-                       
-                       tax = tax.substr(tax.find_first_of(';')+1, tax.length());
-                       newTax += taxon;
-               }
-               
-               tax = newTax;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PhyloSummary", "removeConfidences");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
  
  
         
diff --git a/phylosummary.h b/phylosummary.h

index 3adcc3f5fb0ed1f31bf0f2c223dff553f487a61a..cdec0d0a4ac7ca83cb0845988adb90152bcae07b 100644 (file)
--- a/phylosummary.h
+++ b/phylosummary.h
@@ -55,8 +55,6 @@ private:
         int numSeqs;
         int maxLevel;
         MothurOut* m;
-       
-       void removeConfidences(string&);
  };
  
  /**************************************************************************************************/
diff --git a/removelineagecommand.cpp b/removelineagecommand.cpp

index b72f4b669d43b4b44fa1c20df4812155aaff82fc..f55158fc259ace27c87d5a4fcca10e7c55539f13 100644 (file)
--- a/removelineagecommand.cpp
+++ b/removelineagecommand.cpp
@@ -555,7 +555,8 @@ int RemoveLineageCommand::readTax(){
                         if (hasConPos != string::npos) {  
                                 taxonsHasConfidence[i] = true; 
                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
-                               noConfidenceTaxons[i] = removeConfidences(listOfTaxons[i]);
+                               noConfidenceTaxons[i] = listOfTaxons[i];
+                               m->removeConfidences(noConfidenceTaxons[i]);
                         }
                 }
                 
@@ -577,7 +578,8 @@ int RemoveLineageCommand::readTax(){
                                         
                                         int hasConfidences = tax.find_first_of('(');
                                         if (hasConfidences != string::npos) { 
-                                               newtax = removeConfidences(tax);
+                                               newtax = tax;
+                                               m->removeConfidences(newtax);
                                         }
                                         
                                         int pos = newtax.find(noConfidenceTaxons[j]);
@@ -609,7 +611,8 @@ int RemoveLineageCommand::readTax(){
                                                 string noNewTax = tax;
                                                 int hasConfidences = tax.find_first_of('(');
                                                 if (hasConfidences != string::npos) { 
-                                                       noNewTax = removeConfidences(tax);
+                                                       noNewTax = tax;
+                                                       m->removeConfidences(noNewTax);
                                                 }
                                                 
                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
@@ -726,29 +729,6 @@ vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
                 exit(1);
         }
  }
-/**************************************************************************************************/
-string RemoveLineageCommand::removeConfidences(string tax) {
-       try {
-               
-               string taxon = "";
-               int taxLength = tax.length();
-               for(int i=0;i<taxLength;i++){
-                       if(tax[i] == ';'){
-                               taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
-                               taxon += ";";
-                       }
-                       else{
-                               taxon += tax[i];
-                       }
-               }
-                               
-               return taxon;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "RemoveLineageCommand", "removeConfidences");
-               exit(1);
-       }
-}
  //**********************************************************************************************************************
  //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
  int RemoveLineageCommand::readAlign(){
diff --git a/removelineagecommand.h b/removelineagecommand.h

index bda398b76b7cf8b0b2bf86c618e8adff71a1ff3a..a4ffea9c529829da3c3f08cfbc41c5e3bdf5ddcf 100644 (file)
--- a/removelineagecommand.h
+++ b/removelineagecommand.h
@@ -42,7 +42,6 @@ class RemoveLineageCommand : public Command {
                 int readAlign();
                 int readList();
                 int readTax();  
-               string removeConfidences(string);
                 vector< map<string, float> > getTaxons(string);
  };
  
diff --git a/taxonomyequalizer.cpp b/taxonomyequalizer.cpp

index 2f6fc77c7c29f689c6283c68c76ae14d8eea2b79..c0507261eaf2c96c9381172a646317c7cb6a1db3 100644 (file)
--- a/taxonomyequalizer.cpp
+++ b/taxonomyequalizer.cpp
@@ -46,7 +46,7 @@ TaxEqualizer::TaxEqualizer(string tfile, int c, string o) : cutoff(c), outputDir
                                 
                                 in >> name >> tax;   m->gobble(in);
                                 
-                               if (containsConfidence) {  removeConfidences(tax);      }
+                               if (containsConfidence) {  m->removeConfidences(tax);   }
                                 
                                 //is this a taxonomy that needs to be extended?
                                 if (seqLevels[name] < highestLevel) {
@@ -152,29 +152,4 @@ void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel)
         }
  }
  /**************************************************************************************************/
-void TaxEqualizer::removeConfidences(string& tax) {
-       try {
-               
-               string taxon;
-               string newTax = "";
-               
-               while (tax.find_first_of(';') != -1) {
-                       //get taxon
-                       taxon = tax.substr(0,tax.find_first_of(';'));
-                       taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
-                       taxon += ";";
-                       
-                       tax = tax.substr(tax.find_first_of(';')+1, tax.length());
-                       newTax += taxon;
-               }
-               
-               tax = newTax;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TaxEqualizer", "removeConfidences");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
  
diff --git a/taxonomyequalizer.h b/taxonomyequalizer.h

index c1e43f446f4700bd52fd1a17641759b2adbf7f27..191267c1a44833c69437b2f8dbbb616e7566cf60 100644 (file)
--- a/taxonomyequalizer.h
+++ b/taxonomyequalizer.h
@@ -39,7 +39,6 @@ private:
         int getHighestLevel(ifstream&);  //scans taxonomy file to find taxonomy with highest level
         void extendTaxonomy(string, string&, int);  //name, taxonomy, desired level
         void truncateTaxonomy(string, string&, int);  //name, taxonomy, desired level
-       void removeConfidences(string&);  //removes the confidence limits on the taxon 
         MothurOut* m;
         
  };
author	westcott <westcott>
	Mon, 7 Nov 2011 16:22:50 +0000 (16:22 +0000)
committer	westcott <westcott>
	Mon, 7 Nov 2011 16:22:50 +0000 (16:22 +0000)
Mothur.xcodeproj/project.pbxproj		patch \| blob \| history
chimeraperseuscommand.cpp	[new file with mode: 0644]	patch \| blob
chimeraperseuscommand.h	[new file with mode: 0644]	patch \| blob
chimerauchimecommand.cpp		patch \| blob \| history
classifyotucommand.cpp		patch \| blob \| history
classifyotucommand.h		patch \| blob \| history
commandfactory.cpp		patch \| blob \| history
getlineagecommand.cpp		patch \| blob \| history
getlineagecommand.h		patch \| blob \| history
metastatscommand.cpp		patch \| blob \| history
mothurmetastats.cpp		patch \| blob \| history
mothurout.cpp		patch \| blob \| history
mothurout.h		patch \| blob \| history
myPerseus.cpp	[new file with mode: 0644]	patch \| blob
myPerseus.h	[new file with mode: 0644]	patch \| blob
phylosummary.cpp		patch \| blob \| history
phylosummary.h		patch \| blob \| history
removelineagecommand.cpp		patch \| blob \| history
removelineagecommand.h		patch \| blob \| history
taxonomyequalizer.cpp		patch \| blob \| history
taxonomyequalizer.h		patch \| blob \| history