]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraperseuscommand.cpp
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / chimeraperseuscommand.cpp
index 240ba65adf628099e8234ed7c8c7a692913d71ee..7ae5d69141979736bcb2b6fcfb25e1ba4682086d 100644 (file)
 #include "chimeraperseuscommand.h"
 #include "deconvolutecommand.h"
 #include "sequence.hpp"
+#include "counttable.h"
+#include "sequencecountparser.h"
 //**********************************************************************************************************************
 vector<string> ChimeraPerseusCommand::setParameters(){ 
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -36,10 +39,11 @@ vector<string> ChimeraPerseusCommand::setParameters(){
 string ChimeraPerseusCommand::getHelpString(){ 
        try {
                string helpString = "";
-               helpString += "The chimera.perseus command reads a fastafile and namefile and outputs potentially chimeric sequences.\n";
+               helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
                helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
-               helpString += "The name parameter allows you to provide a name file associated with your fasta file. If none is given unique.seqs will be run to generate one. \n";
+               helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
+        helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
@@ -58,6 +62,27 @@ string ChimeraPerseusCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string ChimeraPerseusCommand::getOutputFileNameTag(string type, string inputName=""){  
+       try {
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "chimera") {  outputFileName =  "perseus.chimeras"; }
+            else if (type == "accnos") {  outputFileName =  "perseus.accnos"; }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 ChimeraPerseusCommand::ChimeraPerseusCommand(){        
        try {
                abort = true; calledHelp = true;
@@ -75,6 +100,8 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(){
 ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
        try {
                abort = false; calledHelp = false; 
+        hasCount = false;
+        hasName = false;
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -86,7 +113,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
                        
-                       ValidParameters validParameter("chimera.uchime");
+                       ValidParameters validParameter("chimera.perseus");
                        map<string,string>::iterator it;
                        
                        //check to make sure all parameters are valid for command
@@ -182,15 +209,9 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                        
                        
                        //check for required parameters
-                       bool hasName = true;
                        namefile = validParameter.validFile(parameters, "name", false);
-                       if (namefile == "not found") { 
-                               //if there is a current fasta file, use it
-                               string filename = m->getNameFile(); 
-                               if (filename != "") { nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; }                                
-                               hasName = false;
-                       }else { 
+                       if (namefile == "not found") { namefile = "";   }
+                       else { 
                                m->splitAtDash(namefile, nameFileNames);
                                
                                //go through files and make sure they are good, if not, then disregard them
@@ -256,12 +277,101 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                                                }
                                        }
                                }
+                       }
+            
+            if (nameFileNames.size() != 0) { hasName = true; }
+            
+            //check for required parameters
+            vector<string> countfileNames;
+                       countfile = validParameter.validFile(parameters, "count", false);
+                       if (countfile == "not found") { 
+                countfile = "";  
+                       }else { 
+                               m->splitAtDash(countfile, countfileNames);
                                
-                               //make sure there is at least one valid file left
-                               if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < countfileNames.size(); i++) {
+                                       
+                                       bool ignore = false;
+                                       if (countfileNames[i] == "current") { 
+                                               countfileNames[i] = m->getCountTableFile(); 
+                                               if (countfileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       countfileNames.erase(countfileNames.begin()+i);
+                                                       i--;
+                                               }
+                                       }
+                                       
+                                       if (!ignore) {
+                                               
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(countfileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
+                                               }
+                                               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               
+                                               ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
+                                                               m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               countfileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //default path is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
+                                                               m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               countfileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               in.close();
+                                               
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       countfileNames.erase(countfileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setCountTableFile(countfileNames[i]);
+                                               }
+                                       }
+                               }
                        }
-                       
-                       if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
+            
+            if (countfileNames.size() != 0) { hasCount = true; }
+            
+                       //exactly one of name or count may be supplied; with neither, fall back to the current name file, then the current count file
+            if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+            
+            if (!hasName && !hasCount) { 
+                //if there is a current name file, use it, else look for current count file
+                               string filename = m->getNameFile(); 
+                               if (filename != "") { hasName = true; nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
+                               else { 
+                    filename = m->getCountTableFile();
+                    if (filename != "") { hasCount = true; countfileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                    else { m->mothurOut("[ERROR]: You must provide a count or name file."); m->mothurOutEndLine(); abort = true;  }
+                }
+            }
+            if (!hasName && hasCount) { nameFileNames = countfileNames; }
+            
+                       if (nameFileNames.size() != fastaFileNames.size()) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
                        
                        bool hasGroup = true;
                        groupfile = validParameter.validFile(parameters, "group", false);
@@ -339,22 +449,23 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                        
                        if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
                        
+            if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
                        
                        string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
                        
                        temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.50";  }
-                       convert(temp, cutoff);
+                       m->mothurConvert(temp, cutoff);
                        
                        temp = validParameter.validFile(parameters, "alpha", false);    if (temp == "not found"){       temp = "-5.54"; }
-                       convert(temp, alpha);
+                       m->mothurConvert(temp, alpha);
                        
                        temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
-                       convert(temp, beta);
+                       m->mothurConvert(temp, beta);
                }
        }
        catch(exception& e) {
@@ -376,8 +487,9 @@ int ChimeraPerseusCommand::execute(){
                        
                        int start = time(NULL); 
                        if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
-                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.chimera";
-                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "perseus.accnos";
+                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
+                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
+
                        //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
                        
                        //you provided a groupfile
@@ -393,41 +505,82 @@ int ChimeraPerseusCommand::execute(){
                        
                        int numSeqs = 0;
                        int numChimeras = 0;
-                       
-                       if (groupFile != "") {
-                               //Parse sequences by group
-                               SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
-                               vector<string> groups = parser.getNamesOfGroups();
-                               
-                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
-                               
-                               //clears files
-                               ofstream out, out1, out2;
-                               m->openOutputFile(outputFileName, out); out.close(); 
-                               m->openOutputFile(accnosFileName, out1); out1.close();
-                               
-                               if(processors == 1)     {       numSeqs = driverGroups(parser, outputFileName, accnosFileName, 0, groups.size(), groups);       }
-                               else                            {       numSeqs = createProcessesGroups(parser, outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
-                               
-                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
-                               
-                               numChimeras = deconvoluteResults(parser, outputFileName, accnosFileName);
-                               
-                               m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
-                               
-                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
-                               
-                       }else{
-                               if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
-                               
-                               //read sequences and store sorted by frequency
-                               vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
-                               
-                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
-                               
-                               numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
+            
+            if (hasCount) {
+                CountTable* ct = new CountTable();
+                ct->readTable(nameFile);
+                
+                if (ct->hasGroupInfo()) {
+                    cparser = new SequenceCountParser(fastaFileNames[s], *ct);
+                    
+                    vector<string> groups = cparser->getNamesOfGroups();
+                    
+                    if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
+                    
+                    //clears files
+                    ofstream out, out1, out2;
+                    m->openOutputFile(outputFileName, out); out.close(); 
+                    m->openOutputFile(accnosFileName, out1); out1.close();
+                    
+                    if(processors == 1)        {       numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups);       }
+                    else                               {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
+                    
+                    if (m->control_pressed) {  delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+                    map<string, string> uniqueNames = cparser->getAllSeqsMap();
+                    numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+                    delete cparser;
+
+                    m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
+                    
+                    if (m->control_pressed) {  delete ct; for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;  } 
+                    
+                }else {
+                    if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+                    
+                    //read sequences and store sorted by frequency
+                    vector<seqData> sequences = readFiles(fastaFileNames[s], ct);
+                    
+                    if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
+                    
+                    numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras);   
+                }
+                delete ct;
+            }else {
+                if (groupFile != "") {
+                    //Parse sequences by group
+                    parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+                    vector<string> groups = parser->getNamesOfGroups();
+                    
+                    if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        }  return 0; }
+                    
+                    //clears files
+                    ofstream out, out1, out2;
+                    m->openOutputFile(outputFileName, out); out.close(); 
+                    m->openOutputFile(accnosFileName, out1); out1.close();
+                    
+                    if(processors == 1)        {       numSeqs = driverGroups(outputFileName, accnosFileName, 0, groups.size(), groups);       }
+                    else                               {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
+                    
+                    if (m->control_pressed) {  delete parser; for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+                    map<string, string> uniqueNames = parser->getAllSeqsMap();
+                    numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+                    delete parser;
+                    
+                    m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
+                    
+                    if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0;  }         
+                }else{
+                    if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+                    
+                    //read sequences and store sorted by frequency
+                    vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
+                    
+                    if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        } return 0; }
+                    
+                    numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
+                }
                        }
-                       
+            
                        if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
                        
                        m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
@@ -466,14 +619,15 @@ string ChimeraPerseusCommand::getNamesFile(string& inputFile){
                string inputString = "fasta=" + inputFile;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-               
+               m->mothurCalling = true;
+        
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
-               
+               m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                nameFile = filenames["name"][0];
@@ -487,7 +641,7 @@ string ChimeraPerseusCommand::getNamesFile(string& inputFile){
        }
 }
 //**********************************************************************************************************************
-int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFName, string accnos, int start, int end, vector<string> groups){
+int ChimeraPerseusCommand::driverGroups(string outputFName, string accnos, int start, int end, vector<string> groups){
        try {
                
                int totalSeqs = 0;
@@ -499,7 +653,7 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
                        
                        int start = time(NULL);  if (m->control_pressed) {  return 0; }
                        
-                       vector<seqData> sequences = loadSequences(parser, groups[i]);
+                       vector<seqData> sequences = loadSequences(groups[i]);
                        
                        if (m->control_pressed) { return 0; }
                        
@@ -524,30 +678,48 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
        }
 }      
 //**********************************************************************************************************************
-vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, string group){
+vector<seqData> ChimeraPerseusCommand::loadSequences(string group){
        try {
-               
-               vector<Sequence> thisGroupsSeqs = parser.getSeqs(group);
-               map<string, string> nameMap = parser.getNameMap(group);
-               map<string, string>::iterator it;
-               
-               vector<seqData> sequences;
-               bool error = false;
-               
-               for (int i = 0; i < thisGroupsSeqs.size(); i++) {
-               
-                       if (m->control_pressed) {  return sequences; }
-                       
-                       it = nameMap.find(thisGroupsSeqs[i].getName());
-                       if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
-                       else {
-                               int num = m->getNumNames(it->second);
-                               sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
-                       }
+        bool error = false;
+               alignLength = 0;
+        vector<seqData> sequences;
+        if (hasCount) {
+            vector<Sequence> thisGroupsSeqs = cparser->getSeqs(group);
+            map<string, int> counts = cparser->getCountTable(group);
+            map<string, int>::iterator it;
+            
+            for (int i = 0; i < thisGroupsSeqs.size(); i++) {
+                
+                if (m->control_pressed) {  return sequences; }
+                
+                it = counts.find(thisGroupsSeqs[i].getName());
+                if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); }
+                else {
+                    sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second));
+                    if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
+                }
+            }
+        }else{
+            vector<Sequence> thisGroupsSeqs = parser->getSeqs(group);
+            map<string, string> nameMap = parser->getNameMap(group);
+            map<string, string>::iterator it;
+           
+            for (int i = 0; i < thisGroupsSeqs.size(); i++) {
+                
+                if (m->control_pressed) {  return sequences; }
+                
+                it = nameMap.find(thisGroupsSeqs[i].getName());
+                if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
+                else {
+                    int num = m->getNumNames(it->second);
+                    sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
+                    if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
+                }
+            }
+            
                }
                
-               if (error) { m->control_pressed = true; }
-               
+        if (error) { m->control_pressed = true; }
                //sort by frequency
                sort(sequences.rbegin(), sequences.rend());
                
@@ -570,7 +742,8 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                bool error = false;
                ifstream in;
                m->openInputFile(inputFile, in);
-               
+               alignLength = 0;
+        
                while (!in.eof()) {
                        
                        if (m->control_pressed) { in.close(); return sequences; }
@@ -581,6 +754,7 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                        if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
                        else {
                                sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
+                if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
                        }
                }
                in.close();
@@ -592,6 +766,37 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                
                return sequences;
        }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "readFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){
+       try {           
+               //read fasta file and create sequenceData structure - checking for file mismatches
+               vector<seqData> sequences;
+               ifstream in;
+               m->openInputFile(inputFile, in);
+               alignLength = 0;
+        
+               while (!in.eof()) {
+            Sequence temp(in); m->gobble(in);
+                       
+                       int count = ct->getNumSeqs(temp.getName());
+                       if (m->control_pressed) { break; }
+                       else {
+                               sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count));
+                if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
+                       }
+               }
+               in.close();
+               
+               //sort by frequency
+               sort(sequences.rbegin(), sequences.rend());
+               
+               return sequences;
+       }
        catch(exception& e) {
                m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
                exit(1);
@@ -625,7 +830,7 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                }
                
                int numSeqs = sequences.size();
-               int alignLength = sequences[0].sequence.size();
+               //int alignLength = sequences[0].sequence.size();
                
                ofstream chimeraFile;
                ofstream accnosFile;
@@ -640,9 +845,8 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                vector<bool> chimeras(numSeqs, 0);
                
                for(int i=0;i<numSeqs;i++){     
-                       cout << sequences[i].seqName << endl << sequences[i].sequence << endl << sequences[i].frequency << endl;
                        if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
-                       
+    
                        vector<bool> restricted = chimeras;
                        
                        vector<vector<int> > leftDiffs(numSeqs);
@@ -657,16 +861,16 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                        vector<pwAlign> alignments(numSeqs);
                        
                        int comparisons = myPerseus.getAlignments(i, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
-                       cout << "comparisons = " << comparisons << endl;
                        if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                        int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
                        
                        string dummyA, dummyB;
                        
-                       if(comparisons >= 2){   
+            if (sequences[i].sequence.size() < 3) { 
+                chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
+            }else if(comparisons >= 2){        
                                minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
-                               cout << "minMismatchToChimera = " << minMismatchToChimera << endl;
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                                int minMismatchToTrimera = numeric_limits<int>::max();
@@ -674,12 +878,11 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                                
                                if(minMismatchToChimera >= 3 && comparisons >= 3){
                                        minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
-                                       cout << "minMismatchToTrimera = " << minMismatchToTrimera << endl;
                                        if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
                                }
                                
                                double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
-                               cout << "singleDist = " << singleDist << endl;
+                               
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                                string type;
@@ -693,16 +896,16 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                                        type = "chimera";
                                        chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
                                }
-                               cout << "chimeraRefSeq = " << chimeraRefSeq << endl;
+                               ;
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
                                
                                double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
-                               cout << "chimeraDist = " << chimeraDist << endl;
+                               
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                                double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq);
                                double loonIndex = myPerseus.calcLoonIndex(sequences[i].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
-                               cout << "loonIndex = " << loonIndex << endl;
+                               
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
                                chimeraFile << i << '\t' << sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
@@ -746,7 +949,7 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
        }
 }
 /**************************************************************************************************/
-int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) {
+int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) {
        try {
                
                vector<int> processIDS;
@@ -766,7 +969,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                        lines.push_back(linePair(startIndex, endIndex));
                }
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -776,7 +979,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+                               num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
                                
                                //pass numSeqs to parent
                                ofstream out;
@@ -794,7 +997,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                }
                
                //do my part
-               num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, accnos, lines[0].start, lines[0].end, groups);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -825,7 +1028,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                        // Allocate memory for thread data.
                        string extension = toString(i) + ".temp";
                        
-                       perseusData* tempPerseus = new perseusData(alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i);
+                       perseusData* tempPerseus = new perseusData(hasName, hasCount, alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i);
                        
                        pDataArray.push_back(tempPerseus);
                        processIDS.push_back(i);
@@ -837,11 +1040,11 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                
                
                //using the main process as a worker saves time and memory
-               num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, accnos, lines[0].start, lines[0].end, groups);
                
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
-               cout << "here" << endl; 
+                       
                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                        num += pDataArray[i]->count;
@@ -869,9 +1072,8 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
        }
 }
 //**********************************************************************************************************************
-int ChimeraPerseusCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName){
+int ChimeraPerseusCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName){
        try {
-               map<string, string> uniqueNames = parser.getAllSeqsMap();
                map<string, string>::iterator itUnique;
                int total = 0;