get.seqs not renaming sequences correctly for dups=f. unique.seqs crash when count...

[mothur.git] / chimeraperseuscommand.cpp
diff --git a/chimeraperseuscommand.cpp b/chimeraperseuscommand.cpp

index e3691e8b942c2f34da91cec14691e717d5c16c1a..cea481bd22062b7461a06705d08ba4d1c1930451 100644 (file)
--- a/chimeraperseuscommand.cpp
+++ b/chimeraperseuscommand.cpp
@@ -10,18 +10,23 @@
  #include "chimeraperseuscommand.h"
  #include "deconvolutecommand.h"
  #include "sequence.hpp"
+#include "counttable.h"
+#include "sequencecountparser.h"
  //**********************************************************************************************************************
  vector<string> ChimeraPerseusCommand::setParameters(){ 
         try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pcutoff);
-               CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "",false,false); parameters.push_back(palpha);
-               CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "",false,false); parameters.push_back(pbeta);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
+
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff);
+               CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha);
+               CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta);
                         
                 vector<string> myArray;
                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -36,13 +41,15 @@ vector<string> ChimeraPerseusCommand::setParameters(){
  string ChimeraPerseusCommand::getHelpString(){ 
         try {
                 string helpString = "";
-               helpString += "The chimera.perseus command reads a fastafile and namefile and outputs potentially chimeric sequences.\n";
-               helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
+               helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
+               helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, dereplicate, alpha and beta.\n";
                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
-               helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n";
+               helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
+        helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed.\n";
                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                 helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
+        helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
                 helpString += "The alpha parameter ....  The default is -5.54. \n";
                 helpString += "The beta parameter ....  The default is 0.33. \n";
                 helpString += "The cutoff parameter ....  The default is 0.50. \n";
@@ -58,25 +65,21 @@ string ChimeraPerseusCommand::getHelpString(){
         }
  }
  //**********************************************************************************************************************
-string ChimeraPerseusCommand::getOutputFileNameTag(string type, string inputName=""){  
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string ChimeraPerseusCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
          
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "chimera") {  outputFileName =  "perseus.chimeras"; }
-            else if (type == "accnos") {  outputFileName =  "perseus.accnos"; }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraPerseusCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "chimera") {  pattern = "[filename],perseus.chimeras"; } 
+        else if (type == "accnos") {  pattern = "[filename],perseus.accnos"; }
+        else if (type == "count") {  pattern = "[filename],perseus.pick.count_table"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern");
+        exit(1);
+    }
  }
  //**********************************************************************************************************************
  ChimeraPerseusCommand::ChimeraPerseusCommand(){        
@@ -86,6 +89,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(){
                 vector<string> tempOutNames;
                 outputTypes["chimera"] = tempOutNames;
                 outputTypes["accnos"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
@@ -96,6 +100,8 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(){
  ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
         try {
                 abort = false; calledHelp = false; 
+        hasCount = false;
+        hasName = false;
                 
                 //allow user to run help
                 if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -107,7 +113,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                         OptionParser parser(option);
                         map<string,string> parameters = parser.getParameters();
                         
-                       ValidParameters validParameter("chimera.uchime");
+                       ValidParameters validParameter("chimera.perseus");
                         map<string,string>::iterator it;
                         
                         //check to make sure all parameters are valid for command
@@ -118,6 +124,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                         vector<string> tempOutNames;
                         outputTypes["chimera"] = tempOutNames;
                         outputTypes["accnos"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                         
                         //if the user changes the input directory command factory will send this info to us in the output parameter 
                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -203,15 +210,9 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                         
                         
                         //check for required parameters
-                       bool hasName = true;
                         namefile = validParameter.validFile(parameters, "name", false);
-                       if (namefile == "not found") { 
-                               //if there is a current fasta file, use it
-                               string filename = m->getNameFile(); 
-                               if (filename != "") { nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; }                                
-                               hasName = false;
-                       }else { 
+                       if (namefile == "not found") { namefile = "";   }
+                       else { 
                                 m->splitAtDash(namefile, nameFileNames);
                                 
                                 //go through files and make sure they are good, if not, then disregard them
@@ -277,12 +278,101 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                                                 }
                                         }
                                 }
+                       }
+            
+            if (nameFileNames.size() != 0) { hasName = true; }
+            
+            //check for required parameters
+            vector<string> countfileNames;
+                       countfile = validParameter.validFile(parameters, "count", false);
+                       if (countfile == "not found") { 
+                countfile = "";  
+                       }else { 
+                               m->splitAtDash(countfile, countfileNames);
                                 
-                               //make sure there is at least one valid file left
-                               if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < countfileNames.size(); i++) {
+                                       
+                                       bool ignore = false;
+                                       if (countfileNames[i] == "current") { 
+                                               countfileNames[i] = m->getCountTableFile(); 
+                                               if (countfileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       countfileNames.erase(countfileNames.begin()+i);
+                                                       i--;
+                                               }
+                                       }
+                                       
+                                       if (!ignore) {
+                                               
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(countfileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
+                                               }
+                                               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               
+                                               ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
+                                                               m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               countfileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //default path is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
+                                                               m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               countfileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               in.close();
+                                               
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       countfileNames.erase(countfileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setCountTableFile(countfileNames[i]);
+                                               }
+                                       }
+                               }
                         }
-                       
-                       if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
+            
+            if (countfileNames.size() != 0) { hasCount = true; }
+            
+                       //make sure there is at least one valid file left
+            if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+            
+            if (!hasName && !hasCount) { 
+                //if there is a current name file, use it, else look for current count file
+                               string filename = m->getNameFile(); 
+                               if (filename != "") { hasName = true; nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
+                               else { 
+                    filename = m->getCountTableFile();
+                    if (filename != "") { hasCount = true; countfileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                    else { m->mothurOut("[ERROR]: You must provide a count or name file."); m->mothurOutEndLine(); abort = true;  }
+                }
+            }
+            if (!hasName && hasCount) { nameFileNames = countfileNames; }
+            
+                       if (nameFileNames.size() != fastaFileNames.size()) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
                         
                         bool hasGroup = true;
                         groupfile = validParameter.validFile(parameters, "group", false);
@@ -360,6 +450,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                         
                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
                         
+            if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
                         
                         //if the user changes the output directory command factory will send this info to us in the output parameter 
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
@@ -376,6 +467,10 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                         
                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
                         m->mothurConvert(temp, beta);
+            
+                       temp = validParameter.validFile(parameters, "dereplicate", false);      
+                       if (temp == "not found") { temp = "false";                      }
+                       dups = m->isTrue(temp);
                 }
         }
         catch(exception& e) {
@@ -396,9 +491,12 @@ int ChimeraPerseusCommand::execute(){
                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
                         
                         int start = time(NULL); 
-                       if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
-                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
-                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
+                       if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it       
+                       map<string, string> variables;
+                       variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+                       string outputFileName = getOutputFileName("chimera", variables);
+                       string accnosFileName = getOutputFileName("accnos", variables);
+            string newCountFile = "";
  
                         //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
                         
@@ -415,41 +513,126 @@ int ChimeraPerseusCommand::execute(){
                         
                         int numSeqs = 0;
                         int numChimeras = 0;
-                       
-                       if (groupFile != "") {
-                               //Parse sequences by group
-                               SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
-                               vector<string> groups = parser.getNamesOfGroups();
-                               
-                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
-                               
-                               //clears files
-                               ofstream out, out1, out2;
-                               m->openOutputFile(outputFileName, out); out.close(); 
-                               m->openOutputFile(accnosFileName, out1); out1.close();
-                               
-                               if(processors == 1)     {       numSeqs = driverGroups(parser, outputFileName, accnosFileName, 0, groups.size(), groups);       }
-                               else                            {       numSeqs = createProcessesGroups(parser, outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
-                               
-                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
-                               
-                               numChimeras = deconvoluteResults(parser, outputFileName, accnosFileName);
-                               
-                               m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
-                               
-                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
-                               
-                       }else{
-                               if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
-                               
-                               //read sequences and store sorted by frequency
-                               vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
-                               
-                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
-                               
-                               numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
+            
+            if (hasCount) {
+                CountTable* ct = new CountTable();
+                ct->readTable(nameFile, true);
+                
+                if (ct->hasGroupInfo()) {
+                    cparser = new SequenceCountParser(fastaFileNames[s], *ct);
+                    variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+                    newCountFile = getOutputFileName("count", variables);
+                    
+                    vector<string> groups = cparser->getNamesOfGroups();
+                    
+                    if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
+                    
+                    //clears files
+                    ofstream out, out1, out2;
+                    m->openOutputFile(outputFileName, out); out.close(); 
+                    m->openOutputFile(accnosFileName, out1); out1.close();
+                    
+                    if(processors == 1)        {       numSeqs = driverGroups(outputFileName, accnosFileName, newCountFile, 0, groups.size(), groups);
+                        if (dups) {
+                            CountTable c; c.readTable(nameFile, true);
+                            if (!m->isBlank(newCountFile)) {
+                                ifstream in2;
+                                m->openInputFile(newCountFile, in2);
+                                
+                                string name, group;
+                                while (!in2.eof()) {
+                                    in2 >> name >> group; m->gobble(in2);
+                                    c.setAbund(name, group, 0);
+                                }
+                                in2.close();
+                            }
+                            m->mothurRemove(newCountFile);
+                            c.printTable(newCountFile);
+                        }
+
+                    }
+                    else                               {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, newCountFile, groups, groupFile, fastaFileNames[s], nameFile);                  }
+                    
+                    if (m->control_pressed) {  delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+                    map<string, string> uniqueNames = cparser->getAllSeqsMap();
+                    if (!dups) { 
+                        numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+                    }else {
+                        set<string> doNotRemove;
+                        CountTable c; c.readTable(newCountFile, true);
+                        vector<string> namesInTable = c.getNamesOfSeqs();
+                        for (int i = 0; i < namesInTable.size(); i++) {
+                            int temp = c.getNumSeqs(namesInTable[i]);
+                            if (temp == 0) {  c.remove(namesInTable[i]);  }
+                            else { doNotRemove.insert((namesInTable[i])); }
+                        }
+                        //remove names we want to keep from accnos file.
+                        set<string> accnosNames = m->readAccnos(accnosFileName);
+                        ofstream out2;
+                        m->openOutputFile(accnosFileName, out2);
+                        for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
+                            if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
+                        }
+                        out2.close();
+                        c.printTable(newCountFile);
+                        outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
+
+                    }
+                    delete cparser;
+
+                    m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
+                    
+                    if (m->control_pressed) {  delete ct; for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;  } 
+                    
+                }else {
+                    if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+                    
+                    //read sequences and store sorted by frequency
+                    vector<seqData> sequences = readFiles(fastaFileNames[s], ct);
+                    
+                    if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
+                    
+                    numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras);   
+                }
+                delete ct;
+            }else {
+                if (groupFile != "") {
+                    //Parse sequences by group
+                    parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+                    vector<string> groups = parser->getNamesOfGroups();
+                    
+                    if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        }  return 0; }
+                    
+                    //clears files
+                    ofstream out, out1, out2;
+                    m->openOutputFile(outputFileName, out); out.close(); 
+                    m->openOutputFile(accnosFileName, out1); out1.close();
+                    
+                    if(processors == 1)        {       numSeqs = driverGroups(outputFileName, accnosFileName, "", 0, groups.size(), groups);   }
+                    else                               {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, "", groups, groupFile, fastaFileNames[s], nameFile);                    }
+                    
+                    if (m->control_pressed) {  delete parser; for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+                    map<string, string> uniqueNames = parser->getAllSeqsMap();
+                    if (!dups) { 
+                        numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+                    }
+                    delete parser;
+                    
+                    m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
+                    
+                    if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0;  }         
+                }else{
+                    if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+                    
+                    //read sequences and store sorted by frequency
+                    vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
+                    
+                    if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        } return 0; }
+                    
+                    numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
+                }
                         }
-                       
+            
                         if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
                         
                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
@@ -463,6 +646,11 @@ int ChimeraPerseusCommand::execute(){
                 if (itTypes != outputTypes.end()) {
                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
                 }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
                 
                 m->mothurOutEndLine();
                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -510,11 +698,14 @@ string ChimeraPerseusCommand::getNamesFile(string& inputFile){
         }
  }
  //**********************************************************************************************************************
-int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFName, string accnos, int start, int end, vector<string> groups){
+int ChimeraPerseusCommand::driverGroups(string outputFName, string accnos, string countlist, int start, int end, vector<string> groups){
         try {
                 
                 int totalSeqs = 0;
                 int numChimeras = 0;
+        
+        ofstream outCountList;
+        if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
                 
                 for (int i = start; i < end; i++) {
                         
@@ -522,7 +713,7 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
                         
                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
                         
-                       vector<seqData> sequences = loadSequences(parser, groups[i]);
+                       vector<seqData> sequences = loadSequences(groups[i]);
                         
                         if (m->control_pressed) { return 0; }
                         
@@ -530,6 +721,39 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
                         totalSeqs += numSeqs;
                         
                         if (m->control_pressed) { return 0; }
+            
+            if (dups) {
+                if (!m->isBlank(accnos+groups[i])) {
+                    ifstream in;
+                    m->openInputFile(accnos+groups[i], in);
+                    string name;
+                    if (hasCount) {
+                        while (!in.eof()) {
+                            in >> name; m->gobble(in);
+                            outCountList << name << '\t' << groups[i] << endl;
+                        }
+                        in.close();
+                    }else {
+                        map<string, string> thisnamemap = parser->getNameMap(groups[i]);
+                        map<string, string>::iterator itN;
+                        ofstream out;
+                        m->openOutputFile(accnos+groups[i]+".temp", out);
+                        while (!in.eof()) {
+                            in >> name; m->gobble(in);
+                            itN = thisnamemap.find(name);
+                            if (itN != thisnamemap.end()) {
+                                vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
+                                for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+                                
+                            }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
+                        }
+                        out.close();
+                        in.close();
+                        m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
+                    }
+                    
+                }
+            }
                         
                         //append files
                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
@@ -538,6 +762,8 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
                 }       
                 
+        if (hasCount && dups) { outCountList.close(); }
+        
                 return totalSeqs;
                 
         }
@@ -547,32 +773,50 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
         }
  }      
  //**********************************************************************************************************************
-vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, string group){
+vector<seqData> ChimeraPerseusCommand::loadSequences(string group){
         try {
-               
-               vector<Sequence> thisGroupsSeqs = parser.getSeqs(group);
-               map<string, string> nameMap = parser.getNameMap(group);
-               map<string, string>::iterator it;
-               
-               vector<seqData> sequences;
-               bool error = false;
-        alignLength = 0;
-               
-               for (int i = 0; i < thisGroupsSeqs.size(); i++) {
-               
-                       if (m->control_pressed) {  return sequences; }
-                       
-                       it = nameMap.find(thisGroupsSeqs[i].getName());
-                       if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
-                       else {
-                               int num = m->getNumNames(it->second);
-                               sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
-                if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
-                       }
+        bool error = false;
+               alignLength = 0;
+        vector<seqData> sequences;
+        if (hasCount) {
+            vector<Sequence> thisGroupsSeqs = cparser->getSeqs(group);
+            map<string, int> counts = cparser->getCountTable(group);
+            map<string, int>::iterator it;
+            
+            for (int i = 0; i < thisGroupsSeqs.size(); i++) {
+                
+                if (m->control_pressed) {  return sequences; }
+                
+                it = counts.find(thisGroupsSeqs[i].getName());
+                if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); }
+                else {
+                    thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
+                    sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second));
+                    if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
+                }
+            }
+        }else{
+            vector<Sequence> thisGroupsSeqs = parser->getSeqs(group);
+            map<string, string> nameMap = parser->getNameMap(group);
+            map<string, string>::iterator it;
+           
+            for (int i = 0; i < thisGroupsSeqs.size(); i++) {
+                
+                if (m->control_pressed) {  return sequences; }
+                
+                it = nameMap.find(thisGroupsSeqs[i].getName());
+                if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
+                else {
+                    int num = m->getNumNames(it->second);
+                    thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
+                    sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
+                    if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
+                }
+            }
+            
                 }
                 
-               if (error) { m->control_pressed = true; }
-               
+        if (error) { m->control_pressed = true; }
                 //sort by frequency
                 sort(sequences.rbegin(), sequences.rend());
                 
@@ -606,6 +850,7 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                         it = nameMap.find(temp.getName());
                         if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
                         else {
+                temp.setAligned(removeNs(temp.getUnaligned()));
                                 sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
                  if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
                         }
@@ -619,6 +864,52 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                 
                 return sequences;
         }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "readFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string ChimeraPerseusCommand::removeNs(string seq){
+       try {
+        string newSeq = "";
+        for (int i = 0; i < seq.length(); i++) {
+            if (seq[i] != 'N') {  newSeq += seq[i]; }
+        }
+        return newSeq;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "removeNs");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){
+       try {           
+               //read fasta file and create sequenceData structure - checking for file mismatches
+               vector<seqData> sequences;
+               ifstream in;
+               m->openInputFile(inputFile, in);
+               alignLength = 0;
+        
+               while (!in.eof()) {
+            Sequence temp(in); m->gobble(in);
+                       
+                       int count = ct->getNumSeqs(temp.getName());
+                       if (m->control_pressed) { break; }
+                       else {
+                temp.setAligned(removeNs(temp.getUnaligned()));
+                               sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count));
+                if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
+                       }
+               }
+               in.close();
+               
+               //sort by frequency
+               sort(sequences.rbegin(), sequences.rend());
+               
+               return sequences;
+       }
         catch(exception& e) {
                 m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
                 exit(1);
@@ -718,8 +1009,8 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                                         type = "chimera";
                                         chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
                                 }
-                               ;
-                               if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+
+                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
                                 
                                 double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
                                 
@@ -755,10 +1046,10 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                         }
         
                         //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1) + "\n");           }
+                       if((i+1) % 100 == 0){   m->mothurOutJustToScreen("Processing sequence: " + toString(i+1) + "\n");               }
                 }
                 
-               if((numSeqs) % 100 != 0){       m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");               }
+               if((numSeqs) % 100 != 0){       m->mothurOutJustToScreen("Processing sequence: " + toString(numSeqs) + "\n");           }
                 
                 chimeraFile.close();
                 accnosFile.close();
@@ -771,13 +1062,16 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
         }
  }
  /**************************************************************************************************/
-int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) {
+int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accnos, string newCountFile, vector<string> groups, string group, string fasta, string name) {
         try {
                 
                 vector<int> processIDS;
                 int process = 1;
                 int num = 0;
                 
+        CountTable newCount;
+        if (hasCount && dups) { newCount.readTable(name, true); }
+        
                 //sanity check
                 if (groups.size() < processors) { processors = groups.size(); }
                 
@@ -801,7 +1095,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                 process++;
                         }else if (pid == 0){
-                               num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+                               num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
                                 
                                 //pass numSeqs to parent
                                 ofstream out;
@@ -819,7 +1113,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                 }
                 
                 //do my part
-               num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, accnos, accnos + ".byCount", lines[0].start, lines[0].end, groups);
                 
                 //force parent to wait until all the processes are done
                 for (int i=0;i<processIDS.size();i++) { 
@@ -850,7 +1144,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                         // Allocate memory for thread data.
                         string extension = toString(i) + ".temp";
                         
-                       perseusData* tempPerseus = new perseusData(alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i);
+                       perseusData* tempPerseus = new perseusData(dups, hasName, hasCount, alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension,  accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
                         
                         pDataArray.push_back(tempPerseus);
                         processIDS.push_back(i);
@@ -862,7 +1156,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                 
                 
                 //using the main process as a worker saves time and memory
-               num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, accnos, accnos + ".byCount", lines[0].start, lines[0].end, groups);
                 
                 //Wait until all threads have terminated.
                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
@@ -874,7 +1168,22 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                         delete pDataArray[i];
                 }
  #endif         
-               
+               //read my own
+        if (hasCount && dups) {
+            if (!m->isBlank(accnos + ".byCount")) {
+                ifstream in2;
+                m->openInputFile(accnos + ".byCount", in2);
+                
+                string name, group;
+                while (!in2.eof()) {
+                    in2 >> name >> group; m->gobble(in2);
+                    newCount.setAbund(name, group, 0);
+                }
+                in2.close();
+            }
+            m->mothurRemove(accnos + ".byCount");
+        }
+
                 
                 //append output files
                 for(int i=0;i<processIDS.size();i++){
@@ -883,8 +1192,27 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                         
                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
+            
+            if (hasCount && dups) {
+                if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
+                    ifstream in2;
+                    m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
+                    
+                    string name, group;
+                    while (!in2.eof()) {
+                        in2 >> name >> group; m->gobble(in2);
+                        newCount.setAbund(name, group, 0);
+                    }
+                    in2.close();
+                }
+                m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
+            }
+
                 }
                 
+        //print new *.pick.count_table
+        if (hasCount && dups) {  newCount.printTable(newCountFile);   }
+
                 return num;     
                 
         }
@@ -894,9 +1222,8 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
         }
  }
  //**********************************************************************************************************************
-int ChimeraPerseusCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName){
+int ChimeraPerseusCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName){
         try {
-               map<string, string> uniqueNames = parser.getAllSeqsMap();
                 map<string, string>::iterator itUnique;
                 int total = 0;