]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraperseuscommand.cpp
working on pam
[mothur.git] / chimeraperseuscommand.cpp
index 138a6b969b48b392e10ea1329a15444cc66deb97..0547802198960198d9d2a063bc498e79f04bfe33 100644 (file)
 #include "chimeraperseuscommand.h"
 #include "deconvolutecommand.h"
 #include "sequence.hpp"
+#include "counttable.h"
+#include "sequencecountparser.h"
 //**********************************************************************************************************************
 vector<string> ChimeraPerseusCommand::setParameters(){ 
        try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pcutoff);
-               CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "",false,false); parameters.push_back(palpha);
-               CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "",false,false); parameters.push_back(pbeta);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
+
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff);
+               CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha);
+               CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta);
                        
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -36,13 +41,15 @@ vector<string> ChimeraPerseusCommand::setParameters(){
 string ChimeraPerseusCommand::getHelpString(){ 
        try {
                string helpString = "";
-               helpString += "The chimera.perseus command reads a fastafile and namefile and outputs potentially chimeric sequences.\n";
-               helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
+               helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
+               helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, dereplicate, alpha and beta.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
-               helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n";
+               helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
+        helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed.\n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
+        helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
                helpString += "The alpha parameter ....  The default is -5.54. \n";
                helpString += "The beta parameter ....  The default is 0.33. \n";
                helpString += "The cutoff parameter ....  The default is 0.50. \n";
@@ -58,6 +65,23 @@ string ChimeraPerseusCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string ChimeraPerseusCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "chimera") {  pattern = "[filename],perseus.chimeras"; } 
+        else if (type == "accnos") {  pattern = "[filename],perseus.accnos"; }
+        else if (type == "count") {  pattern = "[filename],perseus.pick.count_table"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
 ChimeraPerseusCommand::ChimeraPerseusCommand(){        
        try {
                abort = true; calledHelp = true;
@@ -65,6 +89,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(){
                vector<string> tempOutNames;
                outputTypes["chimera"] = tempOutNames;
                outputTypes["accnos"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand");
@@ -75,6 +100,8 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(){
 ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
        try {
                abort = false; calledHelp = false; 
+        hasCount = false;
+        hasName = false;
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -86,7 +113,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
                        
-                       ValidParameters validParameter("chimera.uchime");
+                       ValidParameters validParameter("chimera.perseus");
                        map<string,string>::iterator it;
                        
                        //check to make sure all parameters are valid for command
@@ -97,6 +124,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                        vector<string> tempOutNames;
                        outputTypes["chimera"] = tempOutNames;
                        outputTypes["accnos"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -182,15 +210,9 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                        
                        
                        //check for required parameters
-                       bool hasName = true;
                        namefile = validParameter.validFile(parameters, "name", false);
-                       if (namefile == "not found") { 
-                               //if there is a current fasta file, use it
-                               string filename = m->getNameFile(); 
-                               if (filename != "") { nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; }                                
-                               hasName = false;
-                       }else { 
+                       if (namefile == "not found") { namefile = "";   }
+                       else { 
                                m->splitAtDash(namefile, nameFileNames);
                                
                                //go through files and make sure they are good, if not, then disregard them
@@ -256,12 +278,101 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                                                }
                                        }
                                }
+                       }
+            
+            if (nameFileNames.size() != 0) { hasName = true; }
+            
+            //check for required parameters
+            vector<string> countfileNames;
+                       countfile = validParameter.validFile(parameters, "count", false);
+                       if (countfile == "not found") { 
+                countfile = "";  
+                       }else { 
+                               m->splitAtDash(countfile, countfileNames);
                                
-                               //make sure there is at least one valid file left
-                               if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < countfileNames.size(); i++) {
+                                       
+                                       bool ignore = false;
+                                       if (countfileNames[i] == "current") { 
+                                               countfileNames[i] = m->getCountTableFile(); 
+                                               if (countfileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       countfileNames.erase(countfileNames.begin()+i);
+                                                       i--;
+                                               }
+                                       }
+                                       
+                                       if (!ignore) {
+                                               
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(countfileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
+                                               }
+                                               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               
+                                               ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
+                                                               m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               countfileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //default path is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
+                                                               m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               countfileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               in.close();
+                                               
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       countfileNames.erase(countfileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setCountTableFile(countfileNames[i]);
+                                               }
+                                       }
+                               }
                        }
-                       
-                       if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
+            
+            if (countfileNames.size() != 0) { hasCount = true; }
+            
+                       //make sure there is at least one valid file left
+            if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+            
+            if (!hasName && !hasCount) { 
+                //if there is a current name file, use it, else look for current count file
+                               string filename = m->getNameFile(); 
+                               if (filename != "") { hasName = true; nameFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the name parameter."); m->mothurOutEndLine(); }
+                               else { 
+                    filename = m->getCountTableFile();
+                    if (filename != "") { hasCount = true; countfileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                    else { m->mothurOut("[ERROR]: You must provide a count or name file."); m->mothurOutEndLine(); abort = true;  }
+                }
+            }
+            if (!hasName && hasCount) { nameFileNames = countfileNames; }
+            
+                       if (nameFileNames.size() != fastaFileNames.size()) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
                        
                        bool hasGroup = true;
                        groupfile = validParameter.validFile(parameters, "group", false);
@@ -339,6 +450,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                        
                        if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
                        
+            if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
@@ -355,6 +467,10 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.33";  }
                        m->mothurConvert(temp, beta);
+            
+                       temp = validParameter.validFile(parameters, "dereplicate", false);      
+                       if (temp == "not found") { temp = "false";                      }
+                       dups = m->isTrue(temp);
                }
        }
        catch(exception& e) {
@@ -375,9 +491,13 @@ int ChimeraPerseusCommand::execute(){
                        m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
                        
                        int start = time(NULL); 
-                       if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
-                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.chimera";
-                       string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "perseus.accnos";
+                       if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it       
+                       map<string, string> variables;
+                       variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+                       string outputFileName = getOutputFileName("chimera", variables);
+                       string accnosFileName = getOutputFileName("accnos", variables);
+            string newCountFile = "";
+
                        //string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
                        
                        //you provided a groupfile
@@ -393,41 +513,126 @@ int ChimeraPerseusCommand::execute(){
                        
                        int numSeqs = 0;
                        int numChimeras = 0;
-                       
-                       if (groupFile != "") {
-                               //Parse sequences by group
-                               SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
-                               vector<string> groups = parser.getNamesOfGroups();
-                               
-                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
-                               
-                               //clears files
-                               ofstream out, out1, out2;
-                               m->openOutputFile(outputFileName, out); out.close(); 
-                               m->openOutputFile(accnosFileName, out1); out1.close();
-                               
-                               if(processors == 1)     {       numSeqs = driverGroups(parser, outputFileName, accnosFileName, 0, groups.size(), groups);       }
-                               else                            {       numSeqs = createProcessesGroups(parser, outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile);                        }
-                               
-                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
-                               
-                               numChimeras = deconvoluteResults(parser, outputFileName, accnosFileName);
-                               
-                               m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
-                               
-                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
-                               
-                       }else{
-                               if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
-                               
-                               //read sequences and store sorted by frequency
-                               vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
-                               
-                               if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
-                               
-                               numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
+            
+            if (hasCount) {
+                CountTable* ct = new CountTable();
+                ct->readTable(nameFile, true, false);
+                
+                if (ct->hasGroupInfo()) {
+                    cparser = new SequenceCountParser(fastaFileNames[s], *ct);
+                    variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+                    newCountFile = getOutputFileName("count", variables);
+                    
+                    vector<string> groups = cparser->getNamesOfGroups();
+                    
+                    if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
+                    
+                    //clears files
+                    ofstream out, out1, out2;
+                    m->openOutputFile(outputFileName, out); out.close(); 
+                    m->openOutputFile(accnosFileName, out1); out1.close();
+                    
+                    if(processors == 1)        {       numSeqs = driverGroups(outputFileName, accnosFileName, newCountFile, 0, groups.size(), groups);
+                        if (dups) {
+                            CountTable c; c.readTable(nameFile, true, false);
+                            if (!m->isBlank(newCountFile)) {
+                                ifstream in2;
+                                m->openInputFile(newCountFile, in2);
+                                
+                                string name, group;
+                                while (!in2.eof()) {
+                                    in2 >> name >> group; m->gobble(in2);
+                                    c.setAbund(name, group, 0);
+                                }
+                                in2.close();
+                            }
+                            m->mothurRemove(newCountFile);
+                            c.printTable(newCountFile);
+                        }
+
+                    }
+                    else                               {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, newCountFile, groups, groupFile, fastaFileNames[s], nameFile);                  }
+                    
+                    if (m->control_pressed) {  delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+                    map<string, string> uniqueNames = cparser->getAllSeqsMap();
+                    if (!dups) { 
+                        numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+                    }else {
+                        set<string> doNotRemove;
+                        CountTable c; c.readTable(newCountFile, true, true);
+                        vector<string> namesInTable = c.getNamesOfSeqs();
+                        for (int i = 0; i < namesInTable.size(); i++) {
+                            int temp = c.getNumSeqs(namesInTable[i]);
+                            if (temp == 0) {  c.remove(namesInTable[i]);  }
+                            else { doNotRemove.insert((namesInTable[i])); }
+                        }
+                        //remove names we want to keep from accnos file.
+                        set<string> accnosNames = m->readAccnos(accnosFileName);
+                        ofstream out2;
+                        m->openOutputFile(accnosFileName, out2);
+                        for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
+                            if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
+                        }
+                        out2.close();
+                        c.printTable(newCountFile);
+                        outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
+
+                    }
+                    delete cparser;
+
+                    m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
+                    
+                    if (m->control_pressed) {  delete ct; for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;  } 
+                    
+                }else {
+                    if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+                    
+                    //read sequences and store sorted by frequency
+                    vector<seqData> sequences = readFiles(fastaFileNames[s], ct);
+                    
+                    if (m->control_pressed) { delete ct; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
+                    
+                    numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras);   
+                }
+                delete ct;
+            }else {
+                if (groupFile != "") {
+                    //Parse sequences by group
+                    parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+                    vector<string> groups = parser->getNamesOfGroups();
+                    
+                    if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        }  return 0; }
+                    
+                    //clears files
+                    ofstream out, out1, out2;
+                    m->openOutputFile(outputFileName, out); out.close(); 
+                    m->openOutputFile(accnosFileName, out1); out1.close();
+                    
+                    if(processors == 1)        {       numSeqs = driverGroups(outputFileName, accnosFileName, "", 0, groups.size(), groups);   }
+                    else                               {       numSeqs = createProcessesGroups(outputFileName, accnosFileName, "", groups, groupFile, fastaFileNames[s], nameFile);                    }
+                    
+                    if (m->control_pressed) {  delete parser; for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0;    }                               
+                    map<string, string> uniqueNames = parser->getAllSeqsMap();
+                    if (!dups) { 
+                        numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+                    }
+                    delete parser;
+                    
+                    m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
+                    
+                    if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0;  }         
+                }else{
+                    if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+                    
+                    //read sequences and store sorted by frequency
+                    vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile);
+                    
+                    if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        } return 0; }
+                    
+                    numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); 
+                }
                        }
-                       
+            
                        if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
                        
                        m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
@@ -441,6 +646,11 @@ int ChimeraPerseusCommand::execute(){
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
                }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -466,14 +676,15 @@ string ChimeraPerseusCommand::getNamesFile(string& inputFile){
                string inputString = "fasta=" + inputFile;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-               
+               m->mothurCalling = true;
+        
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
-               
+               m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                nameFile = filenames["name"][0];
@@ -487,11 +698,14 @@ string ChimeraPerseusCommand::getNamesFile(string& inputFile){
        }
 }
 //**********************************************************************************************************************
-int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFName, string accnos, int start, int end, vector<string> groups){
+int ChimeraPerseusCommand::driverGroups(string outputFName, string accnos, string countlist, int start, int end, vector<string> groups){
        try {
                
                int totalSeqs = 0;
                int numChimeras = 0;
+        
+        ofstream outCountList;
+        if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
                
                for (int i = start; i < end; i++) {
                        
@@ -499,7 +713,7 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
                        
                        int start = time(NULL);  if (m->control_pressed) {  return 0; }
                        
-                       vector<seqData> sequences = loadSequences(parser, groups[i]);
+                       vector<seqData> sequences = loadSequences(groups[i]);
                        
                        if (m->control_pressed) { return 0; }
                        
@@ -507,6 +721,39 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
                        totalSeqs += numSeqs;
                        
                        if (m->control_pressed) { return 0; }
+            
+            if (dups) {
+                if (!m->isBlank(accnos+groups[i])) {
+                    ifstream in;
+                    m->openInputFile(accnos+groups[i], in);
+                    string name;
+                    if (hasCount) {
+                        while (!in.eof()) {
+                            in >> name; m->gobble(in);
+                            outCountList << name << '\t' << groups[i] << endl;
+                        }
+                        in.close();
+                    }else {
+                        map<string, string> thisnamemap = parser->getNameMap(groups[i]);
+                        map<string, string>::iterator itN;
+                        ofstream out;
+                        m->openOutputFile(accnos+groups[i]+".temp", out);
+                        while (!in.eof()) {
+                            in >> name; m->gobble(in);
+                            itN = thisnamemap.find(name);
+                            if (itN != thisnamemap.end()) {
+                                vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
+                                for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+                                
+                            }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
+                        }
+                        out.close();
+                        in.close();
+                        m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
+                    }
+                    
+                }
+            }
                        
                        //append files
                        m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
@@ -515,6 +762,8 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
                        m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
                }       
                
+        if (hasCount && dups) { outCountList.close(); }
+        
                return totalSeqs;
                
        }
@@ -524,32 +773,50 @@ int ChimeraPerseusCommand::driverGroups(SequenceParser& parser, string outputFNa
        }
 }      
 //**********************************************************************************************************************
-vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, string group){
+vector<seqData> ChimeraPerseusCommand::loadSequences(string group){
        try {
-               
-               vector<Sequence> thisGroupsSeqs = parser.getSeqs(group);
-               map<string, string> nameMap = parser.getNameMap(group);
-               map<string, string>::iterator it;
-               
-               vector<seqData> sequences;
-               bool error = false;
-        alignLength = 0;
-               
-               for (int i = 0; i < thisGroupsSeqs.size(); i++) {
-               
-                       if (m->control_pressed) {  return sequences; }
-                       
-                       it = nameMap.find(thisGroupsSeqs[i].getName());
-                       if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
-                       else {
-                               int num = m->getNumNames(it->second);
-                               sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
-                if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
-                       }
+        bool error = false;
+               alignLength = 0;
+        vector<seqData> sequences;
+        if (hasCount) {
+            vector<Sequence> thisGroupsSeqs = cparser->getSeqs(group);
+            map<string, int> counts = cparser->getCountTable(group);
+            map<string, int>::iterator it;
+            
+            for (int i = 0; i < thisGroupsSeqs.size(); i++) {
+                
+                if (m->control_pressed) {  return sequences; }
+                
+                it = counts.find(thisGroupsSeqs[i].getName());
+                if (it == counts.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); m->mothurOutEndLine(); }
+                else {
+                    thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
+                    sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second));
+                    if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
+                }
+            }
+        }else{
+            vector<Sequence> thisGroupsSeqs = parser->getSeqs(group);
+            map<string, string> nameMap = parser->getNameMap(group);
+            map<string, string>::iterator it;
+           
+            for (int i = 0; i < thisGroupsSeqs.size(); i++) {
+                
+                if (m->control_pressed) {  return sequences; }
+                
+                it = nameMap.find(thisGroupsSeqs[i].getName());
+                if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
+                else {
+                    int num = m->getNumNames(it->second);
+                    thisGroupsSeqs[i].setAligned(removeNs(thisGroupsSeqs[i].getUnaligned()));
+                    sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
+                    if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
+                }
+            }
+            
                }
                
-               if (error) { m->control_pressed = true; }
-               
+        if (error) { m->control_pressed = true; }
                //sort by frequency
                sort(sequences.rbegin(), sequences.rend());
                
@@ -583,6 +850,7 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                        it = nameMap.find(temp.getName());
                        if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
                        else {
+                temp.setAligned(removeNs(temp.getUnaligned()));
                                sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
                 if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
                        }
@@ -596,6 +864,52 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                
                return sequences;
        }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "readFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string ChimeraPerseusCommand::removeNs(string seq){
+       try {
+        string newSeq = "";
+        for (int i = 0; i < seq.length(); i++) {
+            if (seq[i] != 'N') {  newSeq += seq[i]; }
+        }
+        return newSeq;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPerseusCommand", "removeNs");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, CountTable* ct){
+       try {           
+               //read fasta file and create sequenceData structure - checking for file mismatches
+               vector<seqData> sequences;
+               ifstream in;
+               m->openInputFile(inputFile, in);
+               alignLength = 0;
+        
+               while (!in.eof()) {
+            Sequence temp(in); m->gobble(in);
+                       
+                       int count = ct->getNumSeqs(temp.getName());
+                       if (m->control_pressed) { break; }
+                       else {
+                temp.setAligned(removeNs(temp.getUnaligned()));
+                               sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), count));
+                if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
+                       }
+               }
+               in.close();
+               
+               //sort by frequency
+               sort(sequences.rbegin(), sequences.rend());
+               
+               return sequences;
+       }
        catch(exception& e) {
                m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile");
                exit(1);
@@ -695,8 +1009,8 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                                        type = "chimera";
                                        chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
                                }
-                               ;
-                               if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
+
+                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
                                
                                double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
                                
@@ -732,10 +1046,10 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                        }
        
                        //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1) + "\n");           }
+                       if((i+1) % 100 == 0){   m->mothurOutJustToScreen("Processing sequence: " + toString(i+1) + "\n");               }
                }
                
-               if((numSeqs) % 100 != 0){       m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");               }
+               if((numSeqs) % 100 != 0){       m->mothurOutJustToScreen("Processing sequence: " + toString(numSeqs) + "\n");           }
                
                chimeraFile.close();
                accnosFile.close();
@@ -748,13 +1062,16 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
        }
 }
 /**************************************************************************************************/
-int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string accnos, vector<string> groups, string group, string fasta, string name) {
+int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accnos, string newCountFile, vector<string> groups, string group, string fasta, string name) {
        try {
                
                vector<int> processIDS;
                int process = 1;
                int num = 0;
                
+        CountTable newCount;
+        if (hasCount && dups) { newCount.readTable(name, true, false); }
+        
                //sanity check
                if (groups.size() < processors) { processors = groups.size(); }
                
@@ -778,7 +1095,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+                               num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
                                
                                //pass numSeqs to parent
                                ofstream out;
@@ -796,7 +1113,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                }
                
                //do my part
-               num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, accnos, accnos + ".byCount", lines[0].start, lines[0].end, groups);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -827,7 +1144,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                        // Allocate memory for thread data.
                        string extension = toString(i) + ".temp";
                        
-                       perseusData* tempPerseus = new perseusData(alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension, groups, m, lines[i].start, lines[i].end, i);
+                       perseusData* tempPerseus = new perseusData(dups, hasName, hasCount, alpha, beta, cutoff, outputFName+extension, fasta, name, group, accnos+extension,  accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
                        
                        pDataArray.push_back(tempPerseus);
                        processIDS.push_back(i);
@@ -839,7 +1156,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                
                
                //using the main process as a worker saves time and memory
-               num = driverGroups(parser, outputFName, accnos, lines[0].start, lines[0].end, groups);
+               num = driverGroups(outputFName, accnos, accnos + ".byCount", lines[0].start, lines[0].end, groups);
                
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
@@ -851,7 +1168,22 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                        delete pDataArray[i];
                }
 #endif         
-               
+               //read my own
+        if (hasCount && dups) {
+            if (!m->isBlank(accnos + ".byCount")) {
+                ifstream in2;
+                m->openInputFile(accnos + ".byCount", in2);
+                
+                string name, group;
+                while (!in2.eof()) {
+                    in2 >> name >> group; m->gobble(in2);
+                    newCount.setAbund(name, group, 0);
+                }
+                in2.close();
+            }
+            m->mothurRemove(accnos + ".byCount");
+        }
+
                
                //append output files
                for(int i=0;i<processIDS.size();i++){
@@ -860,8 +1192,27 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                        
                        m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
                        m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
+            
+            if (hasCount && dups) {
+                if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
+                    ifstream in2;
+                    m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
+                    
+                    string name, group;
+                    while (!in2.eof()) {
+                        in2 >> name >> group; m->gobble(in2);
+                        newCount.setAbund(name, group, 0);
+                    }
+                    in2.close();
+                }
+                m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
+            }
+
                }
                
+        //print new *.pick.count_table
+        if (hasCount && dups) {  newCount.printTable(newCountFile);   }
+
                return num;     
                
        }
@@ -871,9 +1222,8 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
        }
 }
 //**********************************************************************************************************************
-int ChimeraPerseusCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName){
+int ChimeraPerseusCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName){
        try {
-               map<string, string> uniqueNames = parser.getAllSeqsMap();
                map<string, string>::iterator itUnique;
                int total = 0;