m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
- string baseTName = taxonomyFileName;
+ string baseTName = m->getSimpleName(taxonomyFileName);
if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy(); }
//set rippedTaxName to
else {
if (type == "fasta") { outputFileName = "fragclust.fasta"; }
else if (type == "name") { outputFileName = "fragclust.names"; }
- else if (type == "count") { outputFileName = "fragclust.count.table"; }
+ else if (type == "count") { outputFileName = "fragclust.count_table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
else {
if (type == "fasta") { outputFileName = "cons.fasta"; }
else if (type == "name") { outputFileName = "cons.names"; }
- else if (type == "count") { outputFileName = "cons.count.table"; }
+ else if (type == "count") { outputFileName = "cons.count_table"; }
else if (type == "summary") { outputFileName = "cons.summary"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
string CountSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The count.seqs aka. make.table command reads a name file and outputs a .count.table file. You may also provide a group file to get the counts broken down by group.\n";
+ helpString += "The count.seqs aka. make.table command reads a name file and outputs a .count_table file. You may also provide a group file to get the counts broken down by group.\n";
helpString += "The groups parameter allows you to indicate which groups you want to include in the counts, by default all groups in your groupfile are used.\n";
helpString += "The large parameter indicates the name and group files are too large to fit in RAM.\n";
helpString += "When you use the groups parameter and a sequence does not represent any sequences from the groups you specify it is not included in the .count.summary file.\n";
it = outputTypes.find(type);
if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
else {
- if (type == "counttable") { outputFileName = "count.table"; }
+ if (type == "counttable") { outputFileName = "count_table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
}
}
+ if (hasGroups) {
+ for (int i = 0; i < totalGroups.size(); i++) {
+ if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
+ }
+ }
+
return 0;
}
catch(exception& e) {
in.close();
if (error) { m->control_pressed = true; }
- if (groupfile != "") { delete groupMap; }
+ else { //check for zero groups
+ if (hasGroups) {
+ for (int i = 0; i < totalGroups.size(); i++) {
+ if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
+ }
+ }
+ }
+ if (groupfile != "") { delete groupMap; }
return 0;
}
in.close();
if (error) { m->control_pressed = true; }
+ else { //check for zero groups
+ if (hasGroups) {
+ for (int i = 0; i < totalGroups.size(); i++) {
+ if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
+ }
+ }
+ }
return 0;
}
}
}
/************************************************************/
+//remove group
+// Removes the column for groupName from the count table.
+//   - groups / indexGroupMap / totalGroups are rebuilt without the group, and the
+//     remaining groups are re-indexed to their new (shifted) column positions.
+//   - every sequence row loses that group's count; totals[] and total are reduced.
+//   - a row whose total drops to 0 is erased, and indexNameMap is rebuilt so the
+//     surviving names point at their new row positions.
+// Always returns 0. Problems (unknown group, table without group info) are reported
+// through m->mothurOut and by setting m->control_pressed.
+int CountTable::removeGroup(string groupName) {
+	try {
+		if (hasGroups) {
+			map<string, int>::iterator it = indexGroupMap.find(groupName);
+			if (it == indexGroupMap.end()) {
+				m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+			}else {
+				int indexOfGroupToRemove = it->second;
+
+				//rebuild the group list; each surviving group gets its NEW column index,
+				//groups after the removed one shift down by one.
+				vector<string> newGroups;
+				for (int i = 0; i < groups.size(); i++) {
+					if (groups[i] != groupName) {
+						int newIndex = newGroups.size();
+						newGroups.push_back(groups[i]);
+						indexGroupMap[groups[i]] = newIndex;
+					}
+				}
+				indexGroupMap.erase(groupName);
+				groups = newGroups;
+				totalGroups.erase(totalGroups.begin()+indexOfGroupToRemove);
+
+				//row -> name lookup taken before any row is erased, so names can be re-indexed below
+				map<int, string> rowToName;
+				for (map<string, int>::iterator itName = indexNameMap.begin(); itName != indexNameMap.end(); itName++) {
+					rowToName[itName->second] = itName->first;
+				}
+
+				map<string, int> newIndexNameMap;
+				int originalRow = 0; //row position before any erasing, key into rowToName
+				for (int i = 0; i < counts.size(); i++) {
+					int num = counts[i][indexOfGroupToRemove];
+					counts[i].erase(counts[i].begin()+indexOfGroupToRemove);
+					totals[i] -= num;
+					total -= num;
+
+					map<int, string>::iterator itRow = rowToName.find(originalRow);
+					originalRow++;
+
+					if (totals[i] == 0) { //your sequences are only from the group we want to remove, then remove you.
+						counts.erase(counts.begin()+i);
+						totals.erase(totals.begin()+i);
+						uniques--;
+						i--; //the next row slid into position i
+					}else if (itRow != rowToName.end()) {
+						newIndexNameMap[itRow->second] = i; //surviving name -> its row after earlier erasures
+					}
+				}
+				indexNameMap = newIndexNameMap;
+
+				if (groups.size() == 0) { hasGroups = false; }
+			}
+		}else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove group " + groupName + ".\n"); m->control_pressed = true; }
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "CountTable", "removeGroup");
+		exit(1);
+	}
+}
+/************************************************************/
//vector of groups for the seq
vector<string> CountTable::getGroups(string seqName) {
try {
int CountTable::remove(string seqName) {
try {
map<string, int>::iterator it = indexNameMap.find(seqName);
- if (it == indexNameMap.end()) {
+ if (it != indexNameMap.end()) {
uniques--;
if (hasGroups){ //remove this sequences counts from group totals
for (int i = 0; i < totalGroups.size(); i++) { totalGroups[i] -= counts[it->second][i]; counts[it->second][i] = 0; }
CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; uniques = 0; }
~CountTable() {}
+ //reads and creates smart enough to eliminate groups with zero counts
int createTable(set<string>&, map<string, string>&, set<string>&); //seqNames, seqName->group, groupNames
int createTable(string, string, bool); //namefile, groupfile, createGroup
- int readTable(string);
+ int readTable(string);
+
int printTable(string);
int printHeaders(ofstream&);
int printSeq(ofstream&, string);
int getNumGroups() { return groups.size(); }
vector<string> getNamesOfGroups() { return groups; } //returns group names, if no group info vector is blank.
int addGroup(string);
+ int removeGroup(string);
int renameSeq(string, string); //used to change name of sequence for use with trees
int setAbund(string, string, int); //set abundance number of seqs for that group for that seq
else {
if (type == "fasta") { outputFileName = "unique" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "names"; }
- else if (type == "count") { outputFileName = "count.table"; }
+ else if (type == "count") { outputFileName = "count_table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "count") { outputFileName = "pick.count.table"; }
+ else if (type == "count") { outputFileName = "pick.count_table"; }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); }
#include "getlineagecommand.h"
#include "sequence.hpp"
#include "listvector.hpp"
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> GetLineageCommand::setParameters(){
if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "count") { outputFileName = "pick.count.table"; }
+ else if (type == "count") { outputFileName = "pick.count_table"; }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "alignreport") { outputFileName = "pick.align.report"; }
}
in.close();
out.close();
+
+ //check for groups that have been eliminated
+ CountTable ct;
+ if (ct.testGroups(outputFileName)) {
+ ct.readTable(outputFileName);
+ ct.printTable(outputFileName);
+ }
+
if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
in >> name; //read from first column
in >> tax; //read from second column
+ string noQuotesTax = m->removeQuotes(tax);
+
for (int j = 0; j < listOfTaxons.size(); j++) {
- string newtax = tax;
+ string newtax = noQuotesTax;
//if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
if (!taxonsHasConfidence[j]) {
- int hasConfidences = tax.find_first_of('(');
+ int hasConfidences = noQuotesTax.find_first_of('(');
if (hasConfidences != string::npos) {
- newtax = tax;
+ newtax = noQuotesTax;
m->removeConfidences(newtax);
}
break;
}
}else{//if listOfTaxons[i] has them and you don't them remove taxons
- int hasConfidences = tax.find_first_of('(');
+ int hasConfidences = noQuotesTax.find_first_of('(');
if (hasConfidences == string::npos) {
int pos = newtax.find(noConfidenceTaxons[j]);
}else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
//first remove confidences from both and see if the taxonomy exists
- string noNewTax = tax;
- int hasConfidences = tax.find_first_of('(');
+ string noNewTax = noQuotesTax;
+ int hasConfidences = noQuotesTax.find_first_of('(');
if (hasConfidences != string::npos) {
- noNewTax = tax;
+ noNewTax = noQuotesTax;
m->removeConfidences(noNewTax);
}
try {
CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pfasta);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "ColumnName",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
string GetOTURepCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, large, weighted, cutoff, precision, groups, sorted and label. The list parameter is required, as well as phylip or column and name, unless you have valid current files.\n";
+ helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, count, large, weighted, cutoff, precision, groups, sorted and label. The list parameter is required, as well as phylip or column and name, unless you have valid current files.\n";
helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
- helpString += "The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n";
+ helpString += "The phylip or column parameter is required, but only one may be used. If you use a column file the name or count filename is required. \n";
helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
helpString += "The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
helpString += "Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n";
else {
if (type == "fasta") { outputFileName = "rep.fasta"; }
else if (type == "name") { outputFileName = "rep.names"; }
+ else if (type == "count") { outputFileName = "rep.count_table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a count file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
if (namefile == "not open") { abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+
+ hasGroups = false;
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not found") { countfile = ""; }
+ else if (countfile == "not open") { abort = true; countfile = ""; }
+ else {
+ m->setCountTableFile(countfile);
+ ct.readTable(countfile);
+ if (ct.hasGroupInfo()) { hasGroups = true; }
+ }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
//give priority to column, then phylip
}else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
if (columnfile != "") {
- if (namefile == "") {
+ if ((namefile == "") && (countfile == "")) {
namefile = m->getNameFile();
if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
- abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine();
+ abort = true;
+ }
}
- }
+ }
}
//check for optional parameter and set defaults
sorted = "";
}
- if ((sorted == "group") && (groupfile == "")) {
- m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
+ if ((sorted == "group") && ((groupfile == "")&& !hasGroups)) {
+ m->mothurOut("You must provide a groupfile or have a count file with group info to sort by group. I will not sort."); m->mothurOutEndLine();
sorted = "";
}
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; }
else {
- if (groupfile == "") {
+ if ((groupfile == "") && (!hasGroups)) {
m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
abort = true;
}else {
int error;
list = NULL;
- if (!large) {
- //read distance files
- if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }
- else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
- else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0; }
-
- readMatrix->setCutoff(cutoff);
-
- if(namefile != ""){
- nameMap = new NameAssignment(namefile);
- nameMap->readMap();
- }else{ nameMap = NULL; }
-
- readMatrix->read(nameMap);
-
- if (m->control_pressed) { delete readMatrix; return 0; }
-
- list = readMatrix->getListVector();
-
- SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
-
- // Create a data structure to quickly access the distance information.
- // It consists of a vector of distance maps, where each map contains
- // all distances of a certain sequence. Vector and maps are accessed
- // via the index of a sequence in the distance matrix
- seqVec = vector<SeqMap>(list->size());
- for (int i = 0; i < matrix->seqVec.size(); i++) {
- for (int j = 0; j < matrix->seqVec[i].size(); j++) {
- if (m->control_pressed) { delete readMatrix; return 0; }
- //already added everyone else in row
- if (i < matrix->seqVec[i][j].index) { seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist; }
- }
- }
- //add dummy map for unweighted calc
- SeqMap dummy;
- seqVec.push_back(dummy);
-
- delete matrix;
- delete readMatrix;
- delete nameMap;
-
- if (m->control_pressed) { return 0; }
- }else {
- //process file and set up indexes
- if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }
- else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
- else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0; }
-
- formatMatrix->setCutoff(cutoff);
-
- if(namefile != ""){
- nameMap = new NameAssignment(namefile);
- nameMap->readMap();
- }else{ nameMap = NULL; }
-
- formatMatrix->read(nameMap);
-
- if (m->control_pressed) { delete formatMatrix; return 0; }
-
- list = formatMatrix->getListVector();
-
- distFile = formatMatrix->getFormattedFileName();
-
- //positions in file where the distances for each sequence begin
- //rowPositions[1] = position in file where distance related to sequence 1 start.
- rowPositions = formatMatrix->getRowPositions();
- rowPositions.push_back(-1); //dummy row for unweighted calc
-
- delete formatMatrix;
- delete nameMap;
-
- //openfile for getMap to use
- m->openInputFile(distFile, inRow);
-
- if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
- }
-
-
- //list bin 0 = first name read in distance matrix, list bin 1 = second name read in distance matrix
- if (list != NULL) {
- vector<string> names;
- string binnames;
- //map names to rows in sparsematrix
- for (int i = 0; i < list->size(); i++) {
- names.clear();
- binnames = list->get(i);
-
- m->splitAtComma(binnames, names);
-
- for (int j = 0; j < names.size(); j++) {
- nameToIndex[names[j]] = i;
- }
- }
- } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
-
+ readDist();
- if (m->control_pressed) {
- if (large) { inRow.close(); m->mothurRemove(distFile); }
- return 0;
- }
+ if (m->control_pressed) { if (large) { inRow.close(); m->mothurRemove(distFile); } return 0; }
if (groupfile != "") {
//read in group map info.
if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = ""; }
if (Groups.size() != 0) {
- SharedUtil* util = new SharedUtil();
+ SharedUtil util;
vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
- util->setGroups(Groups, gNamesOfGroups, "getoturep");
+ util.setGroups(Groups, gNamesOfGroups, "getoturep");
groupMap->setNamesOfGroups(gNamesOfGroups);
- delete util;
}
- }
+ }else if (hasGroups) {
+ if (Groups.size() != 0) {
+ SharedUtil util;
+ vector<string> gNamesOfGroups = ct.getNamesOfGroups();
+ util.setGroups(Groups, gNamesOfGroups, "getoturep");
+ }
+ }
//done with listvector from matrix
if (list != NULL) { delete list; }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
exit(1);
}
}
+//**********************************************************************************************************************
+// Reads the distance file (distFile, in "column" or "phylip" format) and prepares the
+// lookup structures used when picking OTU representatives.
+//   - !large : the matrix is read through readMatrix and copied into seqVec, with one
+//              extra empty SeqMap appended as a dummy row for the unweighted calc.
+//   - large  : the file is processed through formatMatrix; distFile is replaced by the
+//              formatted file name, rowPositions is filled (plus a -1 dummy row), and
+//              inRow is opened on the formatted file.
+// In both modes `list` is taken from the reader and nameToIndex maps every name in
+// list bin i to i. The name file, if given, supplies the names; otherwise the count
+// table ct; otherwise the reader is called with a NULL NameAssignment.
+// Always returns 0; callers check m->control_pressed.
+int GetOTURepCommand::readDist() {
+	try {
+
+		if (!large) {
+			//read distance files
+			if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }
+			else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
+			else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0; }
+
+			readMatrix->setCutoff(cutoff);
+
+			NameAssignment* nameMap = NULL;
+			if(namefile != ""){
+				nameMap = new NameAssignment(namefile);
+				nameMap->readMap();
+				readMatrix->read(nameMap);
+			}else if (countfile != "") {
+				readMatrix->read(&ct);
+			}else {
+				//no name or count file (e.g. phylip input): still read the matrix, with a NULL name map
+				readMatrix->read(nameMap);
+			}
+
+			if (m->control_pressed) { delete readMatrix; return 0; }
+
+			list = readMatrix->getListVector();
+			SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
+
+			// Create a data structure to quickly access the distance information.
+			// It consists of a vector of distance maps, where each map contains
+			// all distances of a certain sequence. Vector and maps are accessed
+			// via the index of a sequence in the distance matrix
+			seqVec = vector<SeqMap>(list->size());
+			for (int i = 0; i < matrix->seqVec.size(); i++) {
+				for (int j = 0; j < matrix->seqVec[i].size(); j++) {
+					if (m->control_pressed) { delete readMatrix; return 0; }
+					//already added everyone else in row
+					if (i < matrix->seqVec[i][j].index) { seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist; }
+				}
+			}
+			//add dummy map for unweighted calc
+			SeqMap dummy;
+			seqVec.push_back(dummy);
+
+			delete matrix;
+			delete readMatrix;
+			delete nameMap;
+
+			if (m->control_pressed) { return 0; }
+		}else {
+			//process file and set up indexes
+			if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }
+			else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
+			else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0; }
+
+			formatMatrix->setCutoff(cutoff);
+
+			//only formatMatrix is constructed in this branch, so all reads must go through it (not readMatrix)
+			NameAssignment* nameMap = NULL;
+			if(namefile != ""){
+				nameMap = new NameAssignment(namefile);
+				nameMap->readMap();
+				formatMatrix->read(nameMap);
+			}else if (countfile != "") {
+				//NOTE(review): assumes FormatMatrix provides a read(CountTable*) overload like ReadMatrix does - confirm
+				formatMatrix->read(&ct);
+			}else {
+				//no name or count file (e.g. phylip input): still process the file, with a NULL name map
+				formatMatrix->read(nameMap);
+			}
+
+			if (m->control_pressed) { delete formatMatrix; return 0; }
+
+			list = formatMatrix->getListVector();
+			distFile = formatMatrix->getFormattedFileName();
+
+			//positions in file where the distances for each sequence begin
+			//rowPositions[1] = position in file where distance related to sequence 1 start.
+			rowPositions = formatMatrix->getRowPositions();
+			rowPositions.push_back(-1); //dummy row for unweighted calc
+
+			delete formatMatrix;
+			delete nameMap;
+
+			//openfile for getMap to use
+			m->openInputFile(distFile, inRow);
+
+			if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
+		}
+
+
+		//list bin 0 = first name read in distance matrix, list bin 1 = second name read in distance matrix
+		if (list != NULL) {
+			vector<string> names;
+			string binnames;
+			//map names to rows in sparsematrix
+			for (int i = 0; i < list->size(); i++) {
+				names.clear();
+				binnames = list->get(i);
+
+				m->splitAtComma(binnames, names);
+
+				for (int j = 0; j < names.size(); j++) {
+					nameToIndex[names[j]] = i;
+				}
+			}
+		} else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
+		if (m->control_pressed) { if (large) { inRow.close(); m->mothurRemove(distFile); }return 0; }
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "GetOTURepCommand", "readDist");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
void GetOTURepCommand::readNamesFile() {
try {
}
}
//**********************************************************************************************************************
-string GetOTURepCommand::findRep(vector<string> names) {
+string GetOTURepCommand::findRep(vector<string> names, string group) {
try{
// if only 1 sequence in bin or processing the "unique" label, then
// the first sequence of the OTU is the representative one
if ((names.size() == 1)) {
return names[0];
}else{
- vector<int> seqIndex(names.size());
- vector<float> max_dist(names.size());
- vector<float> total_dist(names.size());
+ vector<int> seqIndex; //(names.size());
map<string, string>::iterator itNameFile;
map<string, int>::iterator itNameIndex;
//fill seqIndex and initialize sums
for (size_t i = 0; i < names.size(); i++) {
if (weighted) {
- seqIndex[i] = nameToIndex[names[i]];
+ seqIndex.push_back(nameToIndex[names[i]]);
+ if (countfile != "") { //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
+ int numRep = 0;
+ if (group != "") { numRep = ct.getGroupCount(names[i], group); }
+ else { numRep = ct.getGroupCount(names[i]); }
+ for (int j = 1; j < numRep; j++) { //don't add yourself again
+ seqIndex.push_back(nameToIndex[names[i]]);
+ }
+ }
}else {
if (namefile == "") {
itNameIndex = nameToIndex.find(names[i]);
if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique
- if (large) { seqIndex[i] = (rowPositions.size()-1); }
- else { seqIndex[i] = (seqVec.size()-1); }
+ if (large) { seqIndex.push_back((rowPositions.size()-1)); }
+ else { seqIndex.push_back((seqVec.size()-1)); }
}else {
- seqIndex[i] = itNameIndex->second;
+ seqIndex.push_back(itNameIndex->second);
}
}else {
string name2 = itNameFile->second;
if (name1 == name2) { //then you are unique so add your real dists
- seqIndex[i] = nameToIndex[names[i]];
+ seqIndex.push_back(nameToIndex[names[i]]);
}else { //add dummy
- if (large) { seqIndex[i] = (rowPositions.size()-1); }
- else { seqIndex[i] = (seqVec.size()-1); }
+ if (large) { seqIndex.push_back((rowPositions.size()-1)); }
+ else { seqIndex.push_back((seqVec.size()-1)); }
}
}
}
}
- max_dist[i] = 0.0;
- total_dist[i] = 0.0;
}
+
+ vector<float> max_dist(seqIndex.size(), 0.0);
+ vector<float> total_dist(seqIndex.size(), 0.0);
// loop through all entries in seqIndex
SeqMap::iterator it;
map<string, ofstream*> filehandles;
if (Groups.size() == 0) { //you don't want to use groups
- outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + getOutputFileNameTag("name");
- m->openOutputFile(outputNamesFile, newNamesOutput);
- outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
+ outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".";
+ if (countfile == "") {
+ outputNamesFile += getOutputFileNameTag("name");
+ outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
+ }else {
+ outputNamesFile += getOutputFileNameTag("count");
+ outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile);
+ }
outputNameFiles[outputNamesFile] = processList->getLabel();
+ m->openOutputFile(outputNamesFile, newNamesOutput);
+ newNamesOutput << "noGroup" << endl;
}else{ //you want to use groups
ofstream* temp;
for (int i=0; i<Groups.size(); i++) {
temp = new ofstream;
filehandles[Groups[i]] = temp;
- outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + "." + getOutputFileNameTag("name");
+ outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".";
+ if (countfile == "") {
+ outputNamesFile += getOutputFileNameTag("name");
+ outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
+ }else {
+ outputNamesFile += getOutputFileNameTag("count");
+ outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile);
+ }
m->openOutputFile(outputNamesFile, *(temp));
- outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
+ *(temp) << Groups[i] << endl;
outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
}
}
m->splitAtComma(temp, namesInBin);
if (Groups.size() == 0) {
- nameRep = findRep(namesInBin);
+ nameRep = findRep(namesInBin, "");
newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl;
}else{
map<string, vector<string> > NamesInGroup;
}
for (int j=0; j<namesInBin.size(); j++) {
- string thisgroup = groupMap->getGroup(namesInBin[j]);
-
- if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
-
- if (m->inUsersGroups(thisgroup, Groups)) { //add this name to correct group
- NamesInGroup[thisgroup].push_back(namesInBin[j]);
- }
+ if (groupfile != "") {
+ string thisgroup = groupMap->getGroup(namesInBin[j]);
+ if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ //add this name to correct group
+ if (m->inUsersGroups(thisgroup, Groups)) { NamesInGroup[thisgroup].push_back(namesInBin[j]); }
+ }else {
+ vector<string> thisSeqsGroups = ct.getGroups(namesInBin[j]);
+ for (int k = 0; k < thisSeqsGroups.size(); k++) {
+ if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { NamesInGroup[thisSeqsGroups[k]].push_back(namesInBin[j]); }
+ }
+ }
}
//get rep for each group in otu
for (int j=0; j<Groups.size(); j++) {
if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
//get rep for each group
- nameRep = findRep(NamesInGroup[Groups[j]]);
+ nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]);
//output group rep and other members of this group
(*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
ofstream out2;
string tempNameFile = filename + ".temp";
m->openOutputFile(tempNameFile, out2);
-
+
ifstream in;
m->openInputFile(filename, in);
int i = 0;
+ string tempGroup = "";
+ in >> tempGroup; m->gobble(in);
+
+ CountTable thisCt;
+ if (countfile != "") {
+ thisCt.readTable(countfile);
+ if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
+ }
+
+ int thistotal = 0;
while (!in.eof()) {
string rep, binnames;
in >> i >> rep >> binnames; m->gobble(in);
- out2 << rep << '\t' << binnames << endl;
vector<string> names;
m->splitAtComma(binnames, names);
int binsize = names.size();
-
+
+ if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
+ else {
+ if (tempGroup == "noGroup") {
+ for (int j = 0; j < names.size(); j++) {
+ if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
+ }
+ binsize = thisCt.getNumSeqs(rep);
+ }else {
+ int total = 0;
+ for (int j = 0; j < names.size(); j++) { total += thisCt.getGroupCount(names[j], tempGroup); }
+ out2 << rep << '\t' << total << '\t' << total << endl;
+ binsize = total;
+ }
+ }
+ thistotal += binsize;
//if you have a groupfile
string group = "";
+ map<string, string> groups;
+ map<string, string>::iterator groupIt;
if (groupfile != "") {
- map<string, string> groups;
- map<string, string>::iterator groupIt;
-
//find the groups that are in this bin
- for (size_t i = 0; i < names.size(); i++) {
+ for (int i = 0; i < names.size(); i++) {
string groupName = groupMap->getGroup(names[i]);
if (groupName == "not found") {
m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
}
//rip off last dash
group = group.substr(0, group.length()-1);
- }else{ group = ""; }
+ }else if (hasGroups) {
+ map<string, string> groups;
+ for (int i = 0; i < names.size(); i++) {
+ vector<string> thisSeqsGroups = ct.getGroups(names[i]);
+ for (int j = 0; j < thisSeqsGroups.size(); j++) { groups[thisSeqsGroups[j]] = thisSeqsGroups[j]; }
+ }
+ //turn the groups into a string
+ for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
+ group += groupIt->first + "-";
+ }
+ //rip off last dash
+ group = group.substr(0, group.length()-1);
+ //cout << group << endl;
+ }
+ else{ group = ""; }
//print out name and sequence for that bin
if (sorted == "") { //print them out
rep = rep + "\t" + toString(i+1);
rep = rep + "|" + toString(binsize);
- if (groupfile != "") {
+ if (group != "") {
rep = rep + "|" + group;
}
out << ">" << rep << endl;
string sequence = fasta->getSequence(reps[i].name);
string outputName = reps[i].name + "\t" + toString(reps[i].bin);
outputName = outputName + "|" + toString(reps[i].size);
- if (groupfile != "") {
+ if (reps[i].group != "") {
outputName = outputName + "|" + reps[i].group;
}
out << ">" << outputName << endl;
in.close();
out.close();
out2.close();
-
+
m->mothurRemove(filename);
rename(tempNameFile.c_str(), filename.c_str());
+
+ if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); }
return 0;
int i = 0;
string rep, binnames;
+
+ string tempGroup = "";
+ in >> tempGroup; m->gobble(in);
+
+ CountTable thisCt;
+ if (countfile != "") {
+ thisCt.readTable(countfile);
+ if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
+ }
+
while (!in.eof()) {
if (m->control_pressed) { break; }
in >> i >> rep >> binnames; m->gobble(in);
- out2 << rep << '\t' << binnames << endl;
+
+ if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
+ else {
+ vector<string> names;
+ m->splitAtComma(binnames, names);
+ if (tempGroup == "noGroup") {
+ for (int j = 0; j < names.size(); j++) {
+ if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
+ }
+ }else {
+ int total = 0;
+ for (int j = 0; j < names.size(); j++) { total += thisCt.getGroupCount(names[j], tempGroup); }
+ out2 << rep << '\t' << total << '\t' << total << endl;
+ }
+ }
+
}
in.close();
out2.close();
m->mothurRemove(filename);
rename(tempNameFile.c_str(), filename.c_str());
+ if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); }
+
return 0;
}
catch(exception& e) {
#include "groupmap.h"
#include "readmatrix.hpp"
#include "formatmatrix.h"
+#include "counttable.h"
typedef map<int, float> SeqMap;
ReadMatrix* readMatrix;
FormatMatrix* formatMatrix;
NameAssignment* nameMap;
- string filename, fastafile, listfile, namefile, groupfile, label, sorted, phylipfile, columnfile, distFile, format, outputDir, groups;
+ CountTable ct;
+ string filename, fastafile, listfile, namefile, groupfile, label, sorted, phylipfile, countfile, columnfile, distFile, format, outputDir, groups;
ofstream out;
ifstream in, inNames, inRow;
- bool abort, allLines, groupError, large, weighted;
+ bool abort, allLines, groupError, large, weighted, hasGroups;
set<string> labels; //holds labels to be used
map<string, int> nameToIndex; //maps sequence name to index in sparsematrix
map<string, string> nameFileMap;
void readNamesFile(bool);
int process(ListVector*);
SeqMap getMap(int);
- string findRep(vector<string>); // returns the name of the "representative" sequence of given bin or subset of a bin, for groups
+ string findRep(vector<string>, string); // returns the name of the "representative" sequence of given bin or subset of a bin, for groups
int processNames(string, string);
int processFastaNames(string, string);
+ int readDist();
};
#endif
#include "getseqscommand.h"
#include "sequence.hpp"
#include "listvector.hpp"
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> GetSeqsCommand::setParameters(){
if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "count") { outputFileName = "pick.count.table"; }
+ else if (type == "count") { outputFileName = "pick.count_table"; }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "qfile") { outputFileName = "pick" + m->getExtension(inputName); }
}
in.close();
out.close();
+
+ //check for groups that have been eliminated
+ CountTable ct;
+ if (ct.testGroups(outputFileName)) {
+ ct.readTable(outputFileName);
+ ct.printTable(outputFileName);
+ }
if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
//parse out names that are in accnos file
string binnames = list.get(i);
+ vector<string> bnames;
+ m->splitAtComma(binnames, bnames);
string newNames = "";
- while (binnames.find_first_of(',') != -1) {
- string name = binnames.substr(0,binnames.find_first_of(','));
- binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-
+ for (int i = 0; i < bnames.size(); i++) {
+ string name = bnames[i];
//if that name is in the .accnos file, add it
if (names.count(name) != 0) { newNames += name + ","; selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
}
- //get last name
- if (names.count(binnames) != 0) { newNames += binnames + ","; selectedCount++; if (m->debug) { sanity["list"].insert(binnames); } }
-
//if there are names in this bin add to new list
if (newNames != "") {
newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
if (m->control_pressed) { return metadata; }
//if there is a bin label use it otherwise make one
- string binLabel = binTag;
- string sbinNumber = otuLabels[i];
- if (sbinNumber.length() < snumBins.length()) {
- int diff = snumBins.length() - sbinNumber.length();
- for (int h = 0; h < diff; h++) { binLabel += "0"; }
- }
- binLabel += sbinNumber;
-
- labelTaxMap[binLabel] = taxs[i];
+ if (m->isContainingOnlyDigits(otuLabels[i])) {
+ string binLabel = binTag;
+ string sbinNumber = otuLabels[i];
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { binLabel += "0"; }
+ }
+ binLabel += sbinNumber;
+ labelTaxMap[binLabel] = taxs[i];
+ }else { labelTaxMap[otuLabels[i]] = taxs[i]; }
}
}
}
/**************************************************************************************************/
+//Returns a copy of tax with every single-quote (') and double-quote (") character removed.
+//All other characters are kept in their original order.
+//  tax - text to clean; taken by value, so the caller's string is left untouched.
+//If control_pressed is set while scanning, the characters collected so far are returned.
+//A caught exception is reported through errorOut and the program exits with status 1.
+string MothurOut::removeQuotes(string tax) {
+	try {
+
+		string newTax = "";
+		newTax.reserve(tax.length()); //the result can never be longer than the input
+
+		for (int i = 0; i < tax.length(); i++) {
+
+			if (control_pressed) { return newTax; }
+
+			//copy everything except the two quote characters
+			if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
+
+		}
+
+		return newTax;
+	}
+	catch(exception& e) {
+		errorOut(e, "MothurOut", "removeQuotes");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
void splitAtChar(string&, vector<string>&, char);
void splitAtChar(string&, string&, char);
int removeConfidences(string&);
+ string removeQuotes(string);
string makeList(vector<string>&);
bool isSubset(vector<string>, vector<string>); //bigSet, subset
vector<string> PreClusterCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pdiffs);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
string helpString = "";
helpString += "The pre.cluster command groups sequences that are within a given number of base mismatches.\n";
helpString += "The pre.cluster command outputs a new fasta and name file.\n";
- helpString += "The pre.cluster command parameters are fasta, names and diffs. The fasta parameter is required. \n";
- helpString += "The names parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n";
+ helpString += "The pre.cluster command parameters are fasta, name, group, count, processors and diffs. The fasta parameter is required. \n";
+ helpString += "The name parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n";
helpString += "The group parameter allows you to provide a group file so you can cluster by group. \n";
+ helpString += "The count parameter allows you to provide a count file so you can cluster by group. \n";
helpString += "The diffs parameter allows you to specify maximum number of mismatched bases allowed between sequences in a grouping. The default is 1.\n";
helpString += "The pre.cluster command should be in the following format: \n";
helpString += "pre.cluster(fasta=yourFastaFile, names=yourNamesFile, diffs=yourMaxDiffs) \n";
else {
if (type == "fasta") { outputFileName = "precluster" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "precluster.names"; }
+ else if (type == "count") { outputFileName = "precluster.count_table"; }
else if (type == "map") { outputFileName = "precluster.map"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
outputTypes["map"] = tempOutNames;
}
catch(exception& e) {
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
outputTypes["map"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a count file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
if (groupfile == "not found") { groupfile = ""; bygroup = false; }
else if (groupfile == "not open") { abort = true; groupfile = ""; }
else { m->setGroupFile(groupfile); bygroup = true; }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not found") { countfile = ""; }
+ else if (countfile == "not open") { abort = true; countfile = ""; }
+ else {
+ m->setCountTableFile(countfile);
+ ct.readTable(countfile);
+ if (ct.hasGroupInfo()) { bygroup = true; }
+ else { bygroup = false; }
+ }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
string temp = validParameter.validFile(parameters, "diffs", false); if(temp == "not found"){ temp = "1"; }
m->mothurConvert(temp, diffs);
m->setProcessors(temp);
m->mothurConvert(temp, processors);
- if (namefile == "") {
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
string newFastaFile = fileroot + getOutputFileNameTag("fasta", fastafile);
string newNamesFile = fileroot + getOutputFileNameTag("name");
+ string newCountFile = fileroot + getOutputFileNameTag("count");
string newMapFile = fileroot + getOutputFileNameTag("map"); //add group name if by group
outputNames.push_back(newFastaFile); outputTypes["fasta"].push_back(newFastaFile);
- outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile);
-
+ if (countfile == "") { outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile); }
+ else { outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); }
if (bygroup) {
//clear out old files
newMapFile = fileroot + "precluster.";
//parse fasta and name file by group
- SequenceParser* parser;
- if (namefile != "") { parser = new SequenceParser(groupfile, fastafile, namefile); }
- else { parser = new SequenceParser(groupfile, fastafile); }
-
- vector<string> groups = parser->getNamesOfGroups();
-
- if(processors == 1) { driverGroups(parser, newFastaFile, newNamesFile, newMapFile, 0, groups.size(), groups); }
- else { createProcessesGroups(parser, newFastaFile, newNamesFile, newMapFile, groups); }
-
- delete parser;
-
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
- //run unique.seqs for deconvolute results
- string inputString = "fasta=" + newFastaFile;
- if (namefile != "") { inputString += ", name=" + newNamesFile; }
- m->mothurOutEndLine();
- m->mothurOut("/******************************************/"); m->mothurOutEndLine();
- m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
- m->mothurCalling = true;
+ vector<string> groups;
+ if (countfile != "") {
+ cparser = new SequenceCountParser(countfile, fastafile);
+ groups = cparser->getNamesOfGroups();
+ }else {
+ if (namefile != "") { parser = new SequenceParser(groupfile, fastafile, namefile); }
+ else { parser = new SequenceParser(groupfile, fastafile); }
+ groups = parser->getNamesOfGroups();
+ }
- Command* uniqueCommand = new DeconvoluteCommand(inputString);
- uniqueCommand->execute();
-
- map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
-
- delete uniqueCommand;
- m->mothurCalling = false;
- m->mothurOut("/******************************************/"); m->mothurOutEndLine();
-
- m->renameFile(filenames["fasta"][0], newFastaFile);
- m->renameFile(filenames["name"][0], newNamesFile);
-
+ if(processors == 1) { driverGroups(newFastaFile, newNamesFile, newMapFile, 0, groups.size(), groups); }
+ else { createProcessesGroups(newFastaFile, newNamesFile, newMapFile, groups); }
+
+ if (countfile != "") {
+ mergeGroupCounts(newCountFile, newNamesFile, newFastaFile);
+ delete cparser;
+ }else {
+ delete parser;
+ //run unique.seqs for deconvolute results
+ string inputString = "fasta=" + newFastaFile;
+ if (namefile != "") { inputString += ", name=" + newNamesFile; }
+ m->mothurOutEndLine();
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+ m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
+ m->mothurCalling = true;
+
+ Command* uniqueCommand = new DeconvoluteCommand(inputString);
+ uniqueCommand->execute();
+
+ map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
+
+ delete uniqueCommand;
+ m->mothurCalling = false;
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+ m->renameFile(filenames["fasta"][0], newFastaFile);
+ m->renameFile(filenames["name"][0], newNamesFile);
+ }
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run pre.cluster."); m->mothurOutEndLine();
}else {
m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
- printData(newFastaFile, newNamesFile);
-
+ if (countfile != "") { newNamesFile = newCountFile; }
+ printData(newFastaFile, newNamesFile, "");
+
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
}
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
return 0;
}
}
/**************************************************************************************************/
-int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newFName, string newNName, string newMFile, vector<string> groups) {
+int PreClusterCommand::createProcessesGroups(string newFName, string newNName, string newMFile, vector<string> groups) {
try {
vector<int> processIDS;
process++;
}else if (pid == 0){
outputNames.clear();
- num = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMFile, lines[process].start, lines[process].end, groups);
+ num = driverGroups(newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMFile, lines[process].start, lines[process].end, groups);
string tempFile = toString(getpid()) + ".outputNames.temp";
ofstream outTemp;
}
//do my part
- num = driverGroups(parser, newFName, newNName, newMFile, lines[0].start, lines[0].end, groups);
+ num = driverGroups(newFName, newNName, newMFile, lines[0].start, lines[0].end, groups);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
// Allocate memory for thread data.
string extension = toString(i) + ".temp";
- preClusterData* tempPreCluster = new preClusterData(fastafile, namefile, groupfile, (newFName+extension), (newNName+extension), newMFile, groups, m, lines[i].start, lines[i].end, diffs, i);
+ preClusterData* tempPreCluster = new preClusterData(fastafile, namefile, groupfile, countfile, (newFName+extension), (newNName+extension), newMFile, groups, m, lines[i].start, lines[i].end, diffs, i);
pDataArray.push_back(tempPreCluster);
processIDS.push_back(i);
//using the main process as a worker saves time and memory
- num = driverGroups(parser, newFName, newNName, newMFile, lines[0].start, lines[0].end, groups);
+ num = driverGroups(newFName, newNName, newMFile, lines[0].start, lines[0].end, groups);
//Wait until all threads have terminated.
WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
}
}
/**************************************************************************************************/
-int PreClusterCommand::driverGroups(SequenceParser* parser, string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
+int PreClusterCommand::driverGroups(string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
try {
int numSeqs = 0;
m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine();
map<string, string> thisNameMap;
- if (namefile != "") { thisNameMap = parser->getNameMap(groups[i]); }
- vector<Sequence> thisSeqs = parser->getSeqs(groups[i]);
-
+ vector<Sequence> thisSeqs;
+ if (groupfile != "") {
+ thisSeqs = parser->getSeqs(groups[i]);
+ }else if (countfile != "") {
+ thisSeqs = cparser->getSeqs(groups[i]);
+ }
+ if (namefile != "") { thisNameMap = parser->getNameMap(groups[i]); }
+
//fill alignSeqs with this groups info.
- numSeqs = loadSeqs(thisNameMap, thisSeqs);
+ numSeqs = loadSeqs(thisNameMap, thisSeqs, groups[i]);
if (m->control_pressed) { return 0; }
if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
- int count = process(newMFile+groups[i]+".map");
+ int count= process(newMFile+groups[i]+".map");
outputNames.push_back(newMFile+groups[i]+".map"); outputTypes["map"].push_back(newMFile+groups[i]+".map");
if (m->control_pressed) { return 0; }
m->mothurOut("Total number of sequences before pre.cluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
- printData(newFFile, newNFile);
+ printData(newFFile, newNFile, groups[i]);
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
//ifstream inNames;
ifstream inFasta;
- //m->openInputFile(namefile, inNames);
m->openInputFile(fastafile, inFasta);
-
- //string firstCol, secondCol, nameString;
set<int> lengths;
while (!inFasta.eof()) {
if (m->control_pressed) { inFasta.close(); return 0; }
-
- //inNames >> firstCol >> secondCol;
- //nameString = secondCol;
-
- //m->gobble(inNames);
- //int size = 1;
- //while (secondCol.find_first_of(',') != -1) {
- // size++;
- // secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
- //}
-
+
Sequence seq(inFasta); m->gobble(inFasta);
if (seq.getName() != "") { //can get "" if commented line is at end of fasta file
lengths.insert(seq.getAligned().length());
}
}else { //no names file, you are identical to yourself
- seqPNode tempNode(1, seq, seq.getName());
+ int numRep = 1;
+ if (countfile != "") { numRep = ct.getNumSeqs(seq.getName()); }
+ seqPNode tempNode(numRep, seq, seq.getName());
alignSeqs.push_back(tempNode);
lengths.insert(seq.getAligned().length());
}
}
}
inFasta.close();
- //inNames.close();
if (lengths.size() > 1) { m->control_pressed = true; m->mothurOut("[ERROR]: your sequences are not all the same length. pre.cluster requires sequences to be aligned."); m->mothurOutEndLine(); }
else if (lengths.size() == 1) { length = *(lengths.begin()); }
}
}
/**************************************************************************************************/
-int PreClusterCommand::loadSeqs(map<string, string>& thisName, vector<Sequence>& thisSeqs){
+int PreClusterCommand::loadSeqs(map<string, string>& thisName, vector<Sequence>& thisSeqs, string group){
try {
set<int> lengths;
alignSeqs.clear();
map<string, string>::iterator it;
bool error = false;
-
+ map<string, int> thisCount;
+ if (countfile != "") { thisCount = cparser->getCountTable(group); }
+
for (int i = 0; i < thisSeqs.size(); i++) {
if (m->control_pressed) { return 0; }
lengths.insert(thisSeqs[i].getAligned().length());
}
}else { //no names file, you are identical to yourself
- seqPNode tempNode(1, thisSeqs[i], thisSeqs[i].getName());
+ int numRep = 1;
+ if (countfile != "") {
+ map<string, int>::iterator it2 = thisCount.find(thisSeqs[i].getName());
+
+ //should never be true since parser checks for this
+ if (it2 == thisCount.end()) { m->mothurOut(thisSeqs[i].getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); error = true; }
+ else { numRep = it2->second; }
+ }
+ seqPNode tempNode(numRep, thisSeqs[i], thisSeqs[i].getName());
alignSeqs.push_back(tempNode);
lengths.insert(thisSeqs[i].getAligned().length());
}
}
-
+
if (lengths.size() > 1) { error = true; m->mothurOut("[ERROR]: your sequences are not all the same length. pre.cluster requires sequences to be aligned."); m->mothurOutEndLine(); }
else if (lengths.size() == 1) { length = *(lengths.begin()); }
exit(1);
}
}
+/**************************************************************************************************/
+
+//Folds the per-group clustering results back into the count table (ct) and prints it.
+//  newcount - file the updated count table is printed to.
+//  newname  - temporary file of whitespace-separated records: group, representative name,
+//             comma-separated list of names. It is removed once the table has been printed.
+//  newfasta - fasta file; when bygroup is set it is rewritten so that each sequence name
+//             appears only once (the first occurrence is kept).
+//Returns 0. A caught exception is reported through m->errorOut and the program exits with status 1.
+int PreClusterCommand::mergeGroupCounts(string newcount, string newname, string newfasta){
+	try {
+		ifstream inNames;
+		m->openInputFile(newname, inNames);
+
+		string group, first, second;
+		while (!inNames.eof()) {
+			if (m->control_pressed) { break; }
+			inNames >> group; m->gobble(inNames);
+			inNames >> first; m->gobble(inNames);
+			inNames >> second; m->gobble(inNames);
+
+			vector<string> names;
+			m->splitAtComma(second, names);
+
+			//move this group's counts for every listed name onto the representative.
+			//the loop starts at 1 - names[0] is presumably the representative itself (TODO confirm against the code that builds the names list).
+			int total = ct.getGroupCount(first, group);
+			for (int i = 1; i < names.size(); i++) {
+				total += ct.getGroupCount(names[i], group);
+				ct.setAbund(names[i], group, 0);
+			}
+			ct.setAbund(first, group, total);
+		}
+		inNames.close();
+
+		//drop every sequence whose getNumSeqs value is now zero
+		vector<string> namesOfSeqs = ct.getNamesOfSeqs();
+		for (int i = 0; i < namesOfSeqs.size(); i++) {
+			if (ct.getNumSeqs(namesOfSeqs[i]) == 0) {
+				ct.remove(namesOfSeqs[i]);
+			}
+		}
+
+		ct.printTable(newcount);
+		m->mothurRemove(newname);
+
+		if (bygroup) { //if by group, must remove the duplicate seqs that are named the same
+			ifstream in;
+			m->openInputFile(newfasta, in);
+
+			ofstream out;
+			m->openOutputFile(newfasta+"temp", out);
+
+			set<string> already; //names that have already been written to the temp file
+			while(!in.eof()) {
+				if (m->control_pressed) { break; }
+
+				Sequence seq(in); m->gobble(in);
+
+				if (seq.getName() != "") {
+					//only the first sequence seen for a given name is printed
+					if (already.count(seq.getName()) == 0) {
+						seq.printSequence(out);
+						already.insert(seq.getName());
+					}
+				}
+			}
+			in.close();
+			out.close();
+
+			//replace the original fasta with the de-duplicated copy
+			m->mothurRemove(newfasta);
+			m->renameFile(newfasta+"temp", newfasta);
+		}
+		return 0;
+
+	}
+	catch(exception& e) {
+		m->errorOut(e, "PreClusterCommand", "mergeGroupCounts");
+		exit(1);
+	}
+}
/**************************************************************************************************/
-void PreClusterCommand::printData(string newfasta, string newname){
+void PreClusterCommand::printData(string newfasta, string newname, string group){
try {
ofstream outFasta;
ofstream outNames;
m->openOutputFile(newname, outNames);
}
+ if ((countfile != "") && (group == "")) { outNames << "Representative_Sequence\ttotal\n"; }
for (int i = 0; i < alignSeqs.size(); i++) {
if (alignSeqs[i].numIdentical != 0) {
alignSeqs[i].seq.printSequence(outFasta);
- outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+ if (countfile != "") {
+ if (group != "") { outNames << group << '\t' << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; }
+ else { outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].numIdentical << endl; }
+ }else { outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; }
}
}
#include "command.hpp"
#include "sequence.hpp"
#include "sequenceparser.h"
+#include "sequencecountparser.h"
/************************************************************/
struct seqPNode {
~seqPNode() {}
};
/************************************************************/
-inline bool comparePriority(seqPNode first, seqPNode second) { return (first.numIdentical > second.numIdentical); }
+inline bool comparePriority(seqPNode first, seqPNode second) {
+	//primary key: true when first has the larger numIdentical, false when it has the smaller
+	if (first.numIdentical != second.numIdentical) { return (first.numIdentical > second.numIdentical); }
+	//equal numIdentical: true only when first's sequence name compares greater than second's
+	return (first.seq.getName() > second.seq.getName());
+}
//************************************************************/
class PreClusterCommand : public Command {
linePair(int i, int j) : start(i), end(j) {}
};
+ SequenceParser* parser;
+ SequenceCountParser* cparser;
+ CountTable ct;
+
int diffs, length, processors;
bool abort, bygroup;
- string fastafile, namefile, outputDir, groupfile;
+ string fastafile, namefile, outputDir, groupfile, countfile;
vector<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
map<string, string> names; //represents the names file first column maps to second column
map<string, int> sizes; //this map a seq name to the number of identical seqs in the names file
void readNameFile();
//int readNamesFASTA();
int calcMisMatches(string, string);
- void printData(string, string); //fasta filename, names file name
+ void printData(string, string, string); //fasta filename, names file name
int process(string);
- int loadSeqs(map<string, string>&, vector<Sequence>&);
- int driverGroups(SequenceParser*, string, string, string, int, int, vector<string> groups);
- int createProcessesGroups(SequenceParser*, string, string, string, vector<string>);
+ int loadSeqs(map<string, string>&, vector<Sequence>&, string);
+ int driverGroups(string, string, string, int, int, vector<string> groups);
+ int createProcessesGroups(string, string, string, vector<string>);
+ int mergeGroupCounts(string, string, string);
};
/**************************************************************************************************/
struct preClusterData {
string fastafile;
string namefile;
- string groupfile;
+ string groupfile, countfile;
string newFName, newNName, newMName;
MothurOut* m;
int start;
vector<string> mapFileNames;
preClusterData(){}
- preClusterData(string f, string n, string g, string nff, string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int d, int tid) {
+ preClusterData(string f, string n, string g, string c, string nff, string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int d, int tid) {
fastafile = f;
namefile = n;
groupfile = g;
diffs = d;
threadID = tid;
groups = gr;
+ countfile = c;
}
};
//parse fasta and name file by group
SequenceParser* parser;
- if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile); }
- else { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile); }
-
- int numSeqs = 0;
+ SequenceCountParser* cparser;
+ if (pDataArray->countfile != "") {
+ cparser = new SequenceCountParser(pDataArray->countfile, pDataArray->fastafile);
+ }else {
+ if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile); }
+ else { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile); }
+ }
+
+ int numSeqs = 0;
vector<seqPNode> alignSeqs;
//clear out old files
ofstream outF; pDataArray->m->openOutputFile(pDataArray->newFName, outF); outF.close();
pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Processing group " + pDataArray->groups[k] + ":"); pDataArray->m->mothurOutEndLine();
map<string, string> thisNameMap;
- if (pDataArray->namefile != "") { thisNameMap = parser->getNameMap(pDataArray->groups[k]); }
- vector<Sequence> thisSeqs = parser->getSeqs(pDataArray->groups[k]);
+ vector<Sequence> thisSeqs;
+ if (pDataArray->groupfile != "") {
+ thisSeqs = parser->getSeqs(pDataArray->groups[k]);
+ }else if (pDataArray->countfile != "") {
+ thisSeqs = cparser->getSeqs(pDataArray->groups[k]);
+ }
+ if (pDataArray->namefile != "") { thisNameMap = parser->getNameMap(pDataArray->groups[k]); }
//fill alignSeqs with this groups info.
////////////////////////////////////////////////////
alignSeqs.clear();
map<string, string>::iterator it;
bool error = false;
+ map<string, int> thisCount;
+ if (pDataArray->countfile != "") { thisCount = cparser->getCountTable(pDataArray->groups[k]); }
+
for (int i = 0; i < thisSeqs.size(); i++) {
if (thisSeqs[i].getAligned().length() > length) { length = thisSeqs[i].getAligned().length(); }
}
}else { //no names file, you are identical to yourself
- seqPNode tempNode(1, thisSeqs[i], thisSeqs[i].getName());
- alignSeqs.push_back(tempNode);
+ int numRep = 1;
+ if (pDataArray->countfile != "") {
+ map<string, int>::iterator it2 = thisCount.find(thisSeqs[i].getName());
+
+ //should never be true since parser checks for this
+ if (it2 == thisCount.end()) { pDataArray->m->mothurOut(thisSeqs[i].getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); error = true; }
+ else { numRep = it2->second; }
+ }
+ seqPNode tempNode(numRep, thisSeqs[i], thisSeqs[i].getName());
+ alignSeqs.push_back(tempNode);
if (thisSeqs[i].getAligned().length() > length) { length = thisSeqs[i].getAligned().length(); }
}
}
for (int i = 0; i < alignSeqs.size(); i++) {
if (alignSeqs[i].numIdentical != 0) {
alignSeqs[i].seq.printSequence(outFasta);
- outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+ if (pDataArray->countfile != "") { outNames << pDataArray->groups[k] << '\t' << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+ }else { outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; }
+
}
}
else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "count") { outputFileName = "pick.count.table"; }
+ else if (type == "count") { outputFileName = "pick.count_table"; }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); }
#include "removelineagecommand.h"
#include "sequence.hpp"
#include "listvector.hpp"
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> RemoveLineageCommand::setParameters(){
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "count") { outputFileName = "pick.count.table"; }
+ else if (type == "count") { outputFileName = "pick.count_table"; }
else if (type == "alignreport") { outputFileName = "pick.align.report"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
}
in.close();
out.close();
+
+ //check for groups that have been eliminated
+ CountTable ct;
+ if (ct.testGroups(outputFileName)) {
+ ct.readTable(outputFileName);
+ ct.printTable(outputFileName);
+ }
if (wroteSomething == false) { m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
bool remove = false;
+ string noQuotesTax = m->removeQuotes(tax);
+
for (int j = 0; j < listOfTaxons.size(); j++) {
- string newtax = tax;
+ string newtax = noQuotesTax;
//if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
if (!taxonsHasConfidence[j]) {
- int hasConfidences = tax.find_first_of('(');
+ int hasConfidences = noQuotesTax.find_first_of('(');
if (hasConfidences != string::npos) {
- newtax = tax;
+ newtax = noQuotesTax;
m->removeConfidences(newtax);
}
}
}else{//if taxons has them and you don't them remove taxons
- int hasConfidences = tax.find_first_of('(');
+ int hasConfidences = noQuotesTax.find_first_of('(');
if (hasConfidences == string::npos) {
int pos = newtax.find(noConfidenceTaxons[j]);
}else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
//first remove confidences from both and see if the taxonomy exists
- string noNewTax = tax;
- int hasConfidences = tax.find_first_of('(');
+ string noNewTax = noQuotesTax;
+ int hasConfidences = noQuotesTax.find_first_of('(');
if (hasConfidences != string::npos) {
- noNewTax = tax;
+ noNewTax = noQuotesTax;
m->removeConfidences(noNewTax);
}
#include "removeseqscommand.h"
#include "sequence.hpp"
#include "listvector.hpp"
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> RemoveSeqsCommand::setParameters(){
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "qfile") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "alignreport") { outputFileName = "pick.align.report"; }
- else if (type == "count") { outputFileName = "pick.count.table"; }
+ else if (type == "count") { outputFileName = "pick.count_table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
}
in.close();
out.close();
+
+ //check for groups that have been eliminated
+ CountTable ct;
+ if (ct.testGroups(outputFileName)) {
+ ct.readTable(outputFileName);
+ ct.printTable(outputFileName);
+ }
+
if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
*/
#include "screenseqscommand.h"
-
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> ScreenSeqsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(palignreport);
CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
string ScreenSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The screen.seqs command reads a fastafile and creates .....\n";
- helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
+ helpString += "The screen.seqs command reads a fastafile and screens sequences.\n";
+ helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, count, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
helpString += "The fasta parameter is required.\n";
helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n";
helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n";
if (type == "fasta") { outputFileName = "good" + m->getExtension(inputName); }
else if (type == "taxonomy") { outputFileName = "good" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "good" + m->getExtension(inputName); }
+ else if (type == "count") { outputFileName = "good" + m->getExtension(inputName); }
else if (type == "group") { outputFileName = "good" + m->getExtension(inputName); }
else if (type == "accnos") { outputFileName = "bad.accnos"; }
else if (type == "qfile") { outputFileName = "good" + m->getExtension(inputName); }
outputTypes["accnos"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
outputTypes["accnos"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a count file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
alignreport = validParameter.validFile(parameters, "alignreport", true);
if (alignreport == "not open") { abort = true; }
else if (alignreport == "not found") { alignreport = ""; }
temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){ temp = "90"; }
m->mothurConvert(temp, criteria);
- if (namefile == "") {
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
if (optimize.size() != 0) { //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
//use the namefile to optimize correctly
if (namefile != "") { nameMap = m->readNames(namefile); }
+ else if (countfile != "") {
+ CountTable ct;
+ ct.readTable(countfile);
+ nameMap = ct.getNameMap();
+ }
getSummary(positions);
}
else {
screenNameGroupFile(badSeqNames);
if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
}else if(groupfile != "") { screenGroupFile(badSeqNames); } // this screens just the group
-
+ else if (countfile != "") { screenCountFile(badSeqNames); }
+
+
if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
if(alignreport != "") { screenAlignReport(badSeqNames); }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
m->mothurOutEndLine();
exit(1);
}
}
+//***************************************************************************************************************
+// Writes a screened copy of the count file that leaves out every sequence listed in badSeqNames.
+//
+// badSeqNames - names of the sequences that failed screening. It is taken by value, so the
+//               erase() calls below only shrink this local copy.
+//
+// The output file name is built from outputDir, the root of the count file name and the "count"
+// output tag; it is registered in outputNames / outputTypes["count"] before anything is written.
+// Returns 0 in all cases. If m->control_pressed is set, the partially written file is removed.
+int ScreenSeqsCommand::screenCountFile(set<string> badSeqNames){
+    try {
+        ifstream in;
+        m->openInputFile(countfile, in);
+        set<string>::iterator it;
+
+        string goodCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+        outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
+        ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
+
+        //the first line of the count file is the column header line, copy it through unchanged
+        string headers = m->getline(in); m->gobble(in);
+        goodCountOut << headers << endl;
+
+        //each remaining line is: sequence name, total, then the rest of the line (copied verbatim)
+        string name, rest; int thisTotal;
+        while (!in.eof()) {
+
+            if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
+
+            in >> name; m->gobble(in);
+            in >> thisTotal; m->gobble(in);
+            rest = m->getline(in); m->gobble(in);
+
+            it = badSeqNames.find(name);
+
+            if (it != badSeqNames.end()) {
+                //bad sequence: drop its line and tick it off so leftovers can be reported below
+                badSeqNames.erase(it);
+            }
+            else {
+                goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
+            }
+        }
+
+        if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
+
+        //any name still in the set was flagged as bad but never appeared in the count file
+        if (badSeqNames.size() != 0) {
+            for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
+                m->mothurOut("Your count file does not include the sequence " + *it + " please correct.");
+                m->mothurOutEndLine();
+            }
+        }
+
+        in.close();
+        goodCountOut.close();
+
+        //check for groups that have been eliminated
+        //NOTE(review): presumably testGroups() reports that a group column is now empty and the
+        //read/print round trip rewrites the table without it - confirm against CountTable
+        CountTable ct;
+        if (ct.testGroups(goodCountFile)) {
+            ct.readTable(goodCountFile);
+            ct.printTable(goodCountFile);
+        }
+
+        if (m->control_pressed) { m->mothurRemove(goodCountFile); }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "screenCountFile");
+        exit(1);
+    }
+}
//***************************************************************************************************************
int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
int screenNameGroupFile(set<string>);
int screenGroupFile(set<string>);
+ int screenCountFile(set<string>);
int screenAlignReport(set<string>);
int screenQual(set<string>);
int screenTaxonomy(set<string>);
#endif
bool abort;
- string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy;
+ string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy, countfile;
int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
vector<string> outputNames;
vector<string> optimize;
#include "splitgroupscommand.h"
#include "sharedutilities.h"
#include "sequenceparser.h"
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> SplitGroupCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "CountGroup", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "CountGroup", "none",false,false); parameters.push_back(pgroup);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string SplitGroupCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The split.group command reads a group file, and parses your fasta and names files by groups. \n";
- helpString += "The split.group command parameters are fasta, name, group and groups.\n";
- helpString += "The fasta and group parameters are required.\n";
+ helpString += "The split.group command reads a group or count file, and parses your fasta and names or count files by groups. \n";
+ helpString += "The split.group command parameters are fasta, name, group, count and groups.\n";
+ helpString += "The fasta and group or count parameters are required.\n";
helpString += "The groups parameter allows you to select groups to create files for. \n";
helpString += "For example if you set groups=A-B-C, you will get a .A.fasta, .A.names, .B.fasta, .B.names, .C.fasta, .C.names files. \n";
helpString += "If you want .fasta and .names files for all groups, set groups=all. \n";
else {
if (type == "fasta") { outputFileName = "fasta"; }
else if (type == "name") { outputFileName = "names"; }
+ else if (type == "count") { outputFileName = "count_table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "SplitGroupCommand", "SplitGroupCommand");
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
-
+
+ it = parameters.find("count");
+ //user has given a count file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
groupfile = validParameter.validFile(parameters, "group", true);
if (groupfile == "not open") { groupfile = ""; abort = true; }
- else if (groupfile == "not found") {
- groupfile = m->getGroupFile();
- if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("You have no current groupfile and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
+ else if (groupfile == "not found") { groupfile = "";
}else { m->setGroupFile(groupfile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
+ if ((countfile != "") && (groupfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
+
+ if ((countfile == "") && (groupfile == "")) {
+ if (namefile == "") { //check for count then group
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ groupfile = m->getGroupFile();
+ if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You need to provide a count or group file."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }
+ }else { //check for group
+ groupfile = m->getGroupFile();
+ if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You need to provide a count or group file."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }
+ }
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; }
else { m->splitAtDash(groups, Groups); }
//if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(groupfile); }
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ if (groupfile != "") { outputDir = m->hasPath(groupfile); }
+ else { outputDir = m->hasPath(countfile); }
+ }
- if (namefile == "") {
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- SequenceParser* parser;
+ if (countfile == "" ) { runNameGroup(); }
+ else { runCount(); }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ string current = "";
+ itTypes = outputTypes.find("fasta");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+ }
+
+ itTypes = outputTypes.find("name");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
+ }
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SplitGroupCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int SplitGroupCommand::runNameGroup(){
+ try {
+ SequenceParser* parser;
if (namefile == "") { parser = new SequenceParser(groupfile, fastafile); }
else { parser = new SequenceParser(groupfile, fastafile, namefile); }
if (m->control_pressed) { delete parser; return 0; }
-
+
vector<string> namesGroups = parser->getNamesOfGroups();
SharedUtil util; util.setGroups(Groups, namesGroups);
parser->getSeqs(Groups[i], newFasta, false);
outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
+
if (namefile != "") {
parser->getNameMap(Groups[i], newName);
outputNames.push_back(newName); outputTypes["name"].push_back(newName);
}
delete parser;
-
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
- string current = "";
- itTypes = outputTypes.find("fasta");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
- }
-
- itTypes = outputTypes.find("name");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
- }
-
- m->mothurOutEndLine();
- m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
- m->mothurOutEndLine();
-
- return 0;
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SplitGroupCommand", "runNameGroup");
+ exit(1);
}
+}
+//**********************************************************************************************************************
+// Splits the fasta file and the count file by group when the user supplied a count file.
+//
+// For every group left in Groups after SharedUtil::setGroups() one fasta file and one count file
+// are opened; their names are registered in outputNames and outputTypes ("fasta" / "count").
+// Each sequence read from fastafile is written to the files of every selected group it belongs
+// to. Each per-group count file has a single group column, so the "total" column and the group
+// column both hold that group's count for the sequence.
+//
+// Returns 0 in all cases. If the count file has no group information an error is printed and
+// m->control_pressed is set so the caller can clean up.
+int SplitGroupCommand::runCount(){
+    try {
+
+        CountTable ct;
+        ct.readTable(countfile);
+        if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; }
+
+        if (m->control_pressed) { return 0; }
+
+        vector<string> namesGroups = ct.getNamesOfGroups();
+        SharedUtil util;  util.setGroups(Groups, namesGroups);
+
+        //fill filehandles with necessary ofstreams, one fasta and one count stream per group
+        map<string, ofstream*> ffiles;
+        map<string, ofstream*> cfiles;
+        ofstream* temp;
+        for (int i=0; i<Groups.size(); i++) {
+            temp = new ofstream;
+            ffiles[Groups[i]] = temp;
+            string newFasta = outputDir + m->getRootName(m->getSimpleName(fastafile)) + Groups[i] + "." + getOutputFileNameTag("fasta");
+            outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
+            m->openOutputFile(newFasta, (*temp));
+            temp = new ofstream;
+            cfiles[Groups[i]] = temp;
+            string newCount = outputDir + m->getRootName(m->getSimpleName(countfile)) + Groups[i] + "." + getOutputFileNameTag("count");
+            m->openOutputFile(newCount, (*temp));
+            outputNames.push_back(newCount); outputTypes["count"].push_back(newCount);
+            (*temp) << "Representative_Sequence\ttotal\t" << Groups[i] << endl;
+        }
+
+        ifstream in;
+        m->openInputFile(fastafile, in);
+
+        while (!in.eof()) {
+            Sequence seq(in); m->gobble(in);
+
+            if (m->control_pressed) { break; }
+            if (seq.getName() != "") {
+                vector<string> thisSeqsGroups = ct.getGroups(seq.getName());
+                for (int j = 0; j < thisSeqsGroups.size(); j++) {
+                    const string& thisGroup = thisSeqsGroups[j];
+                    if (m->inUsersGroups(thisGroup, Groups)) { //if this sequence belongs to a group we want then print
+                        seq.printSequence(*(ffiles[thisGroup]));
+                        //look the count up by the group being written; j indexes thisSeqsGroups,
+                        //so it must not be used to index Groups
+                        int numSeqs = ct.getGroupCount(seq.getName(), thisGroup);
+                        (*(cfiles[thisGroup])) << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl;
+                    }
+                }
+            }
+        }
+        in.close();
+
+        //close and delete ofstreams
+        for (int i=0; i<Groups.size(); i++) {
+            (*ffiles[Groups[i]]).close(); delete ffiles[Groups[i]];
+            (*cfiles[Groups[i]]).close(); delete cfiles[Groups[i]];
+        }
+
+        return 0;
+
+    }
catch(exception& e) {
- m->errorOut(e, "SplitGroupCommand", "execute");
+ m->errorOut(e, "SplitGroupCommand", "runCount");
exit(1);
}
}
private:
vector<string> outputNames;
- string outputDir, namefile, groupfile, groups, fastafile;
+ string outputDir, namefile, groupfile, countfile, groups, fastafile;
vector<string> Groups;
bool abort;
+
+ int runNameGroup();
+ int runCount();
};
/***************************************************************************************/
try {
CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared);
CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
- CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "countcolumn",false,false); parameters.push_back(pcount);
+ CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName-countcolumn",false,false); parameters.push_back(pcolumn);
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a count file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
if (namefile == "not open") { abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
if ((phylipfile == "") && (columnfile == "") && (sharedfile == "")) {
//is there are current file available for either of these?
else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When running the tree.shared command with a distance file you may not use both the column and the phylip parameters."); m->mothurOutEndLine(); abort = true; }
if (columnfile != "") {
- if (namefile == "") {
+ if ((namefile == "") && (countfile == "")){
namefile = m->getNameFile();
if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
- abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine();
+ abort = true;
+ }
}
}
}
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
readMatrix->setCutoff(cutoff);
- if(namefile != ""){
- nameMap = new NameAssignment(namefile);
- nameMap->readMap();
- }
- else{ nameMap = NULL; }
-
- readMatrix->read(nameMap);
+ //read the distance matrix with whichever naming information was supplied
+ ct = NULL;
+ if(namefile != ""){
+ nameMap = new NameAssignment(namefile);
+ nameMap->readMap();
+ readMatrix->read(nameMap);
+ }else if (countfile != "") {
+ ct = new CountTable();
+ ct->readTable(countfile);
+ readMatrix->read(ct);
+ }else {
+ //neither a name nor a count file was given: still read the matrix, passing a NULL name map as the code did before count support was added
+ nameMap = NULL;
+ readMatrix->read(nameMap);
+ }
+
list = readMatrix->getListVector();
SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
//make treemap
+ if (ct != NULL) { delete ct; }
ct = new CountTable();
set<string> nameMap;
map<string, string> groupMap;
//clear globaldatas old tree names if any
m->Treenames.clear();
-
+
//fills globaldatas tree names
m->Treenames = m->getGroups();
vector<Calculator*> treeCalculators;
vector<SharedRAbundVector*> lookup;
string lastLabel;
- string format, groupNames, filename, sharedfile, inputfile;
+ string format, groupNames, filename, sharedfile, countfile, inputfile;
int numGroups, subsampleSize, iters, processors;
ofstream out;
float precision, cutoff;
return "not found";
}
-}
-/************************************************************
-void TreeMap::setIndex(string seq, int index) {
- it = treemap.find(seq);
- if (it != treemap.end()) { //sequence name was in group file
- treemap[seq].vectorIndex = index;
- }else {
- treemap[seq].vectorIndex = index;
- treemap[seq].groupname = "not found";
- }
-}
-/************************************************************
-int TreeMap::getIndex(string seq) {
-
- it = treemap.find(seq);
- // if it is a valid sequence name then return index
- if (it != treemap.end()) { return treemap[seq].vectorIndex; }
- // if not return error code
- else { return -1; }
-
}
/************************************************************/
else if (type == "fasta") { outputFileName = "fasta"; }
else if (type == "group") { outputFileName = "groups"; }
else if (type == "name") { outputFileName = "names"; }
- else if (type == "count") { outputFileName = "count.table"; }
+ else if (type == "count") { outputFileName = "count_table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;