tree[0].heirarchyID = "0";
maxLevel = 0;
calcTotals = true;
+ addSeqToTree("unknown", "unknown;");
}
catch(exception& e) {
m->errorOut(e, "PhyloTree", "PhyloTree");
try {
m = MothurOut::getInstance();
calcTotals = false;
+ numNodes = 0;
+ numSeqs = 0;
#ifdef USE_MPI
MPI_File inMPI;
istringstream iss (tempBuf,istringstream::in);
delete buffer;
- iss >> numNodes; gobble(iss);
+ //read version
+ m->getline(iss); m->gobble(iss);
+
+ iss >> numNodes; m->gobble(iss);
tree.resize(numNodes);
for (int i = 0; i < tree.size(); i++) {
- iss >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(iss);
+ iss >> tree[i].name >> tree[i].level >> tree[i].parent; m->gobble(iss);
}
//read genus nodes
int numGenus = 0;
- iss >> numGenus; gobble(iss);
+ iss >> numGenus; m->gobble(iss);
int gnode, gsize;
totals.clear();
for (int i = 0; i < numGenus; i++) {
- iss >> gnode >> gsize; gobble(iss);
+ iss >> gnode >> gsize; m->gobble(iss);
uniqueTaxonomies[gnode] = gnode;
totals.push_back(gsize);
MPI_File_close(&inMPI);
#else
- in >> numNodes; gobble(in);
+ //read version
+ string line = m->getline(in); m->gobble(in);
+
+ in >> numNodes; m->gobble(in);
tree.resize(numNodes);
for (int i = 0; i < tree.size(); i++) {
- in >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(in);
+ in >> tree[i].name >> tree[i].level >> tree[i].parent; m->gobble(in);
}
//read genus nodes
int numGenus = 0;
- in >> numGenus; gobble(in);
+ in >> numGenus; m->gobble(in);
int gnode, gsize;
totals.clear();
for (int i = 0; i < numGenus; i++) {
- in >> gnode >> gsize; gobble(in);
+ in >> gnode >> gsize; m->gobble(in);
uniqueTaxonomies[gnode] = gnode;
totals.push_back(gsize);
maxLevel = 0;
calcTotals = true;
string name, tax;
-
#ifdef USE_MPI
- int pid, num;
- vector<long> positions;
+ int pid, num, processors;
+ vector<unsigned long long> positions;
MPI_Status status;
MPI_File inMPI;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
char inFileName[1024];
strcpy(inFileName, tfile.c_str());
MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
if (pid == 0) {
- positions = setFilePosEachLine(tfile, num);
+ positions = m->setFilePosEachLine(tfile, num);
//send file positions to all processes
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&num, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (num+1), MPI_LONG, i, 2001, MPI_COMM_WORLD);
+ }
}else{
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
- positions.resize(num);
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&num, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ positions.resize(num+1);
+ MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, 2001, MPI_COMM_WORLD, &status);
}
//read file
}
MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
- ifstream in;
- openInputFile(tfile, in);
-
- //read in users taxonomy file and add sequences to tree
- while(!in.eof()){
- in >> name >> tax; gobble(in);
-
- addSeqToTree(name, tax);
- }
- in.close();
+ map<string, string> temp;
+ m->readTax(tfile, temp);
+
+ for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+ addSeqToTree(itTemp->first, itTemp->second);
+ temp.erase(itTemp++);
+ }
#endif
-
+
assignHeirarchyIDs(0);
-
+
+
+ string unknownTax = "unknown;";
+ //added last taxon until you get desired level
+ for (int i = 1; i < maxLevel; i++) {
+ unknownTax += "unclassfied;";
+ }
+
+ addSeqToTree("unknown", unknownTax);
+
//create file for summary if needed
setUp(tfile);
}
/**************************************************************************************************/
-string PhyloTree::getNextTaxon(string& heirarchy){
+string PhyloTree::getNextTaxon(string& heirarchy, string seqname){
try {
string currentLevel = "";
if(heirarchy != ""){
int pos = heirarchy.find_first_of(';');
- currentLevel=heirarchy.substr(0,pos);
- if (pos != (heirarchy.length()-1)) { heirarchy=heirarchy.substr(pos+1); }
- else { heirarchy = ""; }
+
+ if (pos == -1) { //you can't find another ;
+ currentLevel = heirarchy;
+ heirarchy = "";
+ m->mothurOut(seqname + " is missing a ;, please check for other errors."); m->mothurOutEndLine();
+ }else{
+ currentLevel=heirarchy.substr(0,pos);
+ if (pos != (heirarchy.length()-1)) { heirarchy=heirarchy.substr(pos+1); }
+ else { heirarchy = ""; }
+ }
+
}
return currentLevel;
}
int level = 1;
tree[0].accessions.push_back(seqName);
- string taxon;// = getNextTaxon(seqTaxonomy);
+ m->removeConfidences(seqTaxonomy);
+ string taxon;// = getNextTaxon(seqTaxonomy);
+
while(seqTaxonomy != ""){
level++;
-
+
if (m->control_pressed) { return 0; }
//somehow the parent is getting one too many accnos
//use print to reassign the taxa id
- taxon = getNextTaxon(seqTaxonomy);
+ taxon = getNextTaxon(seqTaxonomy, seqName);
+
+ if (taxon == "") { m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) { uniqueTaxonomies[currentNode] = currentNode; } break; }
childPointer = tree[currentNode].children.find(taxon);
tree[currentNode].children[taxon] = numNodes-1;
tree[numNodes-1].parent = currentNode;
- // int numChildren = tree[currentNode].children.size();
- // string heirarchyID = tree[currentNode].heirarchyID;
- // tree[currentNode].accessions.push_back(seqName);
-
currentNode = tree[currentNode].children[taxon];
tree[currentNode].accessions.push_back(seqName);
name2Taxonomy[seqName] = currentNode;
- // tree[currentNode].level = level;
- // tree[currentNode].childNumber = numChildren;
- // tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
}
-
+
if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
}
-
+
+ return 0;
}
catch(exception& e) {
m->errorOut(e, "PhyloTree", "addSeqToTree");
if (pid == 0) { binUnclassified(taxFileNameTest); }
#else
- //create file needed for summary if it doesn't exist
- ifstream FileTest(taxFileNameTest.c_str());
-
- if (!FileTest) {
- binUnclassified(taxFileNameTest);
- }
+ binUnclassified(taxFileNameTest);
#endif
}
catch(exception& e) {
try {
ofstream out;
- openOutputFile(file, out);
+ m->openOutputFile(file, out);
map<string, int>::iterator itBin;
map<string, int>::iterator childPointer;
vector<TaxNode> copy = tree;
- int copyNodes = numNodes;
+ //fill out tree
+ fillOutTree(0, copy);
+
+ //get leaf nodes that may need extension
+ for (int i = 0; i < copy.size(); i++) {
+
+ if (copy[i].children.size() == 0) {
+ leafNodes[i] = i;
+ }
+ }
+
+ int copyNodes = copy.size();
+
//go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
- for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
+ map<int, int>::iterator itLeaf;
+ for (itLeaf = leafNodes.begin(); itLeaf != leafNodes.end(); itLeaf++) {
if (m->control_pressed) { out.close(); break; }
- int level = copy[itBin->second].level;
- int currentNode = itBin->second;
+ int level = copy[itLeaf->second].level;
+ int currentNode = itLeaf->second;
//this sequence is unclassified at some levels
- while(level != maxLevel){
+ while(level < maxLevel){
level++;
if(childPointer != copy[currentNode].children.end()){ //if the node already exists, move on
currentNode = childPointer->second; //currentNode becomes 'unclassified'
- copy[currentNode].accessions.push_back(itBin->first); //add this seq
}
else{ //otherwise, create it
copy.push_back(TaxNode(taxon));
copy[copyNodes-1].level = copy[currentNode].level + 1;
currentNode = copy[currentNode].children[taxon];
- copy[currentNode].accessions.push_back(itBin->first);
}
}
}
}
}
/**************************************************************************************************/
+void PhyloTree::fillOutTree(int index, vector<TaxNode>& copy) {
+ try {
+
+ map<string,int>::iterator it;
+
+ it = copy[index].children.find("unclassified");
+ if (it == copy[index].children.end()) { //no unclassified at this level
+ string taxon = "unclassified";
+ copy.push_back(TaxNode(taxon));
+ copy[index].children[taxon] = copy.size()-1;
+ copy[copy.size()-1].parent = index;
+ copy[copy.size()-1].level = copy[index].level + 1;
+ }
+
+ if (tree[index].level < maxLevel) {
+ for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ //check your children
+ fillOutTree(it->second, copy);
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloTree", "fillOutTree");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
string PhyloTree::getFullTaxonomy(string seqName) {
try {
string tax = "";
void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
try {
+
+ //output mothur version
+ out << "#" << m->getVersion() << endl;
+
out << copy.size() << endl;
+ out << maxLevel << endl;
+
for (int i = 0; i < copy.size(); i++) {
-
+
out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size() << '\t';
map<string,int>::iterator it;
#endif
ofstream outTree;
- openOutputFile(treefilename, outTree);
+ m->openOutputFile(treefilename, outTree);
+
+ //output mothur version
+ outTree << "#" << m->getVersion() << endl;
//print treenodes
outTree << tree.size() << endl;
for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { outTree << it2->first << '\t' << tree[it2->first].accessions.size() << endl; }
outTree << endl;
-
outTree.close();
#ifdef USE_MPI
}
}
/**************************************************************************************************/
+TaxNode PhyloTree::get(int i ){
+ try {
+ if (i < tree.size()) { return tree[i]; }
+ else { cout << i << '\t' << tree.size() << endl ; m->mothurOut("Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1); }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloTree", "get");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+TaxNode PhyloTree::get(string seqName){
+ try {
+ map<string, int>::iterator itFind = name2Taxonomy.find(seqName);
+
+ if (itFind != name2Taxonomy.end()) { return tree[name2Taxonomy[seqName]]; }
+ else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloTree", "get");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+string PhyloTree::getName(int i ){
+ try {
+ if (i < tree.size()) { return tree[i].name; }
+ else { m->mothurOut("Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1); }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloTree", "get");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int PhyloTree::getIndex(string seqName){
+ try {
+ map<string, int>::iterator itFind = name2Taxonomy.find(seqName);
+
+ if (itFind != name2Taxonomy.end()) { return name2Taxonomy[seqName]; }
+ else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloTree", "get");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+bool PhyloTree::ErrorCheck(vector<string> templateFileNames){
+ try {
+
+ bool okay = true;
+ templateFileNames.push_back("unknown");
+
+ map<string, int>::iterator itFind;
+ map<string, int> taxonomyFileNames = name2Taxonomy;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: in error check. Numseqs in template = " + toString(templateFileNames.size()) + ". Numseqs in taxonomy = " + toString(taxonomyFileNames.size()) + ".\n"); }
+
+ for (int i = 0; i < templateFileNames.size(); i++) {
+ itFind = taxonomyFileNames.find(templateFileNames[i]);
+
+ if (itFind != taxonomyFileNames.end()) { //found it so erase it
+ taxonomyFileNames.erase(itFind);
+ }else {
+ m->mothurOut("'" +templateFileNames[i] + "' is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
+ okay = false;
+ }
+
+ //templateFileNames.erase(templateFileNames.begin()+i);
+ //i--;
+ }
+ templateFileNames.clear();
+
+ if (taxonomyFileNames.size() > 0) { //there are names in tax file that are not in template
+ okay = false;
+
+ for (itFind = taxonomyFileNames.begin(); itFind != taxonomyFileNames.end(); itFind++) {
+ m->mothurOut(itFind->first + " is in your taxonomy file and is not in your template file. Please correct."); m->mothurOutEndLine();
+ }
+ }
+
+ return okay;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloTree", "ErrorCheck");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+