X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=phylotree.cpp;h=8c48e4f1f783c9387c83d4e5d1bfdbb475eb09cf;hb=cd9dbd8b53bbe32af3e9c6bead4aa6d796a278a5;hp=33aedc6395f09fc24576b93e1b72261884d98907;hpb=6f4b9401f7deb8aaf0d87659298308f4138cc3b0;p=mothur.git diff --git a/phylotree.cpp b/phylotree.cpp index 33aedc6..8c48e4f 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -28,34 +28,78 @@ PhyloTree::PhyloTree(){ } /**************************************************************************************************/ -PhyloTree::PhyloTree(ifstream& in){ +PhyloTree::PhyloTree(ifstream& in, string filename){ try { m = MothurOut::getInstance(); calcTotals = false; - in >> numNodes; gobble(in); - - tree.resize(numNodes); - - for (int i = 0; i < tree.size(); i++) { - in >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(in); - - } - - //read genus nodes - int numGenus = 0; - in >> numGenus; gobble(in); - - int gnode, gsize; - totals.clear(); - for (int i = 0; i < numGenus; i++) { - in >> gnode >> gsize; gobble(in); + #ifdef USE_MPI + MPI_File inMPI; + MPI_Offset size; + MPI_Status status; + + char inFileName[1024]; + strcpy(inFileName, filename.c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); + MPI_File_get_size(inMPI, &size); - uniqueTaxonomies[gnode] = gnode; - totals.push_back(gsize); - } + char* buffer = new char[size]; + MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status); + + string tempBuf = buffer; + if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); } + istringstream iss (tempBuf,istringstream::in); + delete buffer; + + iss >> numNodes; gobble(iss); + + tree.resize(numNodes); + + for (int i = 0; i < tree.size(); i++) { + iss >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(iss); + } - in.close(); + //read genus nodes + int numGenus = 0; + iss >> numGenus; gobble(iss); + + int gnode, gsize; + totals.clear(); + for (int i = 0; i < numGenus; i++) { + iss >> gnode >> gsize; gobble(iss); + + uniqueTaxonomies[gnode] = gnode; + totals.push_back(gsize); + } + + MPI_File_close(&inMPI); + + #else + in >> numNodes; gobble(in); + + tree.resize(numNodes); + + for (int i = 0; i < tree.size(); i++) { + in >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(in); + } + + //read genus nodes + int numGenus = 0; + in >> numGenus; gobble(in); + + int gnode, gsize; + totals.clear(); + for (int i = 0; i < numGenus; i++) { + in >> gnode >> gsize; gobble(in); + + uniqueTaxonomies[gnode] = gnode; + totals.push_back(gsize); + } + + in.close(); + + #endif } catch(exception& e) { @@ -74,20 +118,70 @@ PhyloTree::PhyloTree(string tfile){ tree[0].heirarchyID = "0"; maxLevel = 0; calcTotals = true; + string name, tax; + - ifstream in; - openInputFile(tfile, in); + #ifdef USE_MPI + int pid, num; + vector positions; + + MPI_Status status; + MPI_File inMPI; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + char inFileName[1024]; + strcpy(inFileName, tfile.c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + + if (pid == 0) { + positions = setFilePosEachLine(tfile, num); + + //send file positions to all processes + MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs + MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos + }else{ + MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs + positions.resize(num); + MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions + } - //read in users taxonomy file and add sequences to tree - string name, tax; - while(!in.eof()){ - in >> name >> tax; gobble(in); + //read file + for(int i=0;i length) { tempBuf = tempBuf.substr(0, length); } + delete buf4; + + istringstream iss (tempBuf,istringstream::in); + iss >> name >> tax; + addSeqToTree(name, tax); + } - addSeqToTree(name, tax); - } - in.close(); - + MPI_File_close(&inMPI); + + #else + ifstream in; + openInputFile(tfile, in); + + //read in users taxonomy file and add sequences to tree + while(!in.eof()){ + in >> name >> tax; gobble(in); + + addSeqToTree(name, tax); + } + in.close(); + #endif + assignHeirarchyIDs(0); + + //create file for summary if needed + setUp(tfile); } catch(exception& e) { m->errorOut(e, "PhyloTree", "PhyloTree"); @@ -232,15 +326,49 @@ void PhyloTree::assignHeirarchyIDs(int index){ } } /**************************************************************************************************/ -void PhyloTree::binUnclassified(){ +void PhyloTree::setUp(string tfile){ + try{ + string taxFileNameTest = tfile.substr(0,tfile.find_last_of(".")+1) + "tree.sum"; + + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { binUnclassified(taxFileNameTest); } + + #else + //create file needed for summary if it doesn't exist + ifstream FileTest(taxFileNameTest.c_str()); + + if (!FileTest) { + binUnclassified(taxFileNameTest); + } + #endif + } + catch(exception& e) { + m->errorOut(e, "PhyloTree", "setUp"); + exit(1); + } +} +/**************************************************************************************************/ +void PhyloTree::binUnclassified(string file){ try { + + ofstream out; + openOutputFile(file, out); + map::iterator itBin; map::iterator childPointer; + vector copy = tree; + int copyNodes = numNodes; + //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) { - - int level = tree[itBin->second].level; + + if (m->control_pressed) { out.close(); break; } + + int level = copy[itBin->second].level; int currentNode = itBin->second; //this sequence is unclassified at some levels @@ -251,34 +379,30 @@ void PhyloTree::binUnclassified(){ string taxon = "unclassified"; //does the parent have a child names 'unclassified'? - childPointer = tree[currentNode].children.find(taxon); + childPointer = copy[currentNode].children.find(taxon); - if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on + if(childPointer != copy[currentNode].children.end()){ //if the node already exists, move on currentNode = childPointer->second; //currentNode becomes 'unclassified' - tree[currentNode].accessions.push_back(itBin->first); //add this seq - name2Taxonomy[itBin->first] = currentNode; + copy[currentNode].accessions.push_back(itBin->first); //add this seq } else{ //otherwise, create it - tree.push_back(TaxNode(taxon)); - numNodes++; - tree[currentNode].children[taxon] = numNodes-1; - tree[numNodes-1].parent = currentNode; + copy.push_back(TaxNode(taxon)); + copyNodes++; + copy[currentNode].children[taxon] = copyNodes-1; + copy[copyNodes-1].parent = currentNode; + copy[copyNodes-1].level = copy[currentNode].level + 1; - currentNode = tree[currentNode].children[taxon]; - tree[currentNode].accessions.push_back(itBin->first); - name2Taxonomy[itBin->first] = currentNode; + currentNode = copy[currentNode].children[taxon]; + copy[currentNode].accessions.push_back(itBin->first); } - - if (level == maxLevel) { uniqueTaxonomies[currentNode] = currentNode; } } } - //clear HeirarchyIDs and reset them - for (int i = 1; i < tree.size(); i++) { - tree[i].heirarchyID = ""; + if (!m->control_pressed) { + //print copy tree + print(out, copy); } - assignHeirarchyIDs(0); - + } catch(exception& e) { m->errorOut(e, "PhyloTree", "binUnclassified"); @@ -306,26 +430,22 @@ string PhyloTree::getFullTaxonomy(string seqName) { } /**************************************************************************************************/ -void PhyloTree::print(ofstream& out){ - try { - out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl; - print(0, out); - } - catch(exception& e) { - m->errorOut(e, "PhyloTree", "print"); - exit(1); - } -} - -/**************************************************************************************************/ - -void PhyloTree::print(int i, ofstream& out){ +void PhyloTree::print(ofstream& out, vector& copy){ try { - map::iterator it; - for(it=tree[i].children.begin();it!=tree[i].children.end();it++){ - out <second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl; - print(it->second, out); + out << copy.size() << endl; + + for (int i = 0; i < copy.size(); i++) { + + out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size() << '\t'; + + map::iterator it; + for(it=copy[i].children.begin();it!=copy[i].children.end();it++){ + out << it->first << '\t' << it->second << '\t'; + } + out << endl; } + + out.close(); } catch(exception& e) { m->errorOut(e, "PhyloTree", "print"); @@ -335,23 +455,37 @@ void PhyloTree::print(int i, ofstream& out){ /**************************************************************************************************/ void PhyloTree::printTreeNodes(string treefilename) { try { - ofstream outTree; - openOutputFile(treefilename, outTree); - - //print treenodes - outTree << tree.size() << endl; - for (int i = 0; i < tree.size(); i++) { - outTree << tree[i].name << '\t' << tree[i].level << '\t' << tree[i].parent << endl; - } - - //print genus nodes - outTree << endl << uniqueTaxonomies.size() << endl; - map::iterator it2; - for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { outTree << it2->first << '\t' << tree[it2->first].accessions.size() << endl; } - outTree << endl; + + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + #endif + + ofstream outTree; + openOutputFile(treefilename, outTree); + + //print treenodes + outTree << tree.size() << endl; + for (int i = 0; i < tree.size(); i++) { + outTree << tree[i].name << '\t' << tree[i].level << '\t' << tree[i].parent << endl; + } + + //print genus nodes + outTree << endl << uniqueTaxonomies.size() << endl; + map::iterator it2; + for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { outTree << it2->first << '\t' << tree[it2->first].accessions.size() << endl; } + outTree << endl; + + + outTree.close(); - outTree.close(); + #ifdef USE_MPI + } + #endif + } catch(exception& e) {