X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=phylotree.cpp;h=3dde18680c625eb816230a8d13774ccfc47032cf;hb=cbaa068e77aeb15bb06f0695a36d8f757977ed64;hp=d085f6c52ce3c733c7666d1c7df28e95a0575647;hpb=956cdff34f2d609a7736838b1631cd7957580b8b;p=mothur.git diff --git a/phylotree.cpp b/phylotree.cpp index d085f6c..3dde186 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -20,6 +20,7 @@ PhyloTree::PhyloTree(){ tree[0].heirarchyID = "0"; maxLevel = 0; calcTotals = true; + addSeqToTree("unknown", "unknown;"); } catch(exception& e) { m->errorOut(e, "PhyloTree", "PhyloTree"); @@ -54,22 +55,25 @@ PhyloTree::PhyloTree(ifstream& in, string filename){ istringstream iss (tempBuf,istringstream::in); delete buffer; - iss >> numNodes; gobble(iss); + //read version + m->getline(iss); m->gobble(iss); + + iss >> numNodes; m->gobble(iss); tree.resize(numNodes); for (int i = 0; i < tree.size(); i++) { - iss >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(iss); + iss >> tree[i].name >> tree[i].level >> tree[i].parent; m->gobble(iss); } //read genus nodes int numGenus = 0; - iss >> numGenus; gobble(iss); + iss >> numGenus; m->gobble(iss); int gnode, gsize; totals.clear(); for (int i = 0; i < numGenus; i++) { - iss >> gnode >> gsize; gobble(iss); + iss >> gnode >> gsize; m->gobble(iss); uniqueTaxonomies[gnode] = gnode; totals.push_back(gsize); @@ -78,22 +82,25 @@ PhyloTree::PhyloTree(ifstream& in, string filename){ MPI_File_close(&inMPI); #else - in >> numNodes; gobble(in); + //read version + string line = m->getline(in); m->gobble(in); + + in >> numNodes; m->gobble(in); tree.resize(numNodes); for (int i = 0; i < tree.size(); i++) { - in >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(in); + in >> tree[i].name >> tree[i].level >> tree[i].parent; m->gobble(in); } //read genus nodes int numGenus = 0; - in >> numGenus; gobble(in); + in >> numGenus; m->gobble(in); int gnode, gsize; totals.clear(); for (int i = 0; i < numGenus; i++) { - in >> gnode >> gsize; gobble(in); + in >> gnode >> gsize; m->gobble(in); uniqueTaxonomies[gnode] = gnode; totals.push_back(gsize); @@ -121,11 +128,10 @@ PhyloTree::PhyloTree(string tfile){ maxLevel = 0; calcTotals = true; string name, tax; - #ifdef USE_MPI int pid, num, processors; - vector positions; + vector positions; MPI_Status status; MPI_File inMPI; @@ -138,7 +144,7 @@ PhyloTree::PhyloTree(string tfile){ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer if (pid == 0) { - positions = setFilePosEachLine(tfile, num); + positions = m->setFilePosEachLine(tfile, num); //send file positions to all processes for(int i = 1; i < processors; i++) { @@ -172,20 +178,26 @@ PhyloTree::PhyloTree(string tfile){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - ifstream in; - openInputFile(tfile, in); - - //read in users taxonomy file and add sequences to tree - while(!in.eof()){ - in >> name >> tax; gobble(in); - - addSeqToTree(name, tax); - } - in.close(); + map temp; + m->readTax(tfile, temp); + + for (map::iterator itTemp = temp.begin(); itTemp != temp.end();) { + addSeqToTree(itTemp->first, itTemp->second); + temp.erase(itTemp++); + } #endif - + assignHeirarchyIDs(0); - + + + string unknownTax = "unknown;"; + //added last taxon until you get desired level + for (int i = 1; i < maxLevel; i++) { + unknownTax += "unclassfied;"; + } + + addSeqToTree("unknown", unknownTax); + //create file for summary if needed setUp(tfile); } @@ -226,7 +238,6 @@ string PhyloTree::getNextTaxon(string& heirarchy, string seqname){ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){ try { - numSeqs++; map::iterator childPointer; @@ -235,6 +246,8 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){ int level = 1; tree[0].accessions.push_back(seqName); + m->removeConfidences(seqTaxonomy); + string taxon;// = getNextTaxon(seqTaxonomy); while(seqTaxonomy != ""){ @@ -269,6 +282,8 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){ if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; } } + + return 0; } catch(exception& e) { m->errorOut(e, "PhyloTree", "addSeqToTree"); @@ -346,12 +361,7 @@ void PhyloTree::setUp(string tfile){ if (pid == 0) { binUnclassified(taxFileNameTest); } #else - //create file needed for summary if it doesn't exist - ifstream FileTest(taxFileNameTest.c_str()); - - if (!FileTest) { - binUnclassified(taxFileNameTest); - } + binUnclassified(taxFileNameTest); #endif } catch(exception& e) { @@ -364,17 +374,17 @@ void PhyloTree::binUnclassified(string file){ try { ofstream out; - openOutputFile(file, out); + m->openOutputFile(file, out); map::iterator itBin; map::iterator childPointer; vector copy = tree; - + //fill out tree fillOutTree(0, copy); - - //get leaf nodes that may need externsion + + //get leaf nodes that may need extension for (int i = 0; i < copy.size(); i++) { if (copy[i].children.size() == 0) { @@ -383,7 +393,7 @@ void PhyloTree::binUnclassified(string file){ } int copyNodes = copy.size(); - + //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary map::iterator itLeaf; for (itLeaf = leafNodes.begin(); itLeaf != leafNodes.end(); itLeaf++) { @@ -394,7 +404,7 @@ void PhyloTree::binUnclassified(string file){ int currentNode = itLeaf->second; //this sequence is unclassified at some levels - while(level <= maxLevel){ + while(level < maxLevel){ level++; @@ -432,6 +442,7 @@ void PhyloTree::binUnclassified(string file){ /**************************************************************************************************/ void PhyloTree::fillOutTree(int index, vector& copy) { try { + map::iterator it; it = copy[index].children.find("unclassified"); @@ -443,12 +454,12 @@ void PhyloTree::fillOutTree(int index, vector& copy) { copy[copy.size()-1].level = copy[index].level + 1; } - if (tree[index].level <= maxLevel) { + if (tree[index].level < maxLevel) { for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ //check your children fillOutTree(it->second, copy); } } - + } catch(exception& e) { m->errorOut(e, "PhyloTree", "fillOutTree"); @@ -478,10 +489,16 @@ string PhyloTree::getFullTaxonomy(string seqName) { void PhyloTree::print(ofstream& out, vector& copy){ try { + + //output mothur version + out << "#" << m->getVersion() << endl; + out << copy.size() << endl; + out << maxLevel << endl; + for (int i = 0; i < copy.size(); i++) { - + out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size() << '\t'; map::iterator it; @@ -511,7 +528,10 @@ void PhyloTree::printTreeNodes(string treefilename) { #endif ofstream outTree; - openOutputFile(treefilename, outTree); + m->openOutputFile(treefilename, outTree); + + //output mothur version + outTree << "#" << m->getVersion() << endl; //print treenodes outTree << tree.size() << endl; @@ -591,23 +611,27 @@ bool PhyloTree::ErrorCheck(vector templateFileNames){ try { bool okay = true; + templateFileNames.push_back("unknown"); map::iterator itFind; map taxonomyFileNames = name2Taxonomy; + if (m->debug) { m->mothurOut("[DEBUG]: in error check. Numseqs in template = " + toString(templateFileNames.size()) + ". Numseqs in taxonomy = " + toString(taxonomyFileNames.size()) + ".\n"); } + for (int i = 0; i < templateFileNames.size(); i++) { itFind = taxonomyFileNames.find(templateFileNames[i]); if (itFind != taxonomyFileNames.end()) { //found it so erase it taxonomyFileNames.erase(itFind); }else { - m->mothurOut(templateFileNames[i] + " is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine(); + m->mothurOut("'" +templateFileNames[i] + "' is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine(); okay = false; } - templateFileNames.erase(templateFileNames.begin()+i); - i--; + //templateFileNames.erase(templateFileNames.begin()+i); + //i--; } + templateFileNames.clear(); if (taxonomyFileNames.size() > 0) { //there are names in tax file that are not in template okay = false;