2 * rawTrainingDataMaker.cpp
5 * Created by westcott on 4/21/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "rawtrainingdatamaker.h"
12 /**************************************************************************************************/
// Default constructor: seeds the taxonomy tree with a single "Root" node.
// NOTE(review): this extract appears to be missing lines (the try/catch
// scaffolding around the errorOut call, and possibly other member
// initialisation) -- confirm against the full file before editing.
14 RawTrainingDataMaker::RawTrainingDataMaker(){
// Fetch the MothurOut instance; it is used below for error reporting.
16 m = MothurOut::getInstance();
// Node 0 of the tree is the root, and its rank label is "Root" as well.
19 tree.push_back(rawTaxNode("Root"));
20 tree[0].rank = "Root";
// Reports the exception `e` tagged with the class and method name
// (presumably from inside a catch handler that is not visible here).
24 m->errorOut(e, "RawTrainingDataMaker", "RawTrainingDataMaker");
28 /**************************************************************************************************/
// Constructor that builds the taxonomy tree from a taxonomy file.
//   tfile - path handed to openInputFile(); each record read from it is a
//           whitespace-separated pair: sequence name, then taxonomy string.
// NOTE(review): this extract appears to be missing lines (the stream and
// variable declarations, the read-loop header, anything done after the loop,
// and the try/catch scaffolding) -- confirm against the full file.
30 RawTrainingDataMaker::RawTrainingDataMaker(string tfile){
// Fetch the MothurOut instance; it is used below for error reporting.
32 m = MothurOut::getInstance();
// Node 0 of the tree is the root, and its rank label is "Root" as well.
35 tree.push_back(rawTaxNode("Root"));
36 tree[0].rank = "Root";
// Open the taxonomy file on the stream `in`.
40 openInputFile(tfile, in);
42 //read in users taxonomy file and add sequences to tree
// One record: name, then taxonomy; gobble() is called on the stream after
// each pair (presumably to skip trailing whitespace -- verify in the helper).
45 in >> name >> tax; gobble(in);
// Insert this sequence's taxonomy path into the tree.
47 addSeqToTree(name, tax);
// Reports the exception `e` tagged with the class and method name
// (presumably from inside a catch handler that is not visible here).
54 m->errorOut(e, "RawTrainingDataMaker", "RawTrainingDataMaker");
59 /**************************************************************************************************/
61 string RawTrainingDataMaker::getNextTaxon(string& heirarchy){
63 string currentLevel = "";
65 int pos = heirarchy.find_first_of(';');
66 currentLevel=heirarchy.substr(0,pos);
67 if (pos != (heirarchy.length()-1)) { heirarchy=heirarchy.substr(pos+1); }
68 else { heirarchy = ""; }
73 m->errorOut(e, "RawTrainingDataMaker", "getNextTaxon");
78 /**************************************************************************************************/
// Inserts one sequence's taxonomy path into the tree, creating any nodes that
// do not exist yet.
//   seqName     - sequence name (not referenced by the lines visible here).
//   seqTaxonomy - taxonomy string; consumed one taxon at a time through
//                 getNextTaxon() until it is empty.
// Returns 0 when m->control_pressed is set mid-walk.
// NOTE(review): this extract appears to be missing lines (the declarations of
// currentNode and taxon, the numNodes update after push_back, and the normal
// return) -- confirm against the full file.
80 int RawTrainingDataMaker::addSeqToTree(string seqName, string seqTaxonomy){
// Iterator into a node's children map (taxon name -> index into `tree`).
84 map<string, int>::iterator childPointer;
// Walk down the tree one taxon per iteration.
89 while(seqTaxonomy != ""){
// Bail out early if the control_pressed flag has been set.
91 if (m->control_pressed) { return 0; }
93 //somehow the parent is getting one too many accnos
94 //use print to reassign the taxa id
// Pops the next taxon off the front of seqTaxonomy (passed by reference).
95 taxon = getNextTaxon(seqTaxonomy);
// Look for an existing child of the current node with this taxon name.
97 childPointer = tree[currentNode].children.find(taxon);
99 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
100 currentNode = childPointer->second;
101 }else{ //otherwise, create it
102 tree.push_back(rawTaxNode(taxon));
// The new node is registered under index numNodes-1 and linked back to its
// parent. NOTE(review): this relies on numNodes matching tree.size() here;
// the statement that updates numNodes is not visible -- verify.
104 tree[currentNode].children[taxon] = numNodes-1;
105 tree[numNodes-1].parent = currentNode;
// Descend into the node that was just created.
107 currentNode = tree[currentNode].children[taxon];
112 catch(exception& e) {
// Report the exception tagged with the class and method name.
113 m->errorOut(e, "RawTrainingDataMaker", "addSeqToTree");
117 /**************************************************************************************************/
// Recursively assigns a level and a rank label to every descendant of the
// node at `index`.
//   index - position in `tree` of the node whose children are processed.
// Each child gets level = parent level + 1. Levels 0..8 map to the names in
// `ranks`; deeper levels are labelled "unknown" followed by the level number.
// maxLevel is raised to the deepest level seen.
// NOTE(review): the two rank assignments below are presumably the branches of
// an if/else whose `else` line is missing from this extract -- confirm.
119 void RawTrainingDataMaker::assignRank(int index){
121 map<string,int>::iterator it;
// Rank names indexed by level; valid indices are 0 through 8.
123 string ranks[9] = { "Root","Domain","Kingdom","Phylum","Class","Order","Family","Genus","Species" };
// Visit every child of tree[index]; it->second is the child's index in `tree`.
125 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
126 tree[it->second].level = tree[index].level + 1;
// Levels past the end of the ranks table get a synthetic "unknown<level>" label.
128 if (tree[it->second].level > 8) {
129 tree[it->second].rank = ("unknown" + toString(tree[it->second].level));
131 tree[it->second].rank = ranks[tree[it->second].level];
134 //save maxLevel for binning the unclassified seqs
135 if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; }
// Recurse so the child's own children are ranked relative to it.
137 assignRank(it->second);
140 catch(exception& e) {
// Report the exception tagged with the class and method name.
141 m->errorOut(e, "RawTrainingDataMaker", "assignRank");
145 /**************************************************************************************************/
// Writes the root node's record to `out`.
//   out - destination stream.
// Record layout, '*'-separated and newline-terminated:
//   index * name * parent * level * rank
// The root's index is written as the literal "0".
// NOTE(review): this extract appears to be missing lines between the root
// record and the catch handler (presumably a call that prints the rest of the
// tree) -- confirm against the full file.
147 void RawTrainingDataMaker::print(ofstream& out){
149 //string temp = tree[0].name +" " + tree[0].rank;
150 //sanityCheck[temp] = temp;
152 out << "0" << "*" << tree[0].name << "*" << tree[0].parent << "*" << tree[0].level << "*" << tree[0].rank << endl;
156 catch(exception& e) {
// Report the exception tagged with the class and method name.
157 m->errorOut(e, "RawTrainingDataMaker", "print");
162 /**************************************************************************************************/
// Recursively writes one record per descendant of node `i` to `out`.
//   i   - index in `tree` of the node whose children are printed.
//   out - destination stream.
// Record layout, '*'-separated and newline-terminated:
//   index * name * parent * level * rank
// A child's record is written before that child's own descendants. Children
// are visited in the iteration order of the `children` map.
// The commented-out lines are remnants of a disabled duplicate check
// ("sanityCheck") and have no effect.
164 void RawTrainingDataMaker::print(int i, ofstream& out){
166 map<string,int>::iterator it;
// it->second is the child's index in `tree`.
167 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
168 //string temp = tree[it->second].name + " " + tree[it->second].rank;
170 //map<string, string>::iterator itSan;
171 //itSan = sanityCheck.find(temp);
173 //if (itSan == sanityCheck.end()) {
174 out << it->second << "*" << tree[it->second].name << "*" << tree[it->second].parent << "*" << tree[it->second].level << "*" << tree[it->second].rank << endl;
175 //sanityCheck[temp] = temp;
// Recurse into the child's subtree.
177 print(it->second, out);
180 catch(exception& e) {
// Report the exception tagged with the class and method name.
181 m->errorOut(e, "RawTrainingDataMaker", "print");
185 /**************************************************************************************************/