]> git.donarmstrong.com Git - mothur.git/blob - readtreecommand.cpp
89efc9abcf0057f68189ddad6975b5e1464fca09
[mothur.git] / readtreecommand.cpp
1 /*
2  *  readtreecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/23/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readtreecommand.h"
11
12 //**********************************************************************************************************************
13 ReadTreeCommand::ReadTreeCommand(string option)  {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                                 
18                 //allow user to run help
19                 if(option == "help") { help(); abort = true; }
20                 
21                 else {
22                         //valid paramters for this command
23                         string Array[] =  {"tree","group","name","outputdir","inputdir"};
24                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25                         
26                         OptionParser parser(option);
27                         map<string, string> parameters = parser.getParameters();
28                         
29                         ValidParameters validParameter;
30                         map<string, string>::iterator it;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         globaldata->newRead();
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
41                         if (inputDir == "not found"){   inputDir = "";          }
42                         else {
43                                 string path;
44                                 it = parameters.find("tree");
45                                 //user has given a template file
46                                 if(it != parameters.end()){ 
47                                         path = m->hasPath(it->second);
48                                         //if the user has not given a path then, add inputdir. else leave path alone.
49                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
50                                 }
51                                 
52                                 it = parameters.find("group");
53                                 //user has given a template file
54                                 if(it != parameters.end()){ 
55                                         path = m->hasPath(it->second);
56                                         //if the user has not given a path then, add inputdir. else leave path alone.
57                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
58                                 }
59                                 
60                                 it = parameters.find("name");
61                                 //user has given a template file
62                                 if(it != parameters.end()){ 
63                                         path = m->hasPath(it->second);
64                                         //if the user has not given a path then, add inputdir. else leave path alone.
65                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
66                                 }
67
68                         }
69
70                         
71                         //check for required parameters
72                         treefile = validParameter.validFile(parameters, "tree", true);
73                         if (treefile == "not open") { abort = true; }
74                         else if (treefile == "not found") { treefile = ""; m->mothurOut("tree is a required parameter for the read.tree command."); m->mothurOutEndLine(); abort = true;  }     
75                         else {  globaldata->setTreeFile(treefile);  globaldata->setFormat("tree");      }
76                         
77                         groupfile = validParameter.validFile(parameters, "group", true);
78                         if (groupfile == "not open") { abort = true; }  
79                         else if (groupfile == "not found") { 
80                                 groupfile = ""; 
81                                 
82                                 m->mothurOut("You have not provided a group file. I am assumming all sequence are from the same group."); m->mothurOutEndLine();        
83                                 
84                                 if (treefile != "") {  Tree* tree = new Tree(treefile); delete tree;  } //extracts names from tree to make faked out groupmap
85                                 
86                                 globaldata->setGroupFile(groupfile); 
87                                 //read in group map info.
88                                 treeMap = new TreeMap();
89                                 for (int i = 0; i < globaldata->Treenames.size(); i++) { treeMap->addSeq(globaldata->Treenames[i], "Group1"); }
90                                 globaldata->gTreemap = treeMap;
91                                         
92                         }else {  
93                                 globaldata->setGroupFile(groupfile); 
94                                 //read in group map info.
95                                 treeMap = new TreeMap(groupfile);
96                                 treeMap->readMap();
97                                 globaldata->gTreemap = treeMap;
98                         }
99                         
100                         namefile = validParameter.validFile(parameters, "name", true);
101                         if (namefile == "not open") { abort = true; }
102                         else if (namefile == "not found") { namefile = ""; }
103                         else { readNamesFile(); }       
104                         
105                         if (abort == false) {
106                                 filename = treefile;
107                                 read = new ReadNewickTree(filename);
108                         }
109                                                 
110                 }
111         }
112         catch(exception& e) {
113                 m->errorOut(e, "ReadTreeCommand", "ReadTreeCommand");           
114                 exit(1);
115         }
116 }
117 //**********************************************************************************************************************
118
119 void ReadTreeCommand::help(){
120         try {
121                 m->mothurOut("The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. \n");
122                 m->mothurOut("It also must be run before using the parsimony command, unless you are using the randomtree parameter.\n");
123                 m->mothurOut("The read.tree command parameters are tree, group and name.\n");
124                 m->mothurOut("The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile).\n");
125                 m->mothurOut("The tree and group parameters are both required.\n");
126                 m->mothurOut("The name parameter allows you to enter a namefile.\n");
127                 m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n");
128         }
129         catch(exception& e) {
130                 m->errorOut(e, "ReadTreeCommand", "help");      
131                 exit(1);
132         }
133 }
134
135 //**********************************************************************************************************************
136
137 ReadTreeCommand::~ReadTreeCommand(){
138         if (abort == false) { delete read; }
139 }
140
141 //**********************************************************************************************************************
142
143 int ReadTreeCommand::execute(){
144         try {
145         
146                 if (abort == true) { return 0; }
147                 
148                 int readOk;
149                 
150                 readOk = read->read(); 
151                 
152                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); globaldata->gTree.clear(); delete globaldata->gTreemap; return 0; }
153                 
154                 vector<Tree*> T = globaldata->gTree;
155
156                 //assemble users trees
157                 for (int i = 0; i < T.size(); i++) {
158                         if (m->control_pressed) {  
159                                 for (int i = 0; i < T.size(); i++) {  delete T[i];  }
160                                 globaldata->gTree.clear();
161                                 delete globaldata->gTreemap;
162                                 return 0;
163                         }
164         
165                         T[i]->assembleTree();
166                 }
167
168                 
169                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
170                 int numNamesInTree;
171                 if (namefile != "")  {  
172                         if (numUniquesInName == globaldata->Treenames.size()) {  numNamesInTree = nameMap.size();  }
173                         else {   numNamesInTree = globaldata->Treenames.size();  }
174                 }else {  numNamesInTree = globaldata->Treenames.size();  }
175                 
176                 
177                 //output any names that are in group file but not in tree
178                 if (numNamesInTree < treeMap->getNumSeqs()) {
179                         for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
180                                 //is that name in the tree?
181                                 int count = 0;
182                                 for (int j = 0; j < globaldata->Treenames.size(); j++) {
183                                         if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
184                                         count++;
185                                 }
186                                 
187                                 if (m->control_pressed) {  
188                                         for (int i = 0; i < T.size(); i++) {  delete T[i];  }
189                                         globaldata->gTree.clear();
190                                         delete globaldata->gTreemap;
191                                         return 0;
192                                 }
193                                 
194                                 //then you did not find it so report it 
195                                 if (count == globaldata->Treenames.size()) { 
196                                         //if it is in your namefile then don't remove
197                                         map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
198                                         
199                                         if (it == nameMap.end()) {
200                                                 m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
201                                                 treeMap->removeSeq(treeMap->namesOfSeqs[i]);
202                                                 i--; //need this because removeSeq removes name from namesOfSeqs
203                                         }
204                                 }
205                         }
206                         
207                         globaldata->gTreemap = treeMap;
208                 }
209
210                 return 0;
211         }
212         catch(exception& e) {
213                 m->errorOut(e, "ReadTreeCommand", "execute");   
214                 exit(1);
215         }
216 }
217 /*****************************************************************/
218 int ReadTreeCommand::readNamesFile() {
219         try {
220                 globaldata->names.clear();
221                 numUniquesInName = 0;
222                 
223                 ifstream in;
224                 m->openInputFile(namefile, in);
225                 
226                 string first, second;
227                 map<string, string>::iterator itNames;
228                 
229                 while(!in.eof()) {
230                         in >> first >> second; m->gobble(in);
231                         
232                         numUniquesInName++;
233
234                         itNames = globaldata->names.find(first);
235                         if (itNames == globaldata->names.end()) {  
236                                 globaldata->names[first] = second; 
237                                 
238                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
239                                 vector<string> dupNames;
240                                 m->splitAtComma(second, dupNames);
241                                 
242                                 for (int i = 0; i < dupNames.size(); i++) {     nameMap[dupNames[i]] = dupNames[i];  if ((groupfile == "") && (i != 0)) { globaldata->gTreemap->addSeq(dupNames[i], "Group1"); }  }
243                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); namefile = ""; return 1; }                 
244                 }
245                 in.close();
246                 
247                 return 0;
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "ReadTreeCommand", "readNamesFile");
251                 exit(1);
252         }
253 }
254
255 //**********************************************************************************************************************