]> git.donarmstrong.com Git - mothur.git/blob - readtreecommand.cpp
added merge.groups command
[mothur.git] / readtreecommand.cpp
1 /*
2  *  readtreecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/23/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
#include "readtreecommand.h"

#include <cstddef>
11
12 //**********************************************************************************************************************
13 vector<string> ReadTreeCommand::getValidParameters(){   
14         try {
15                 string Array[] =  {"tree","group","name","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "ReadTreeCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 vector<string> ReadTreeCommand::getRequiredParameters(){        
26         try {
27                 string Array[] =  {"tree"};
28                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "ReadTreeCommand", "getRequiredParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 vector<string> ReadTreeCommand::getRequiredFiles(){     
38         try {
39                 vector<string> myArray;
40                 return myArray;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ReadTreeCommand", "getRequiredFiles");
44                 exit(1);
45         }
46 }
47 //**********************************************************************************************************************
48 ReadTreeCommand::ReadTreeCommand(string option)  {
49         try {
50                 globaldata = GlobalData::getInstance();
51                 abort = false;
52                                 
53                 //allow user to run help
54                 if(option == "help") { help(); abort = true; }
55                 
56                 else {
57                         //valid paramters for this command
58                         string Array[] =  {"tree","group","name","outputdir","inputdir"};
59                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
60                         
61                         OptionParser parser(option);
62                         map<string, string> parameters = parser.getParameters();
63                         
64                         ValidParameters validParameter;
65                         map<string, string>::iterator it;
66                 
67                         //check to make sure all parameters are valid for command
68                         for (it = parameters.begin(); it != parameters.end(); it++) { 
69                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
70                         }
71                         
72                         globaldata->newRead();
73                         
74                         //if the user changes the input directory command factory will send this info to us in the output parameter 
75                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
76                         if (inputDir == "not found"){   inputDir = "";          }
77                         else {
78                                 string path;
79                                 it = parameters.find("tree");
80                                 //user has given a template file
81                                 if(it != parameters.end()){ 
82                                         path = m->hasPath(it->second);
83                                         //if the user has not given a path then, add inputdir. else leave path alone.
84                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
85                                 }
86                                 
87                                 it = parameters.find("group");
88                                 //user has given a template file
89                                 if(it != parameters.end()){ 
90                                         path = m->hasPath(it->second);
91                                         //if the user has not given a path then, add inputdir. else leave path alone.
92                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
93                                 }
94                                 
95                                 it = parameters.find("name");
96                                 //user has given a template file
97                                 if(it != parameters.end()){ 
98                                         path = m->hasPath(it->second);
99                                         //if the user has not given a path then, add inputdir. else leave path alone.
100                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
101                                 }
102
103                         }
104
105                         
106                         //check for required parameters
107                         treefile = validParameter.validFile(parameters, "tree", true);
108                         if (treefile == "not open") { abort = true; }
109                         else if (treefile == "not found") { treefile = ""; m->mothurOut("tree is a required parameter for the read.tree command."); m->mothurOutEndLine(); abort = true;  }     
110                         else {  globaldata->setTreeFile(treefile);  globaldata->setFormat("tree");      }
111                         
112                         groupfile = validParameter.validFile(parameters, "group", true);
113                         if (groupfile == "not open") { abort = true; }  
114                         else if (groupfile == "not found") { 
115                                 groupfile = ""; 
116                                 
117                                 m->mothurOut("You have not provided a group file. I am assumming all sequence are from the same group."); m->mothurOutEndLine();        
118                                 
119                                 if (treefile != "") {  Tree* tree = new Tree(treefile); delete tree;  } //extracts names from tree to make faked out groupmap
120                                 
121                                 globaldata->setGroupFile(groupfile); 
122                                 //read in group map info.
123                                 treeMap = new TreeMap();
124                                 for (int i = 0; i < globaldata->Treenames.size(); i++) { treeMap->addSeq(globaldata->Treenames[i], "Group1"); }
125                                 globaldata->gTreemap = treeMap;
126                                         
127                         }else {  
128                                 globaldata->setGroupFile(groupfile); 
129                                 //read in group map info.
130                                 treeMap = new TreeMap(groupfile);
131                                 treeMap->readMap();
132                                 globaldata->gTreemap = treeMap;
133                         }
134                         
135                         namefile = validParameter.validFile(parameters, "name", true);
136                         if (namefile == "not open") { abort = true; }
137                         else if (namefile == "not found") { namefile = ""; }
138                         else { readNamesFile(); }       
139                         
140                         if (abort == false) {
141                                 filename = treefile;
142                                 read = new ReadNewickTree(filename);
143                         }
144                                                 
145                 }
146         }
147         catch(exception& e) {
148                 m->errorOut(e, "ReadTreeCommand", "ReadTreeCommand");           
149                 exit(1);
150         }
151 }
152 //**********************************************************************************************************************
153
154 void ReadTreeCommand::help(){
155         try {
156                 m->mothurOut("The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. \n");
157                 m->mothurOut("It also must be run before using the parsimony command, unless you are using the randomtree parameter.\n");
158                 m->mothurOut("The read.tree command parameters are tree, group and name.\n");
159                 m->mothurOut("The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile).\n");
160                 m->mothurOut("The tree and group parameters are both required, if no group file is given then one group is assumed.\n");
161                 m->mothurOut("The name parameter allows you to enter a namefile.\n");
162                 m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n");
163         }
164         catch(exception& e) {
165                 m->errorOut(e, "ReadTreeCommand", "help");      
166                 exit(1);
167         }
168 }
169
170 //**********************************************************************************************************************
171
172 ReadTreeCommand::~ReadTreeCommand(){
173         if (abort == false) { delete read; }
174 }
175
176 //**********************************************************************************************************************
177
178 int ReadTreeCommand::execute(){
179         try {
180         
181                 if (abort == true) { return 0; }
182                 
183                 int readOk;
184                 
185                 readOk = read->read(); 
186                 
187                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); globaldata->gTree.clear(); delete globaldata->gTreemap; return 0; }
188                 
189                 vector<Tree*> T = globaldata->gTree;
190
191                 //assemble users trees
192                 for (int i = 0; i < T.size(); i++) {
193                         if (m->control_pressed) {  
194                                 for (int i = 0; i < T.size(); i++) {  delete T[i];  }
195                                 globaldata->gTree.clear();
196                                 delete globaldata->gTreemap;
197                                 return 0;
198                         }
199         
200                         T[i]->assembleTree();
201                 }
202
203                 
204                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
205                 int numNamesInTree;
206                 if (namefile != "")  {  
207                         if (numUniquesInName == globaldata->Treenames.size()) {  numNamesInTree = nameMap.size();  }
208                         else {   numNamesInTree = globaldata->Treenames.size();  }
209                 }else {  numNamesInTree = globaldata->Treenames.size();  }
210                 
211                 
212                 //output any names that are in group file but not in tree
213                 if (numNamesInTree < treeMap->getNumSeqs()) {
214                         for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
215                                 //is that name in the tree?
216                                 int count = 0;
217                                 for (int j = 0; j < globaldata->Treenames.size(); j++) {
218                                         if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
219                                         count++;
220                                 }
221                                 
222                                 if (m->control_pressed) {  
223                                         for (int i = 0; i < T.size(); i++) {  delete T[i];  }
224                                         globaldata->gTree.clear();
225                                         delete globaldata->gTreemap;
226                                         return 0;
227                                 }
228                                 
229                                 //then you did not find it so report it 
230                                 if (count == globaldata->Treenames.size()) { 
231                                         //if it is in your namefile then don't remove
232                                         map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
233                                         
234                                         if (it == nameMap.end()) {
235                                                 m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
236                                                 treeMap->removeSeq(treeMap->namesOfSeqs[i]);
237                                                 i--; //need this because removeSeq removes name from namesOfSeqs
238                                         }
239                                 }
240                         }
241                         
242                         globaldata->gTreemap = treeMap;
243                 }
244
245                 return 0;
246         }
247         catch(exception& e) {
248                 m->errorOut(e, "ReadTreeCommand", "execute");   
249                 exit(1);
250         }
251 }
252 /*****************************************************************/
253 int ReadTreeCommand::readNamesFile() {
254         try {
255                 globaldata->names.clear();
256                 numUniquesInName = 0;
257                 
258                 ifstream in;
259                 m->openInputFile(namefile, in);
260                 
261                 string first, second;
262                 map<string, string>::iterator itNames;
263                 
264                 while(!in.eof()) {
265                         in >> first >> second; m->gobble(in);
266                         
267                         numUniquesInName++;
268
269                         itNames = globaldata->names.find(first);
270                         if (itNames == globaldata->names.end()) {  
271                                 globaldata->names[first] = second; 
272                                 
273                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
274                                 vector<string> dupNames;
275                                 m->splitAtComma(second, dupNames);
276                                 
277                                 for (int i = 0; i < dupNames.size(); i++) {     nameMap[dupNames[i]] = dupNames[i];  if ((groupfile == "") && (i != 0)) { globaldata->gTreemap->addSeq(dupNames[i], "Group1"); }  }
278                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); namefile = ""; return 1; }                 
279                 }
280                 in.close();
281                 
282                 return 0;
283         }
284         catch(exception& e) {
285                 m->errorOut(e, "ReadTreeCommand", "readNamesFile");
286                 exit(1);
287         }
288 }
289
290 //**********************************************************************************************************************