]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
added warning about average neighbor merges around cutoff. fixed little bugs.
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11
12 //**********************************************************************************************************************
13
14 SharedCommand::SharedCommand(){
15         try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 //getting output filename
19                 filename = globaldata->inputFileName;
20                 filename = getRootName(filename);
21                 filename = filename + "shared";
22                 openOutputFile(filename, out);
23                 pickedGroups = false;
24                 
25                 groupMap = globaldata->gGroupmap;
26                 
27                 //if hte user has not specified any groups then use them all
28                 if (globaldata->Groups.size() == 0) {
29                         groups = groupMap->namesOfGroups;
30                 }else{ //they have specified groups
31                         groups = globaldata->Groups;
32                         pickedGroups = true;
33                 }
34                 
35                 //fill filehandles with neccessary ofstreams
36                 int i;
37                 ofstream* temp;
38                 for (i=0; i<groups.size(); i++) {
39                         temp = new ofstream;
40                         filehandles[groups[i]] = temp;
41                 }
42                 
43                 //set fileroot
44                 fileroot = getRootName(globaldata->getListFile());
45                 
46                 //clears file before we start to write to it below
47                 for (int i=0; i<groups.size(); i++) {
48                         remove((fileroot + groups[i] + ".rabund").c_str());
49                 }
50
51         }
52         catch(exception& e) {
53                 errorOut(e, "SharedCommand", "SharedCommand");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58
59 int SharedCommand::execute(){
60         try {
61                 
62                 //lookup.clear();
63                 string errorOff = "no error";
64                 //errorOff = "";
65                         
66                 //read in listfile
67                 read = new ReadOTUFile(globaldata->inputFileName);      
68                 read->read(&*globaldata); 
69                 delete read;
70
71                 input = globaldata->ginput;
72                 SharedList = globaldata->gSharedList;
73                 string lastLabel = SharedList->getLabel();
74                 vector<SharedRAbundVector*> lookup; 
75                 
76                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
77                         mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine(); 
78                         
79                         out.close();
80                         remove(filename.c_str()); //remove blank shared file you made
81                         
82                         createMisMatchFile();
83                         
84                         //delete memory
85                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
86                                 delete it3->second;
87                         }
88                         delete SharedList;
89                         globaldata->gSharedList = NULL;
90                         
91                         return 1; 
92                 }
93                 
94                 //if user has specified groups make new groupfile for them
95                 if (globaldata->Groups.size() != 0) { //make new group file
96                         string groups = "";
97                         for (int i = 0; i < globaldata->Groups.size(); i++) {
98                                 groups += globaldata->Groups[i] + ".";
99                         }
100                 
101                         string newGroupFile = getRootName(globaldata->inputFileName) + groups + "groups";
102                         ofstream outGroups;
103                         openOutputFile(newGroupFile, outGroups);
104                 
105                         vector<string> names = groupMap->getNamesSeqs();
106                         string groupName;
107                         for (int i = 0; i < names.size(); i++) {
108                                 groupName = groupMap->getGroup(names[i]);
109                                 if (isValidGroup(groupName, globaldata->Groups)) {
110                                         outGroups << names[i] << '\t' << groupName << endl;
111                                 }
112                         }
113                         outGroups.close();
114                 }
115                 
116                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
117                 set<string> processedLabels;
118                 set<string> userLabels = globaldata->labels;    
119         
120                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
121                 
122                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
123                         
124                                         lookup = SharedList->getSharedRAbundVector();
125                                         if (pickedGroups) { //check for otus with no seqs in them
126                                                 eliminateZeroOTUS(lookup);
127                                         }
128                                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
129                                         
130                                         printSharedData(lookup); //prints info to the .shared file
131                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
132                                 
133                                         processedLabels.insert(SharedList->getLabel());
134                                         userLabels.erase(SharedList->getLabel());
135                         }
136                         
137                         if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
138                                         string saveLabel = SharedList->getLabel();
139                                         
140                                         delete SharedList;
141                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
142                                         
143                                         lookup = SharedList->getSharedRAbundVector();
144                                         if (pickedGroups) { //check for otus with no seqs in them
145                                                 eliminateZeroOTUS(lookup);
146                                         }
147                                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
148                                         
149                                         printSharedData(lookup); //prints info to the .shared file
150                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
151                                         
152                                         processedLabels.insert(SharedList->getLabel());
153                                         userLabels.erase(SharedList->getLabel());
154                                         
155                                         //restore real lastlabel to save below
156                                         SharedList->setLabel(saveLabel);
157                         }
158                         
159                 
160                         lastLabel = SharedList->getLabel();
161                                 
162                         delete SharedList;
163                         SharedList = input->getSharedListVector(); //get new list vector to process
164                 }
165                 
166                 //output error messages about any remaining user labels
167                 set<string>::iterator it;
168                 bool needToRun = false;
169                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
170                         if (processedLabels.count(lastLabel) != 1) {
171                                 needToRun = true;
172                         }
173                 }
174                 
175                 //run last label if you need to
176                 if (needToRun == true)  {
177                         if (SharedList != NULL) {       delete SharedList;      }
178                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
179                                         
180                         lookup = SharedList->getSharedRAbundVector();
181                         if (pickedGroups) { //check for otus with no seqs in them
182                                 eliminateZeroOTUS(lookup);
183                         }
184                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
185                         
186                         printSharedData(lookup); //prints info to the .shared file
187                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
188                         delete SharedList;
189                 }
190                 
191                 globaldata->gSharedList = NULL;
192                 
193                 out.close();
194                 
195                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
196                         delete it3->second;
197                 }
198
199                 
200                 return 0;
201         }
202         catch(exception& e) {
203                 errorOut(e, "SharedCommand", "execute");
204                 exit(1);
205         }
206 }
207 //**********************************************************************************************************************
208 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
209         try {
210                 
211                 //initialize bin values
212                 for (int i = 0; i < thislookup.size(); i++) {
213 //cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() <<  endl;
214                         out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
215                         thislookup[i]->print(out);
216                         
217                         RAbundVector rav = thislookup[i]->getRAbundVector();
218                         openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
219                         rav.print(*(filehandles[thislookup[i]->getGroup()]));
220                         (*(filehandles[thislookup[i]->getGroup()])).close();
221                 }
222  
223         }
224         catch(exception& e) {
225                 errorOut(e, "SharedCommand", "printSharedData");
226                 exit(1);
227         }
228 }
229 //**********************************************************************************************************************
230 void SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
231         try {
232                 
233                 vector<SharedRAbundVector*> newLookup;
234                 for (int i = 0; i < thislookup.size(); i++) {
235                         SharedRAbundVector* temp = new SharedRAbundVector();
236                         temp->setLabel(thislookup[i]->getLabel());
237                         temp->setGroup(thislookup[i]->getGroup());
238                         newLookup.push_back(temp);
239                 }
240                 
241                 //for each bin
242                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
243                 
244                         //look at each sharedRabund and make sure they are not all zero
245                         bool allZero = true;
246                         for (int j = 0; j < thislookup.size(); j++) {
247                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
248                         }
249                         
250                         //if they are not all zero add this bin
251                         if (!allZero) {
252                                 for (int j = 0; j < thislookup.size(); j++) {
253                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
254                                 }
255                         }
256                         //else{  cout << "bin # " << i << " is all zeros" << endl;  }
257                 }
258         
259                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
260                 thislookup = newLookup;
261         
262  
263         }
264         catch(exception& e) {
265                 errorOut(e, "SharedCommand", "eliminateZeroOTUS");
266                 exit(1);
267         }
268 }
269 //**********************************************************************************************************************
270 void SharedCommand::createMisMatchFile() {
271         try {
272                 ofstream outMisMatch;
273                 string outputMisMatchName = getRootName(globaldata->inputFileName);
274                 
275                 //you have sequences in your list file that are not in your group file
276                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
277                         outputMisMatchName += "missing.group";
278                         mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
279                         
280                         openOutputFile(outputMisMatchName, outMisMatch);
281                         
282                         //go through list and if group returns "not found" output it
283                         for (int i = 0; i < SharedList->getNumBins(); i++) {
284                         
285                                 string names = SharedList->get(i); 
286                                 
287                                 while (names.find_first_of(',') != -1) { 
288                                         string name = names.substr(0,names.find_first_of(','));
289                                         names = names.substr(names.find_first_of(',')+1, names.length());
290                                         string group = groupMap->getGroup(name);
291         cout << name << endl;                           
292                                         if(group == "not found") {      outMisMatch << name << endl;  }
293                                 }
294         cout << names << endl;                  
295                                 //get last name
296                                 string group = groupMap->getGroup(names);
297                                 if(group == "not found") {      outMisMatch << names << endl;  }                                
298                         }
299                         
300                         outMisMatch.close();
301                         
302                 
303                 }else {//you have sequences in your group file that are not in you list file
304                         
305                         outputMisMatchName += "missing.name";
306                         mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
307                         
308                         map<string, string> namesInList;
309                         
310                         //go through listfile and get names
311                         for (int i = 0; i < SharedList->getNumBins(); i++) {
312                                 
313                                 string names = SharedList->get(i); 
314                 
315                                 while (names.find_first_of(',') != -1) { 
316                                         string name = names.substr(0,names.find_first_of(','));
317                                         names = names.substr(names.find_first_of(',')+1, names.length());
318                                         
319                                         namesInList[name] = name;
320                                 }
321                                 
322                                 //get last name
323                                 namesInList[names] = names;                             
324                         }
325                         
326                         //get names of sequences in groupfile
327                         vector<string> seqNames = groupMap->getNamesSeqs();
328                 
329                         map<string, string>::iterator itMatch;
330                         
331                         openOutputFile(outputMisMatchName, outMisMatch);
332                         
333                         //loop through names in seqNames and if they aren't in namesIn list output them
334                         for (int i = 0; i < seqNames.size(); i++) {
335                                 
336                                 itMatch = namesInList.find(seqNames[i]);
337                                 
338                                 if (itMatch == namesInList.end()) {
339                                 
340                                         outMisMatch << seqNames[i] << endl; 
341                                 }
342                         }               
343                         outMisMatch.close();
344                 }
345  
346         }
347         catch(exception& e) {
348                 errorOut(e, "SharedCommand", "createMisMatchFile");
349                 exit(1);
350         }
351 }
352
353 //**********************************************************************************************************************
354
355 SharedCommand::~SharedCommand(){
356         //delete list;
357         
358         
359 }
360
361 //**********************************************************************************************************************
362
363 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
364         try {
365                 for (int i = 0; i < groups.size(); i++) {
366                         if (groupname == groups[i]) { return true; }
367                 }
368                 
369                 return false;
370         }
371         catch(exception& e) {
372                 errorOut(e, "SharedCommand", "isValidGroup");
373                 exit(1);
374         }
375 }
376 /************************************************************/
377
378