]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
finished get.relabund command
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11 //********************************************************************************************************************
12 //sorts lowest to highest
13 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
14         return (left->getGroup() < right->getGroup());  
15
16 //**********************************************************************************************************************
17
18 SharedCommand::SharedCommand(string o) : outputDir(o) {
19         try {
20                 globaldata = GlobalData::getInstance();
21                 
22                 //getting output filename
23                 filename = globaldata->inputFileName;
24                 if (outputDir == "") { outputDir += hasPath(filename); }
25                 
26                 filename = outputDir + getRootName(getSimpleName(filename));
27                 filename = filename + "shared";
28                 
29                 openOutputFile(filename, out);
30                 pickedGroups = false;
31                 
32                 groupMap = globaldata->gGroupmap;
33                 
34                 //if hte user has not specified any groups then use them all
35                 if (globaldata->Groups.size() == 0) {
36                         groups = groupMap->namesOfGroups;
37                 }else{ //they have specified groups
38                         groups = globaldata->Groups;
39                         pickedGroups = true;
40                 }
41                 
42                 //fill filehandles with neccessary ofstreams
43                 int i;
44                 ofstream* temp;
45                 for (i=0; i<groups.size(); i++) {
46                         temp = new ofstream;
47                         filehandles[groups[i]] = temp;
48                 }
49                 
50                 //set fileroot
51                 fileroot = outputDir + getRootName(getSimpleName(globaldata->getListFile()));
52                 
53                 //clears file before we start to write to it below
54                 for (int i=0; i<groups.size(); i++) {
55                         remove((fileroot + groups[i] + ".rabund").c_str());
56                         outputNames.push_back((fileroot + groups[i] + ".rabund"));
57                 }
58
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "SharedCommand", "SharedCommand");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66
67 int SharedCommand::execute(){
68         try {
69                 
70                 //lookup.clear();
71                 string errorOff = "no error";
72                 //errorOff = "";
73                 
74                 //read in listfile
75                 read = new ReadOTUFile(globaldata->inputFileName);      
76                 read->read(&*globaldata); 
77                 delete read;
78
79                 input = globaldata->ginput;
80                 SharedList = globaldata->gSharedList;
81                 string lastLabel = SharedList->getLabel();
82                 vector<SharedRAbundVector*> lookup; 
83                 
84                 if (m->control_pressed) { 
85                         delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
86                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
87                         out.close(); remove(filename.c_str()); 
88                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
89                         return 1; 
90                 }
91                                 
92                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
93                         m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
94                         
95                         out.close();
96                         remove(filename.c_str()); //remove blank shared file you made
97                         
98                         createMisMatchFile();
99                         
100                         //delete memory
101                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
102                                 delete it3->second;
103                         }
104                         delete input;
105                         globaldata->ginput = NULL;
106                         delete SharedList;
107                         globaldata->gSharedList = NULL;
108                         
109                         return 1; 
110                 }
111                 
112                 //if user has specified groups make new groupfile for them
113                 if (globaldata->Groups.size() != 0) { //make new group file
114                         string groups = "";
115                         for (int i = 0; i < globaldata->Groups.size(); i++) {
116                                 groups += globaldata->Groups[i] + ".";
117                         }
118                 
119                         string newGroupFile = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + groups + "groups";
120                         ofstream outGroups;
121                         openOutputFile(newGroupFile, outGroups);
122                 
123                         vector<string> names = groupMap->getNamesSeqs();
124                         string groupName;
125                         for (int i = 0; i < names.size(); i++) {
126                                 groupName = groupMap->getGroup(names[i]);
127                                 if (isValidGroup(groupName, globaldata->Groups)) {
128                                         outGroups << names[i] << '\t' << groupName << endl;
129                                 }
130                         }
131                         outGroups.close();
132                 }
133                 
134                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
135                 set<string> processedLabels;
136                 set<string> userLabels = globaldata->labels;    
137         
138                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
139                         if (m->control_pressed) { 
140                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
141                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
142                                 out.close(); remove(filename.c_str()); 
143                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
144                                 return 1; 
145                         }
146                 
147                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
148                                         
149                                         lookup = SharedList->getSharedRAbundVector();
150                                         
151                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
152                                         if (pickedGroups) { //check for otus with no seqs in them
153                                                 eliminateZeroOTUS(lookup);
154                                         }
155                                         
156                                         if (m->control_pressed) { 
157                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
158                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
159                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
160                                                 out.close(); remove(filename.c_str()); 
161                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
162                                                 return 1; 
163                                         }
164                                         
165                                         printSharedData(lookup); //prints info to the .shared file
166                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
167                                 
168                                         processedLabels.insert(SharedList->getLabel());
169                                         userLabels.erase(SharedList->getLabel());
170                         }
171                         
172                         if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
173                                         string saveLabel = SharedList->getLabel();
174                                         
175                                         delete SharedList;
176                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
177                                         
178                                         lookup = SharedList->getSharedRAbundVector();
179                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
180                                         if (pickedGroups) { //check for otus with no seqs in them
181                                                 eliminateZeroOTUS(lookup);
182                                         }
183                                         
184                                         
185                                         if (m->control_pressed) { 
186                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
187                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
188                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
189                                                 out.close(); remove(filename.c_str()); 
190                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
191                                                 return 1; 
192                                         }
193                                         
194                                         printSharedData(lookup); //prints info to the .shared file
195                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
196                                         
197                                         processedLabels.insert(SharedList->getLabel());
198                                         userLabels.erase(SharedList->getLabel());
199                                         
200                                         //restore real lastlabel to save below
201                                         SharedList->setLabel(saveLabel);
202                         }
203                         
204                 
205                         lastLabel = SharedList->getLabel();
206                                 
207                         delete SharedList;
208                         SharedList = input->getSharedListVector(); //get new list vector to process
209                 }
210                 
211                 //output error messages about any remaining user labels
212                 set<string>::iterator it;
213                 bool needToRun = false;
214                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
215                         if (processedLabels.count(lastLabel) != 1) {
216                                 needToRun = true;
217                         }
218                 }
219                 
220                 //run last label if you need to
221                 if (needToRun == true)  {
222                         if (SharedList != NULL) {       delete SharedList;      }
223                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
224                                         
225                         lookup = SharedList->getSharedRAbundVector();
226                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
227                         if (pickedGroups) { //check for otus with no seqs in them
228                                 eliminateZeroOTUS(lookup);
229                         }
230                         
231                         if (m->control_pressed) { 
232                                         delete input;  globaldata->ginput = NULL; 
233                                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
234                                         out.close(); remove(filename.c_str()); 
235                                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
236                                         return 1; 
237                         }
238                         
239                         printSharedData(lookup); //prints info to the .shared file
240                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
241                         delete SharedList;
242                 }
243                 
244                 globaldata->gSharedList = NULL;
245                 
246                 out.close();
247                 
248                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
249                         delete it3->second;
250                 }
251
252                 
253                 //change format to shared  to speed up commands
254                 globaldata->setFormat("sharedfile");
255                 globaldata->setListFile("");
256                 globaldata->setGroupFile("");
257                 globaldata->setSharedFile(filename);
258                 
259                 if (m->control_pressed) { 
260                                 delete input;  globaldata->ginput = NULL; 
261                                 remove(filename.c_str()); 
262                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
263                                 return 1; 
264                 }
265                 
266                 m->mothurOutEndLine();
267                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
268                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
269                 m->mothurOut(filename); m->mothurOutEndLine();
270                 m->mothurOutEndLine();
271                 
272                 return 0;
273         }
274         catch(exception& e) {
275                 m->errorOut(e, "SharedCommand", "execute");
276                 exit(1);
277         }
278 }
279 //**********************************************************************************************************************
280 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
281         try {
282                 
283                 sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
284                 
285                 //initialize bin values
286                 for (int i = 0; i < thislookup.size(); i++) {
287 //cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() <<  endl;
288                         out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
289                         thislookup[i]->print(out);
290                         
291                         RAbundVector rav = thislookup[i]->getRAbundVector();
292                         openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
293                         rav.print(*(filehandles[thislookup[i]->getGroup()]));
294                         (*(filehandles[thislookup[i]->getGroup()])).close();
295                 }
296  
297         }
298         catch(exception& e) {
299                 m->errorOut(e, "SharedCommand", "printSharedData");
300                 exit(1);
301         }
302 }
303 //**********************************************************************************************************************
304 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
305         try {
306                 
307                 vector<SharedRAbundVector*> newLookup;
308                 for (int i = 0; i < thislookup.size(); i++) {
309                         SharedRAbundVector* temp = new SharedRAbundVector();
310                         temp->setLabel(thislookup[i]->getLabel());
311                         temp->setGroup(thislookup[i]->getGroup());
312                         newLookup.push_back(temp);
313                 }
314                 
315                 //for each bin
316                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
317                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
318                 
319                         //look at each sharedRabund and make sure they are not all zero
320                         bool allZero = true;
321                         for (int j = 0; j < thislookup.size(); j++) {
322                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
323                         }
324                         
325                         //if they are not all zero add this bin
326                         if (!allZero) {
327                                 for (int j = 0; j < thislookup.size(); j++) {
328                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
329                                 }
330                         }
331                         //else{  cout << "bin # " << i << " is all zeros" << endl;  }
332                 }
333         
334                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
335                 thislookup = newLookup;
336                 
337                 return 0;
338  
339         }
340         catch(exception& e) {
341                 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
342                 exit(1);
343         }
344 }
345 //**********************************************************************************************************************
346 int SharedCommand::createMisMatchFile() {
347         try {
348                 ofstream outMisMatch;
349                 string outputMisMatchName = outputDir + getRootName(getSimpleName(globaldata->inputFileName));
350                 
351                 //you have sequences in your list file that are not in your group file
352                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
353                         outputMisMatchName += "missing.group";
354                         m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
355                         
356                         openOutputFile(outputMisMatchName, outMisMatch);
357                         
358                         map<string, string> listNames;
359                         map<string, string>::iterator itList;
360                         
361                         //go through list and if group returns "not found" output it
362                         for (int i = 0; i < SharedList->getNumBins(); i++) {
363                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
364                         
365                                 string names = SharedList->get(i); 
366                                 
367                                 while (names.find_first_of(',') != -1) { 
368                                         string name = names.substr(0,names.find_first_of(','));
369                                         names = names.substr(names.find_first_of(',')+1, names.length());
370                                         string group = groupMap->getGroup(name);
371                                         
372                                         if(group == "not found") {      outMisMatch << name << endl;  }
373                                         
374                                         itList = listNames.find(name);
375                                         if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
376                                         else { listNames[name] = name; }
377                                 }
378                         
379                                 //get last name
380                                 string group = groupMap->getGroup(names);
381                                 if(group == "not found") {      outMisMatch << names << endl;  }        
382                                 
383                                 itList = listNames.find(names);
384                                 if (itList != listNames.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
385                                 else { listNames[names] = names; }
386
387                         }
388                         
389                         outMisMatch.close();
390                         
391                 
392                 }else {//you have sequences in your group file that are not in you list file
393                         
394                         outputMisMatchName += "missing.name";
395                         m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
396                         
397                         map<string, string> namesInList;
398                         map<string, string>::iterator itList;
399                         
400                         //go through listfile and get names
401                         for (int i = 0; i < SharedList->getNumBins(); i++) {
402                                 if (m->control_pressed) {  return 0; } 
403
404                                 
405                                 string names = SharedList->get(i); 
406                 
407                                 while (names.find_first_of(',') != -1) { 
408                                         string name = names.substr(0,names.find_first_of(','));
409                                         names = names.substr(names.find_first_of(',')+1, names.length());
410                                         
411                                         itList = namesInList.find(name);
412                                         if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
413
414                                         namesInList[name] = name;
415                                         
416                                 }
417                                 
418                                 itList = namesInList.find(names);
419                                 if (itList != namesInList.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
420
421                                 //get last name
422                                 namesInList[names] = names;                             
423                         }
424                         
425                         //get names of sequences in groupfile
426                         vector<string> seqNames = groupMap->getNamesSeqs();
427                 
428                         map<string, string>::iterator itMatch;
429                         
430                         openOutputFile(outputMisMatchName, outMisMatch);
431                         
432                         //loop through names in seqNames and if they aren't in namesIn list output them
433                         for (int i = 0; i < seqNames.size(); i++) {
434                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
435                                 
436                                 itMatch = namesInList.find(seqNames[i]);
437                                 
438                                 if (itMatch == namesInList.end()) {
439                                 
440                                         outMisMatch << seqNames[i] << endl; 
441                                 }
442                         }               
443                         outMisMatch.close();
444                 }
445                 
446                 return 0;
447         }
448         catch(exception& e) {
449                 m->errorOut(e, "SharedCommand", "createMisMatchFile");
450                 exit(1);
451         }
452 }
453
454 //**********************************************************************************************************************
455
456 SharedCommand::~SharedCommand(){
457         //delete list;
458         
459         
460 }
461
462 //**********************************************************************************************************************
463
464 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
465         try {
466                 for (int i = 0; i < groups.size(); i++) {
467                         if (groupname == groups[i]) { return true; }
468                 }
469                 
470                 return false;
471         }
472         catch(exception& e) {
473                 m->errorOut(e, "SharedCommand", "isValidGroup");
474                 exit(1);
475         }
476 }
477 /************************************************************/
478
479