]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11 //********************************************************************************************************************
12 //sorts lowest to highest
13 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
14         return (left->getGroup() < right->getGroup());  
15
16 //**********************************************************************************************************************
17
18 SharedCommand::SharedCommand(string o) : outputDir(o) {
19         try {
20                 globaldata = GlobalData::getInstance();
21                 
22                 //getting output filename
23                 filename = globaldata->inputFileName;
24                 if (outputDir == "") { outputDir += m->hasPath(filename); }
25                 
26                 filename = outputDir + m->getRootName(m->getSimpleName(filename));
27                 filename = filename + "shared";
28                 
29                 m->openOutputFile(filename, out);
30                 pickedGroups = false;
31                 
32                 groupMap = globaldata->gGroupmap;
33                 
34                 //if hte user has not specified any groups then use them all
35                 if (globaldata->Groups.size() == 0) {
36                         groups = groupMap->namesOfGroups;
37                 }else{ //they have specified groups
38                         groups = globaldata->Groups;
39                         pickedGroups = true;
40                 }
41                 
42                 //fill filehandles with neccessary ofstreams
43                 int i;
44                 ofstream* temp;
45                 for (i=0; i<groups.size(); i++) {
46                         temp = new ofstream;
47                         filehandles[groups[i]] = temp;
48                 }
49                 
50                 //set fileroot
51                 fileroot = outputDir + m->getRootName(m->getSimpleName(globaldata->getListFile()));
52                 
53                 //clears file before we start to write to it below
54                 for (int i=0; i<groups.size(); i++) {
55                         remove((fileroot + groups[i] + ".rabund").c_str());
56                         outputNames.push_back((fileroot + groups[i] + ".rabund"));
57                 }
58
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "SharedCommand", "SharedCommand");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66
67 int SharedCommand::execute(){
68         try {
69                 
70                 //lookup.clear();
71                 string errorOff = "no error";
72                 //errorOff = "";
73                 
74                 //if user provided an order file containing the order the shared file should be in read it
75                 if (globaldata->getOrderGroupFile() != "") { readOrderFile(); }
76                 
77                 //read in listfile
78                 read = new ReadOTUFile(globaldata->inputFileName);      
79                 read->read(&*globaldata); 
80                 delete read;
81
82                 input = globaldata->ginput;
83                 SharedList = globaldata->gSharedList;
84                 string lastLabel = SharedList->getLabel();
85                 vector<SharedRAbundVector*> lookup; 
86                 
87                 if (m->control_pressed) { 
88                         delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
89                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
90                         out.close(); remove(filename.c_str()); 
91                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
92                         return 1; 
93                 }
94                                 
95                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
96                         m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
97                         
98                         out.close();
99                         remove(filename.c_str()); //remove blank shared file you made
100                         
101                         createMisMatchFile();
102                         
103                         //delete memory
104                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
105                                 delete it3->second;
106                         }
107                         delete input;
108                         globaldata->ginput = NULL;
109                         delete SharedList;
110                         globaldata->gSharedList = NULL;
111                         
112                         return 1; 
113                 }
114                 
115                 //if user has specified groups make new groupfile for them
116                 if (globaldata->Groups.size() != 0) { //make new group file
117                         string groups = "";
118                         for (int i = 0; i < globaldata->Groups.size(); i++) {
119                                 groups += globaldata->Groups[i] + ".";
120                         }
121                 
122                         string newGroupFile = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + groups + "groups";
123                         ofstream outGroups;
124                         m->openOutputFile(newGroupFile, outGroups);
125                 
126                         vector<string> names = groupMap->getNamesSeqs();
127                         string groupName;
128                         for (int i = 0; i < names.size(); i++) {
129                                 groupName = groupMap->getGroup(names[i]);
130                                 if (isValidGroup(groupName, globaldata->Groups)) {
131                                         outGroups << names[i] << '\t' << groupName << endl;
132                                 }
133                         }
134                         outGroups.close();
135                 }
136                 
137                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
138                 set<string> processedLabels;
139                 set<string> userLabels = globaldata->labels;    
140         
141                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
142                         if (m->control_pressed) { 
143                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
144                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
145                                 out.close(); remove(filename.c_str()); 
146                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
147                                 return 1; 
148                         }
149                 
150                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
151                                         
152                                         lookup = SharedList->getSharedRAbundVector();
153                                         
154                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
155                                         if (pickedGroups) { //check for otus with no seqs in them
156                                                 eliminateZeroOTUS(lookup);
157                                         }
158                                         
159                                         if (m->control_pressed) { 
160                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
161                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
162                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
163                                                 out.close(); remove(filename.c_str()); 
164                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
165                                                 return 1; 
166                                         }
167                                         
168                                         printSharedData(lookup); //prints info to the .shared file
169                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
170                                 
171                                         processedLabels.insert(SharedList->getLabel());
172                                         userLabels.erase(SharedList->getLabel());
173                         }
174                         
175                         if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
176                                         string saveLabel = SharedList->getLabel();
177                                         
178                                         delete SharedList;
179                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
180                                         
181                                         lookup = SharedList->getSharedRAbundVector();
182                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
183                                         if (pickedGroups) { //check for otus with no seqs in them
184                                                 eliminateZeroOTUS(lookup);
185                                         }
186                                         
187                                         
188                                         if (m->control_pressed) { 
189                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
190                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
191                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
192                                                 out.close(); remove(filename.c_str()); 
193                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
194                                                 return 1; 
195                                         }
196                                         
197                                         printSharedData(lookup); //prints info to the .shared file
198                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
199                                         
200                                         processedLabels.insert(SharedList->getLabel());
201                                         userLabels.erase(SharedList->getLabel());
202                                         
203                                         //restore real lastlabel to save below
204                                         SharedList->setLabel(saveLabel);
205                         }
206                         
207                 
208                         lastLabel = SharedList->getLabel();
209                                 
210                         delete SharedList;
211                         SharedList = input->getSharedListVector(); //get new list vector to process
212                 }
213                 
214                 //output error messages about any remaining user labels
215                 set<string>::iterator it;
216                 bool needToRun = false;
217                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
218                         if (processedLabels.count(lastLabel) != 1) {
219                                 needToRun = true;
220                         }
221                 }
222                 
223                 //run last label if you need to
224                 if (needToRun == true)  {
225                         if (SharedList != NULL) {       delete SharedList;      }
226                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
227                                         
228                         lookup = SharedList->getSharedRAbundVector();
229                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
230                         if (pickedGroups) { //check for otus with no seqs in them
231                                 eliminateZeroOTUS(lookup);
232                         }
233                         
234                         if (m->control_pressed) { 
235                                         delete input;  globaldata->ginput = NULL; 
236                                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
237                                         out.close(); remove(filename.c_str()); 
238                                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
239                                         return 1; 
240                         }
241                         
242                         printSharedData(lookup); //prints info to the .shared file
243                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
244                         delete SharedList;
245                 }
246                 
247                 globaldata->gSharedList = NULL;
248                 
249                 out.close();
250                 
251                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
252                         delete it3->second;
253                 }
254
255                 
256                 //change format to shared  to speed up commands
257                 globaldata->setFormat("sharedfile");
258                 globaldata->setListFile("");
259                 globaldata->setGroupFile("");
260                 globaldata->setSharedFile(filename);
261                 
262                 if (m->control_pressed) { 
263                                 delete input;  globaldata->ginput = NULL; 
264                                 remove(filename.c_str()); 
265                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
266                                 return 1; 
267                 }
268                 
269                 m->mothurOutEndLine();
270                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
271                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
272                 m->mothurOut(filename); m->mothurOutEndLine();
273                 m->mothurOutEndLine();
274                 
275                 return 0;
276         }
277         catch(exception& e) {
278                 m->errorOut(e, "SharedCommand", "execute");
279                 exit(1);
280         }
281 }
282 //**********************************************************************************************************************
283 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
284         try {
285                 
286                 if (order.size() == 0) { //user has not specified an order so do aplabetically
287                         sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
288                         
289                         //initialize bin values
290                         for (int i = 0; i < thislookup.size(); i++) {
291                                 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
292                                 thislookup[i]->print(out);
293                                 
294                                 RAbundVector rav = thislookup[i]->getRAbundVector();
295                                 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
296                                 rav.print(*(filehandles[thislookup[i]->getGroup()]));
297                                 (*(filehandles[thislookup[i]->getGroup()])).close();
298                         }
299                 }else{
300                         //create a map from groupName to each sharedrabund
301                         map<string, SharedRAbundVector*> myMap;
302                         map<string, SharedRAbundVector*>::iterator myIt;
303                         
304                         for (int i = 0; i < thislookup.size(); i++) {
305                                 myMap[thislookup[i]->getGroup()] = thislookup[i];
306                         }
307                         
308                         
309                         //loop through ordered list and print the rabund
310                         for (int i = 0; i < order.size(); i++) {
311                                 myIt = myMap.find(order[i]);
312                                 
313                                 if(myIt != myMap.end()) { //we found it
314                                         out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
315                                         (myIt->second)->print(out);
316                                 
317                                         RAbundVector rav = (myIt->second)->getRAbundVector();
318                                         m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
319                                         rav.print(*(filehandles[(myIt->second)->getGroup()]));
320                                         (*(filehandles[(myIt->second)->getGroup()])).close();
321                                 }else{
322                                         m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
323                                 }
324                         }
325                 
326                 }
327  
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "SharedCommand", "printSharedData");
331                 exit(1);
332         }
333 }
334 //**********************************************************************************************************************
335 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
336         try {
337                 
338                 vector<SharedRAbundVector*> newLookup;
339                 for (int i = 0; i < thislookup.size(); i++) {
340                         SharedRAbundVector* temp = new SharedRAbundVector();
341                         temp->setLabel(thislookup[i]->getLabel());
342                         temp->setGroup(thislookup[i]->getGroup());
343                         newLookup.push_back(temp);
344                 }
345                 
346                 //for each bin
347                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
348                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
349                 
350                         //look at each sharedRabund and make sure they are not all zero
351                         bool allZero = true;
352                         for (int j = 0; j < thislookup.size(); j++) {
353                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
354                         }
355                         
356                         //if they are not all zero add this bin
357                         if (!allZero) {
358                                 for (int j = 0; j < thislookup.size(); j++) {
359                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
360                                 }
361                         }
362                         //else{  cout << "bin # " << i << " is all zeros" << endl;  }
363                 }
364         
365                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
366                 thislookup = newLookup;
367                 
368                 return 0;
369  
370         }
371         catch(exception& e) {
372                 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
373                 exit(1);
374         }
375 }
376 //**********************************************************************************************************************
377 int SharedCommand::createMisMatchFile() {
378         try {
379                 ofstream outMisMatch;
380                 string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName));
381                 
382                 //you have sequences in your list file that are not in your group file
383                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
384                         outputMisMatchName += "missing.group";
385                         m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
386                         
387                         m->openOutputFile(outputMisMatchName, outMisMatch);
388                         
389                         map<string, string> listNames;
390                         map<string, string>::iterator itList;
391                         
392                         //go through list and if group returns "not found" output it
393                         for (int i = 0; i < SharedList->getNumBins(); i++) {
394                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
395                         
396                                 string names = SharedList->get(i); 
397                                 
398                                 while (names.find_first_of(',') != -1) { 
399                                         string name = names.substr(0,names.find_first_of(','));
400                                         names = names.substr(names.find_first_of(',')+1, names.length());
401                                         string group = groupMap->getGroup(name);
402                                         
403                                         if(group == "not found") {      outMisMatch << name << endl;  }
404                                         
405                                         itList = listNames.find(name);
406                                         if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
407                                         else { listNames[name] = name; }
408                                 }
409                         
410                                 //get last name
411                                 string group = groupMap->getGroup(names);
412                                 if(group == "not found") {      outMisMatch << names << endl;  }        
413                                 
414                                 itList = listNames.find(names);
415                                 if (itList != listNames.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
416                                 else { listNames[names] = names; }
417
418                         }
419                         
420                         outMisMatch.close();
421                         
422                 
423                 }else {//you have sequences in your group file that are not in you list file
424                         
425                         outputMisMatchName += "missing.name";
426                         m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
427                         
428                         map<string, string> namesInList;
429                         map<string, string>::iterator itList;
430                         
431                         //go through listfile and get names
432                         for (int i = 0; i < SharedList->getNumBins(); i++) {
433                                 if (m->control_pressed) {  return 0; } 
434
435                                 
436                                 string names = SharedList->get(i); 
437                 
438                                 while (names.find_first_of(',') != -1) { 
439                                         string name = names.substr(0,names.find_first_of(','));
440                                         names = names.substr(names.find_first_of(',')+1, names.length());
441                                         
442                                         itList = namesInList.find(name);
443                                         if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
444
445                                         namesInList[name] = name;
446                                         
447                                 }
448                                 
449                                 itList = namesInList.find(names);
450                                 if (itList != namesInList.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
451
452                                 //get last name
453                                 namesInList[names] = names;                             
454                         }
455                         
456                         //get names of sequences in groupfile
457                         vector<string> seqNames = groupMap->getNamesSeqs();
458                 
459                         map<string, string>::iterator itMatch;
460                         
461                         m->openOutputFile(outputMisMatchName, outMisMatch);
462                         
463                         //loop through names in seqNames and if they aren't in namesIn list output them
464                         for (int i = 0; i < seqNames.size(); i++) {
465                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
466                                 
467                                 itMatch = namesInList.find(seqNames[i]);
468                                 
469                                 if (itMatch == namesInList.end()) {
470                                 
471                                         outMisMatch << seqNames[i] << endl; 
472                                 }
473                         }               
474                         outMisMatch.close();
475                 }
476                 
477                 return 0;
478         }
479         catch(exception& e) {
480                 m->errorOut(e, "SharedCommand", "createMisMatchFile");
481                 exit(1);
482         }
483 }
484
485 //**********************************************************************************************************************
486
487 SharedCommand::~SharedCommand(){
488         //delete list;
489         
490         
491 }
492 //**********************************************************************************************************************
493 int SharedCommand::readOrderFile() {
494         try {
495                 //remove old names
496                 order.clear();
497                 
498                 ifstream in;
499                 m->openInputFile(globaldata->getOrderGroupFile(), in);
500                 string thisGroup;
501                 
502                 while(!in.eof()){
503                         in >> thisGroup; m->gobble(in);
504                                                 
505                         order.push_back(thisGroup);
506                         
507                         if (m->control_pressed) { order.clear(); break; }
508                 }
509                 in.close();             
510                 
511                 return 0;
512         }
513         catch(exception& e) {
514                 m->errorOut(e, "SharedCommand", "readOrderFile");
515                 exit(1);
516         }
517 }
518 //**********************************************************************************************************************
519
520 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
521         try {
522                 for (int i = 0; i < groups.size(); i++) {
523                         if (groupname == groups[i]) { return true; }
524                 }
525                 
526                 return false;
527         }
528         catch(exception& e) {
529                 m->errorOut(e, "SharedCommand", "isValidGroup");
530                 exit(1);
531         }
532 }
533 /************************************************************/
534
535