]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
fixed phylo.diversity
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11 //********************************************************************************************************************
12 //sorts lowest to highest
13 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
14         return (left->getGroup() < right->getGroup());  
15
16 //**********************************************************************************************************************
17
18 SharedCommand::SharedCommand(string o) : outputDir(o) {
19         try {
20                 globaldata = GlobalData::getInstance();
21                 
22                 //getting output filename
23                 filename = globaldata->inputFileName;
24                 if (outputDir == "") { outputDir += m->hasPath(filename); }
25                 
26                 filename = outputDir + m->getRootName(m->getSimpleName(filename));
27                 filename = filename + "shared";
28                 
29                 m->openOutputFile(filename, out);
30                 pickedGroups = false;
31                 
32                 groupMap = globaldata->gGroupmap;
33                 
34                 //if hte user has not specified any groups then use them all
35                 if (globaldata->Groups.size() == 0) {
36                         groups = groupMap->namesOfGroups;
37                 }else{ //they have specified groups
38                         groups = globaldata->Groups;
39                         pickedGroups = true;
40                 }
41                 
42                 //fill filehandles with neccessary ofstreams
43                 int i;
44                 ofstream* temp;
45                 for (i=0; i<groups.size(); i++) {
46                         temp = new ofstream;
47                         filehandles[groups[i]] = temp;
48                 }
49                 
50                 //set fileroot
51                 fileroot = outputDir + m->getRootName(m->getSimpleName(globaldata->getListFile()));
52                 
53                 //clears file before we start to write to it below
54                 for (int i=0; i<groups.size(); i++) {
55                         remove((fileroot + groups[i] + ".rabund").c_str());
56                         outputNames.push_back((fileroot + groups[i] + ".rabund"));
57                 }
58
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "SharedCommand", "SharedCommand");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66
67 int SharedCommand::execute(){
68         try {
69                 
70                 //lookup.clear();
71                 string errorOff = "no error";
72                 //errorOff = "";
73                 
74                 //if user provided an order file containing the order the shared file should be in read it
75                 if (globaldata->getOrderGroupFile() != "") { readOrderFile(); }
76                 
77                 //read in listfile
78                 read = new ReadOTUFile(globaldata->inputFileName);      
79                 read->read(&*globaldata); 
80                 delete read;
81
82                 input = globaldata->ginput;
83                 SharedList = globaldata->gSharedList;
84                 string lastLabel = SharedList->getLabel();
85                 vector<SharedRAbundVector*> lookup; 
86                 
87                 if (m->control_pressed) { 
88                         delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
89                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
90                         out.close(); remove(filename.c_str()); 
91                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
92                         return 1; 
93                 }
94                                 
95                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
96                         m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
97                         
98                         out.close();
99                         remove(filename.c_str()); //remove blank shared file you made
100                         
101                         createMisMatchFile();
102                         
103                         //delete memory
104                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
105                                 delete it3->second;
106                         }
107                         delete input;
108                         globaldata->ginput = NULL;
109                         delete SharedList;
110                         globaldata->gSharedList = NULL;
111                         
112                         return 1; 
113                 }
114                 
115                 //if user has specified groups make new groupfile for them
116                 if (globaldata->Groups.size() != 0) { //make new group file
117                         string groups = "";
118                         for (int i = 0; i < globaldata->Groups.size(); i++) {
119                                 groups += globaldata->Groups[i] + ".";
120                         }
121                 
122                         string newGroupFile = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + groups + "groups";
123                         ofstream outGroups;
124                         m->openOutputFile(newGroupFile, outGroups);
125                 
126                         vector<string> names = groupMap->getNamesSeqs();
127                         string groupName;
128                         for (int i = 0; i < names.size(); i++) {
129                                 groupName = groupMap->getGroup(names[i]);
130                                 if (isValidGroup(groupName, globaldata->Groups)) {
131                                         outGroups << names[i] << '\t' << groupName << endl;
132                                 }
133                         }
134                         outGroups.close();
135                 }
136                 
137                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
138                 set<string> processedLabels;
139                 set<string> userLabels = globaldata->labels;    
140         
141                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
142                         if (m->control_pressed) { 
143                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
144                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
145                                 out.close(); remove(filename.c_str()); 
146                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
147                                 return 1; 
148                         }
149                 
150                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
151                                         
152                                         lookup = SharedList->getSharedRAbundVector();
153                                         
154                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
155                                         if (pickedGroups) { //check for otus with no seqs in them
156                                                 eliminateZeroOTUS(lookup);
157                                         }
158                                         
159                                         if (m->control_pressed) { 
160                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
161                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
162                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
163                                                 out.close(); remove(filename.c_str()); 
164                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
165                                                 return 1; 
166                                         }
167                                         
168                                         printSharedData(lookup); //prints info to the .shared file
169                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
170                                 
171                                         processedLabels.insert(SharedList->getLabel());
172                                         userLabels.erase(SharedList->getLabel());
173                         }
174                         
175                         if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
176                                         string saveLabel = SharedList->getLabel();
177                                         
178                                         delete SharedList;
179                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
180                                         
181                                         lookup = SharedList->getSharedRAbundVector();
182                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
183                                         if (pickedGroups) { //check for otus with no seqs in them
184                                                 eliminateZeroOTUS(lookup);
185                                         }
186                                         
187                                         
188                                         if (m->control_pressed) { 
189                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
190                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
191                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
192                                                 out.close(); remove(filename.c_str()); 
193                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
194                                                 return 1; 
195                                         }
196                                         
197                                         printSharedData(lookup); //prints info to the .shared file
198                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
199                                         
200                                         processedLabels.insert(SharedList->getLabel());
201                                         userLabels.erase(SharedList->getLabel());
202                                         
203                                         //restore real lastlabel to save below
204                                         SharedList->setLabel(saveLabel);
205                         }
206                         
207                 
208                         lastLabel = SharedList->getLabel();
209                                 
210                         delete SharedList;
211                         SharedList = input->getSharedListVector(); //get new list vector to process
212                 }
213                 
214                 //output error messages about any remaining user labels
215                 set<string>::iterator it;
216                 bool needToRun = false;
217                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
218                         if (processedLabels.count(lastLabel) != 1) {
219                                 needToRun = true;
220                         }
221                 }
222                 
223                 //run last label if you need to
224                 if (needToRun == true)  {
225                         if (SharedList != NULL) {       delete SharedList;      }
226                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
227                                         
228                         lookup = SharedList->getSharedRAbundVector();
229                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
230                         if (pickedGroups) { //check for otus with no seqs in them
231                                 eliminateZeroOTUS(lookup);
232                         }
233                         
234                         if (m->control_pressed) { 
235                                         delete input;  globaldata->ginput = NULL; 
236                                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
237                                         out.close(); remove(filename.c_str()); 
238                                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
239                                         return 1; 
240                         }
241                         
242                         printSharedData(lookup); //prints info to the .shared file
243                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
244                         delete SharedList;
245                 }
246                 
247                 globaldata->gSharedList = NULL;
248                 
249                 out.close();
250                 
251                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
252                         delete it3->second;
253                 }
254
255                 
256                 //change format to shared  to speed up commands
257                 globaldata->setFormat("sharedfile");
258                 globaldata->setListFile("");
259                 globaldata->setGroupFile("");
260                 globaldata->setSharedFile(filename);
261                 
262                 
263                 if (m->control_pressed) { 
264                                 delete input;  globaldata->ginput = NULL; 
265                                 remove(filename.c_str()); 
266                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
267                                 return 1; 
268                 }
269                 
270                 m->mothurOutEndLine();
271                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
272                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
273                 m->mothurOut(filename); m->mothurOutEndLine();
274                 m->mothurOutEndLine();
275                 
276                 return 0;
277         }
278         catch(exception& e) {
279                 m->errorOut(e, "SharedCommand", "execute");
280                 exit(1);
281         }
282 }
283 //**********************************************************************************************************************
284 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
285         try {
286                 
287                 if (order.size() == 0) { //user has not specified an order so do aplabetically
288                         sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
289                         
290                         globaldata->Groups.clear();
291                         
292                         //initialize bin values
293                         for (int i = 0; i < thislookup.size(); i++) {
294                                 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
295                                 thislookup[i]->print(out);
296                                 
297                                 globaldata->Groups.push_back(thislookup[i]->getGroup());
298                                 
299                                 RAbundVector rav = thislookup[i]->getRAbundVector();
300                                 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
301                                 rav.print(*(filehandles[thislookup[i]->getGroup()]));
302                                 (*(filehandles[thislookup[i]->getGroup()])).close();
303                         }
304                 }else{
305                         //create a map from groupName to each sharedrabund
306                         map<string, SharedRAbundVector*> myMap;
307                         map<string, SharedRAbundVector*>::iterator myIt;
308                         
309                         for (int i = 0; i < thislookup.size(); i++) {
310                                 myMap[thislookup[i]->getGroup()] = thislookup[i];
311                         }
312                         
313                         globaldata->Groups.clear();
314                         
315                         //loop through ordered list and print the rabund
316                         for (int i = 0; i < order.size(); i++) {
317                                 myIt = myMap.find(order[i]);
318                                 
319                                 if(myIt != myMap.end()) { //we found it
320                                         out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
321                                         (myIt->second)->print(out);
322                                         
323                                         globaldata->Groups.push_back((myIt->second)->getGroup());
324                                 
325                                         RAbundVector rav = (myIt->second)->getRAbundVector();
326                                         m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
327                                         rav.print(*(filehandles[(myIt->second)->getGroup()]));
328                                         (*(filehandles[(myIt->second)->getGroup()])).close();
329                                 }else{
330                                         m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
331                                 }
332                         }
333                 
334                 }
335  
336         }
337         catch(exception& e) {
338                 m->errorOut(e, "SharedCommand", "printSharedData");
339                 exit(1);
340         }
341 }
342 //**********************************************************************************************************************
343 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
344         try {
345                 
346                 vector<SharedRAbundVector*> newLookup;
347                 for (int i = 0; i < thislookup.size(); i++) {
348                         SharedRAbundVector* temp = new SharedRAbundVector();
349                         temp->setLabel(thislookup[i]->getLabel());
350                         temp->setGroup(thislookup[i]->getGroup());
351                         newLookup.push_back(temp);
352                 }
353                 
354                 //for each bin
355                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
356                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
357                 
358                         //look at each sharedRabund and make sure they are not all zero
359                         bool allZero = true;
360                         for (int j = 0; j < thislookup.size(); j++) {
361                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
362                         }
363                         
364                         //if they are not all zero add this bin
365                         if (!allZero) {
366                                 for (int j = 0; j < thislookup.size(); j++) {
367                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
368                                 }
369                         }
370                         //else{  cout << "bin # " << i << " is all zeros" << endl;  }
371                 }
372         
373                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
374                 thislookup = newLookup;
375                 
376                 return 0;
377  
378         }
379         catch(exception& e) {
380                 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
381                 exit(1);
382         }
383 }
384 //**********************************************************************************************************************
385 int SharedCommand::createMisMatchFile() {
386         try {
387                 ofstream outMisMatch;
388                 string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName));
389                 
390                 //you have sequences in your list file that are not in your group file
391                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
392                         outputMisMatchName += "missing.group";
393                         m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
394                         
395                         m->openOutputFile(outputMisMatchName, outMisMatch);
396                         
397                         map<string, string> listNames;
398                         map<string, string>::iterator itList;
399                         
400                         //go through list and if group returns "not found" output it
401                         for (int i = 0; i < SharedList->getNumBins(); i++) {
402                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
403                         
404                                 string names = SharedList->get(i); 
405                                 
406                                 while (names.find_first_of(',') != -1) { 
407                                         string name = names.substr(0,names.find_first_of(','));
408                                         names = names.substr(names.find_first_of(',')+1, names.length());
409                                         string group = groupMap->getGroup(name);
410                                         
411                                         if(group == "not found") {      outMisMatch << name << endl;  }
412                                         
413                                         itList = listNames.find(name);
414                                         if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
415                                         else { listNames[name] = name; }
416                                 }
417                         
418                                 //get last name
419                                 string group = groupMap->getGroup(names);
420                                 if(group == "not found") {      outMisMatch << names << endl;  }        
421                                 
422                                 itList = listNames.find(names);
423                                 if (itList != listNames.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
424                                 else { listNames[names] = names; }
425
426                         }
427                         
428                         outMisMatch.close();
429                         
430                 
431                 }else {//you have sequences in your group file that are not in you list file
432                         
433                         outputMisMatchName += "missing.name";
434                         m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
435                         
436                         map<string, string> namesInList;
437                         map<string, string>::iterator itList;
438                         
439                         //go through listfile and get names
440                         for (int i = 0; i < SharedList->getNumBins(); i++) {
441                                 if (m->control_pressed) {  return 0; } 
442
443                                 
444                                 string names = SharedList->get(i); 
445                 
446                                 while (names.find_first_of(',') != -1) { 
447                                         string name = names.substr(0,names.find_first_of(','));
448                                         names = names.substr(names.find_first_of(',')+1, names.length());
449                                         
450                                         itList = namesInList.find(name);
451                                         if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
452
453                                         namesInList[name] = name;
454                                         
455                                 }
456                                 
457                                 itList = namesInList.find(names);
458                                 if (itList != namesInList.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
459
460                                 //get last name
461                                 namesInList[names] = names;                             
462                         }
463                         
464                         //get names of sequences in groupfile
465                         vector<string> seqNames = groupMap->getNamesSeqs();
466                 
467                         map<string, string>::iterator itMatch;
468                         
469                         m->openOutputFile(outputMisMatchName, outMisMatch);
470                         
471                         //loop through names in seqNames and if they aren't in namesIn list output them
472                         for (int i = 0; i < seqNames.size(); i++) {
473                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
474                                 
475                                 itMatch = namesInList.find(seqNames[i]);
476                                 
477                                 if (itMatch == namesInList.end()) {
478                                 
479                                         outMisMatch << seqNames[i] << endl; 
480                                 }
481                         }               
482                         outMisMatch.close();
483                 }
484                 
485                 return 0;
486         }
487         catch(exception& e) {
488                 m->errorOut(e, "SharedCommand", "createMisMatchFile");
489                 exit(1);
490         }
491 }
492
493 //**********************************************************************************************************************
494
495 SharedCommand::~SharedCommand(){
496         //delete list;
497         
498         
499 }
500 //**********************************************************************************************************************
501 int SharedCommand::readOrderFile() {
502         try {
503                 //remove old names
504                 order.clear();
505                 
506                 ifstream in;
507                 m->openInputFile(globaldata->getOrderGroupFile(), in);
508                 string thisGroup;
509                 
510                 while(!in.eof()){
511                         in >> thisGroup; m->gobble(in);
512                                                 
513                         order.push_back(thisGroup);
514                         
515                         if (m->control_pressed) { order.clear(); break; }
516                 }
517                 in.close();             
518                 
519                 return 0;
520         }
521         catch(exception& e) {
522                 m->errorOut(e, "SharedCommand", "readOrderFile");
523                 exit(1);
524         }
525 }
526 //**********************************************************************************************************************
527
528 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
529         try {
530                 for (int i = 0; i < groups.size(); i++) {
531                         if (groupname == groups[i]) { return true; }
532                 }
533                 
534                 return false;
535         }
536         catch(exception& e) {
537                 m->errorOut(e, "SharedCommand", "isValidGroup");
538                 exit(1);
539         }
540 }
541 /************************************************************/
542
543