]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11 //********************************************************************************************************************
12 //sorts lowest to highest
13 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
14         return (left->getGroup() < right->getGroup());  
15
16 //**********************************************************************************************************************
17 vector<string> SharedCommand::getValidParameters(){     
18         try {
19                 vector<string> myArray; 
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "SharedCommand", "getValidParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 SharedCommand::SharedCommand(){ 
29         try {
30                 //initialize outputTypes
31                 vector<string> tempOutNames;
32                 outputTypes["rabund"] = tempOutNames;
33                 outputTypes["shared"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "SharedCommand", "SharedCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> SharedCommand::getRequiredParameters(){  
42         try {
43                 vector<string> myArray;
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "SharedCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> SharedCommand::getRequiredFiles(){       
53         try {
54                 vector<string> myArray;
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "SharedCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63
64 SharedCommand::SharedCommand(string o) : outputDir(o) {
65         try {
66                 globaldata = GlobalData::getInstance();
67                 
68                 //initialize outputTypes
69                 vector<string> tempOutNames;
70                 outputTypes["rabund"] = tempOutNames;
71                 outputTypes["shared"] = tempOutNames;
72                 
73                 //getting output filename
74                 filename = globaldata->inputFileName;
75                 if (outputDir == "") { outputDir += m->hasPath(filename); }
76                 
77                 filename = outputDir + m->getRootName(m->getSimpleName(filename));
78                 filename = filename + "shared";
79                 outputTypes["shared"].push_back(filename);
80                 
81                 m->openOutputFile(filename, out);
82                 pickedGroups = false;
83                 
84                 groupMap = globaldata->gGroupmap;
85                 
86                 //if hte user has not specified any groups then use them all
87                 if (globaldata->Groups.size() == 0) {
88                         groups = groupMap->namesOfGroups;
89                 }else{ //they have specified groups
90                         groups = globaldata->Groups;
91                         pickedGroups = true;
92                 }
93                 
94                 //fill filehandles with neccessary ofstreams
95                 int i;
96                 ofstream* temp;
97                 for (i=0; i<groups.size(); i++) {
98                         temp = new ofstream;
99                         filehandles[groups[i]] = temp;
100                 }
101                 
102                 //set fileroot
103                 fileroot = outputDir + m->getRootName(m->getSimpleName(globaldata->getListFile()));
104                 
105                 //clears file before we start to write to it below
106                 for (int i=0; i<groups.size(); i++) {
107                         remove((fileroot + groups[i] + ".rabund").c_str());
108                         outputNames.push_back((fileroot + groups[i] + ".rabund"));
109                         outputTypes["rabund"].push_back((fileroot + groups[i] + ".rabund"));
110                 }
111
112         }
113         catch(exception& e) {
114                 m->errorOut(e, "SharedCommand", "SharedCommand");
115                 exit(1);
116         }
117 }
118 //**********************************************************************************************************************
119
120 int SharedCommand::execute(){
121         try {
122                 
123                 //lookup.clear();
124                 string errorOff = "no error";
125                 //errorOff = "";
126                 
127                 //if user provided an order file containing the order the shared file should be in read it
128                 if (globaldata->getOrderGroupFile() != "") { readOrderFile(); }
129                 
130                 //read in listfile
131                 read = new ReadOTUFile(globaldata->inputFileName);      
132                 read->read(&*globaldata); 
133                 delete read;
134
135                 input = globaldata->ginput;
136                 SharedList = globaldata->gSharedList;
137                 string lastLabel = SharedList->getLabel();
138                 vector<SharedRAbundVector*> lookup; 
139                 
140                 if (m->control_pressed) { 
141                         delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
142                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
143                         out.close(); remove(filename.c_str()); 
144                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
145                         return 1; 
146                 }
147                                 
148                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
149                         m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
150                         
151                         out.close();
152                         remove(filename.c_str()); //remove blank shared file you made
153                         
154                         createMisMatchFile();
155                         
156                         //delete memory
157                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
158                                 delete it3->second;
159                         }
160                         delete input;
161                         globaldata->ginput = NULL;
162                         delete SharedList;
163                         globaldata->gSharedList = NULL;
164                         
165                         return 1; 
166                 }
167                 
168                 //if user has specified groups make new groupfile for them
169                 if (globaldata->Groups.size() != 0) { //make new group file
170                         string groups = "";
171                         for (int i = 0; i < globaldata->Groups.size(); i++) {
172                                 groups += globaldata->Groups[i] + ".";
173                         }
174                 
175                         string newGroupFile = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + groups + "groups";
176                         ofstream outGroups;
177                         m->openOutputFile(newGroupFile, outGroups);
178                 
179                         vector<string> names = groupMap->getNamesSeqs();
180                         string groupName;
181                         for (int i = 0; i < names.size(); i++) {
182                                 groupName = groupMap->getGroup(names[i]);
183                                 if (isValidGroup(groupName, globaldata->Groups)) {
184                                         outGroups << names[i] << '\t' << groupName << endl;
185                                 }
186                         }
187                         outGroups.close();
188                 }
189                 
190                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
191                 set<string> processedLabels;
192                 set<string> userLabels = globaldata->labels;    
193         
194                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
195                         if (m->control_pressed) { 
196                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
197                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
198                                 out.close(); remove(filename.c_str()); 
199                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
200                                 return 1; 
201                         }
202                 
203                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
204                                         
205                                         lookup = SharedList->getSharedRAbundVector();
206                                         
207                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
208                                         if (pickedGroups) { //check for otus with no seqs in them
209                                                 eliminateZeroOTUS(lookup);
210                                         }
211                                         
212                                         if (m->control_pressed) { 
213                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
214                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
215                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
216                                                 out.close(); remove(filename.c_str()); 
217                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
218                                                 return 1; 
219                                         }
220                                         
221                                         printSharedData(lookup); //prints info to the .shared file
222                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
223                                 
224                                         processedLabels.insert(SharedList->getLabel());
225                                         userLabels.erase(SharedList->getLabel());
226                         }
227                         
228                         if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
229                                         string saveLabel = SharedList->getLabel();
230                                         
231                                         delete SharedList;
232                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
233                                         
234                                         lookup = SharedList->getSharedRAbundVector();
235                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
236                                         if (pickedGroups) { //check for otus with no seqs in them
237                                                 eliminateZeroOTUS(lookup);
238                                         }
239                                         
240                                         
241                                         if (m->control_pressed) { 
242                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
243                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
244                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
245                                                 out.close(); remove(filename.c_str()); 
246                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
247                                                 return 1; 
248                                         }
249                                         
250                                         printSharedData(lookup); //prints info to the .shared file
251                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
252                                         
253                                         processedLabels.insert(SharedList->getLabel());
254                                         userLabels.erase(SharedList->getLabel());
255                                         
256                                         //restore real lastlabel to save below
257                                         SharedList->setLabel(saveLabel);
258                         }
259                         
260                 
261                         lastLabel = SharedList->getLabel();
262                                 
263                         delete SharedList;
264                         SharedList = input->getSharedListVector(); //get new list vector to process
265                 }
266                 
267                 //output error messages about any remaining user labels
268                 set<string>::iterator it;
269                 bool needToRun = false;
270                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
271                         if (processedLabels.count(lastLabel) != 1) {
272                                 needToRun = true;
273                         }
274                 }
275                 
276                 //run last label if you need to
277                 if (needToRun == true)  {
278                         if (SharedList != NULL) {       delete SharedList;      }
279                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
280                                         
281                         lookup = SharedList->getSharedRAbundVector();
282                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
283                         if (pickedGroups) { //check for otus with no seqs in them
284                                 eliminateZeroOTUS(lookup);
285                         }
286                         
287                         if (m->control_pressed) { 
288                                         delete input;  globaldata->ginput = NULL; 
289                                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
290                                         out.close(); remove(filename.c_str()); 
291                                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
292                                         return 1; 
293                         }
294                         
295                         printSharedData(lookup); //prints info to the .shared file
296                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
297                         delete SharedList;
298                 }
299                 
300                 globaldata->gSharedList = NULL;
301                 
302                 out.close();
303                 
304                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
305                         delete it3->second;
306                 }
307
308                 
309                 //change format to shared  to speed up commands
310                 globaldata->setFormat("sharedfile");
311                 globaldata->setListFile("");
312                 globaldata->setGroupFile("");
313                 globaldata->setSharedFile(filename);
314                 
315                 
316                 if (m->control_pressed) { 
317                                 delete input;  globaldata->ginput = NULL; 
318                                 remove(filename.c_str()); 
319                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
320                                 return 1; 
321                 }
322                 
323                 m->mothurOutEndLine();
324                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
325                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
326                 m->mothurOut(filename); m->mothurOutEndLine();
327                 m->mothurOutEndLine();
328                 
329                 return 0;
330         }
331         catch(exception& e) {
332                 m->errorOut(e, "SharedCommand", "execute");
333                 exit(1);
334         }
335 }
336 //**********************************************************************************************************************
337 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
338         try {
339                 
340                 if (order.size() == 0) { //user has not specified an order so do aplabetically
341                         sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
342                         
343                         globaldata->Groups.clear();
344                         
345                         //initialize bin values
346                         for (int i = 0; i < thislookup.size(); i++) {
347                                 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
348                                 thislookup[i]->print(out);
349                                 
350                                 globaldata->Groups.push_back(thislookup[i]->getGroup());
351                                 
352                                 RAbundVector rav = thislookup[i]->getRAbundVector();
353                                 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
354                                 rav.print(*(filehandles[thislookup[i]->getGroup()]));
355                                 (*(filehandles[thislookup[i]->getGroup()])).close();
356                         }
357                 }else{
358                         //create a map from groupName to each sharedrabund
359                         map<string, SharedRAbundVector*> myMap;
360                         map<string, SharedRAbundVector*>::iterator myIt;
361                         
362                         for (int i = 0; i < thislookup.size(); i++) {
363                                 myMap[thislookup[i]->getGroup()] = thislookup[i];
364                         }
365                         
366                         globaldata->Groups.clear();
367                         
368                         //loop through ordered list and print the rabund
369                         for (int i = 0; i < order.size(); i++) {
370                                 myIt = myMap.find(order[i]);
371                                 
372                                 if(myIt != myMap.end()) { //we found it
373                                         out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
374                                         (myIt->second)->print(out);
375                                         
376                                         globaldata->Groups.push_back((myIt->second)->getGroup());
377                                 
378                                         RAbundVector rav = (myIt->second)->getRAbundVector();
379                                         m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
380                                         rav.print(*(filehandles[(myIt->second)->getGroup()]));
381                                         (*(filehandles[(myIt->second)->getGroup()])).close();
382                                 }else{
383                                         m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
384                                 }
385                         }
386                 
387                 }
388  
389         }
390         catch(exception& e) {
391                 m->errorOut(e, "SharedCommand", "printSharedData");
392                 exit(1);
393         }
394 }
395 //**********************************************************************************************************************
396 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
397         try {
398                 
399                 vector<SharedRAbundVector*> newLookup;
400                 for (int i = 0; i < thislookup.size(); i++) {
401                         SharedRAbundVector* temp = new SharedRAbundVector();
402                         temp->setLabel(thislookup[i]->getLabel());
403                         temp->setGroup(thislookup[i]->getGroup());
404                         newLookup.push_back(temp);
405                 }
406                 
407                 //for each bin
408                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
409                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
410                 
411                         //look at each sharedRabund and make sure they are not all zero
412                         bool allZero = true;
413                         for (int j = 0; j < thislookup.size(); j++) {
414                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
415                         }
416                         
417                         //if they are not all zero add this bin
418                         if (!allZero) {
419                                 for (int j = 0; j < thislookup.size(); j++) {
420                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
421                                 }
422                         }
423                         //else{  cout << "bin # " << i << " is all zeros" << endl;  }
424                 }
425         
426                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
427                 thislookup = newLookup;
428                 
429                 return 0;
430  
431         }
432         catch(exception& e) {
433                 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
434                 exit(1);
435         }
436 }
437 //**********************************************************************************************************************
438 int SharedCommand::createMisMatchFile() {
439         try {
440                 ofstream outMisMatch;
441                 string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName));
442                 
443                 //you have sequences in your list file that are not in your group file
444                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
445                         outputMisMatchName += "missing.group";
446                         m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
447                         
448                         m->openOutputFile(outputMisMatchName, outMisMatch);
449                         
450                         map<string, string> listNames;
451                         map<string, string>::iterator itList;
452                         
453                         //go through list and if group returns "not found" output it
454                         for (int i = 0; i < SharedList->getNumBins(); i++) {
455                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
456                         
457                                 string names = SharedList->get(i); 
458                                 
459                                 while (names.find_first_of(',') != -1) { 
460                                         string name = names.substr(0,names.find_first_of(','));
461                                         names = names.substr(names.find_first_of(',')+1, names.length());
462                                         string group = groupMap->getGroup(name);
463                                         
464                                         if(group == "not found") {      outMisMatch << name << endl;  }
465                                         
466                                         itList = listNames.find(name);
467                                         if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
468                                         else { listNames[name] = name; }
469                                 }
470                         
471                                 //get last name
472                                 string group = groupMap->getGroup(names);
473                                 if(group == "not found") {      outMisMatch << names << endl;  }        
474                                 
475                                 itList = listNames.find(names);
476                                 if (itList != listNames.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
477                                 else { listNames[names] = names; }
478
479                         }
480                         
481                         outMisMatch.close();
482                         
483                 
484                 }else {//you have sequences in your group file that are not in you list file
485                         
486                         outputMisMatchName += "missing.name";
487                         m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
488                         
489                         map<string, string> namesInList;
490                         map<string, string>::iterator itList;
491                         
492                         //go through listfile and get names
493                         for (int i = 0; i < SharedList->getNumBins(); i++) {
494                                 if (m->control_pressed) {  return 0; } 
495
496                                 
497                                 string names = SharedList->get(i); 
498                 
499                                 while (names.find_first_of(',') != -1) { 
500                                         string name = names.substr(0,names.find_first_of(','));
501                                         names = names.substr(names.find_first_of(',')+1, names.length());
502                                         
503                                         itList = namesInList.find(name);
504                                         if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
505
506                                         namesInList[name] = name;
507                                         
508                                 }
509                                 
510                                 itList = namesInList.find(names);
511                                 if (itList != namesInList.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
512
513                                 //get last name
514                                 namesInList[names] = names;                             
515                         }
516                         
517                         //get names of sequences in groupfile
518                         vector<string> seqNames = groupMap->getNamesSeqs();
519                 
520                         map<string, string>::iterator itMatch;
521                         
522                         m->openOutputFile(outputMisMatchName, outMisMatch);
523                         
524                         //loop through names in seqNames and if they aren't in namesIn list output them
525                         for (int i = 0; i < seqNames.size(); i++) {
526                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
527                                 
528                                 itMatch = namesInList.find(seqNames[i]);
529                                 
530                                 if (itMatch == namesInList.end()) {
531                                 
532                                         outMisMatch << seqNames[i] << endl; 
533                                 }
534                         }               
535                         outMisMatch.close();
536                 }
537                 
538                 return 0;
539         }
540         catch(exception& e) {
541                 m->errorOut(e, "SharedCommand", "createMisMatchFile");
542                 exit(1);
543         }
544 }
545
546 //**********************************************************************************************************************
547
548 SharedCommand::~SharedCommand(){
549         //delete list;
550         
551         
552 }
553 //**********************************************************************************************************************
554 int SharedCommand::readOrderFile() {
555         try {
556                 //remove old names
557                 order.clear();
558                 
559                 ifstream in;
560                 m->openInputFile(globaldata->getOrderGroupFile(), in);
561                 string thisGroup;
562                 
563                 while(!in.eof()){
564                         in >> thisGroup; m->gobble(in);
565                                                 
566                         order.push_back(thisGroup);
567                         
568                         if (m->control_pressed) { order.clear(); break; }
569                 }
570                 in.close();             
571                 
572                 return 0;
573         }
574         catch(exception& e) {
575                 m->errorOut(e, "SharedCommand", "readOrderFile");
576                 exit(1);
577         }
578 }
579 //**********************************************************************************************************************
580
581 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
582         try {
583                 for (int i = 0; i < groups.size(); i++) {
584                         if (groupname == groups[i]) { return true; }
585                 }
586                 
587                 return false;
588         }
589         catch(exception& e) {
590                 m->errorOut(e, "SharedCommand", "isValidGroup");
591                 exit(1);
592         }
593 }
594 /************************************************************/
595
596