]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
fixes while testing
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11 //********************************************************************************************************************
12 //sorts lowest to highest
13 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
14         return (left->getGroup() < right->getGroup());  
15
16 //**********************************************************************************************************************
17 vector<string> SharedCommand::getValidParameters(){     
18         try {
19                 vector<string> myArray; 
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "SharedCommand", "getValidParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 SharedCommand::SharedCommand(){ 
29         try {           
30
31                 //initialize outputTypes
32                 vector<string> tempOutNames;
33                 outputTypes["rabund"] = tempOutNames;
34                 outputTypes["shared"] = tempOutNames;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "SharedCommand", "SharedCommand");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 vector<string> SharedCommand::getRequiredParameters(){  
43         try {
44                 vector<string> myArray;
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "SharedCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> SharedCommand::getRequiredFiles(){       
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "SharedCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64
65 SharedCommand::SharedCommand(string o) : outputDir(o) {
66         try {
67                 globaldata = GlobalData::getInstance();
68                 
69                 //initialize outputTypes
70                 vector<string> tempOutNames;
71                 outputTypes["rabund"] = tempOutNames;
72                 outputTypes["shared"] = tempOutNames;
73                 
74                 //getting output filename
75                 filename = globaldata->inputFileName;
76                 if (outputDir == "") { outputDir += m->hasPath(filename); }
77                 
78                 filename = outputDir + m->getRootName(m->getSimpleName(filename));
79                 filename = filename + "shared";
80                 outputTypes["shared"].push_back(filename);
81                 
82                 m->openOutputFile(filename, out);
83                 pickedGroups = false;
84                 
85                 groupMap = globaldata->gGroupmap;
86                 
87                 //if hte user has not specified any groups then use them all
88                 if (globaldata->Groups.size() == 0) {
89                         groups = groupMap->namesOfGroups;
90                 }else{ //they have specified groups
91                         groups = globaldata->Groups;
92                         pickedGroups = true;
93                 }
94                 
95                 //fill filehandles with neccessary ofstreams
96                 int i;
97                 ofstream* temp;
98                 for (i=0; i<groups.size(); i++) {
99                         temp = new ofstream;
100                         filehandles[groups[i]] = temp;
101                 }
102                 
103                 //set fileroot
104                 fileroot = outputDir + m->getRootName(m->getSimpleName(globaldata->getListFile()));
105                 
106                 //clears file before we start to write to it below
107                 for (int i=0; i<groups.size(); i++) {
108                         remove((fileroot + groups[i] + ".rabund").c_str());
109                         outputNames.push_back((fileroot + groups[i] + ".rabund"));
110                         outputTypes["rabund"].push_back((fileroot + groups[i] + ".rabund"));
111                 }
112
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "SharedCommand", "SharedCommand");
116                 exit(1);
117         }
118 }
119 //**********************************************************************************************************************
120
121 int SharedCommand::execute(){
122         try {
123                 
124                 //lookup.clear();
125                 string errorOff = "no error";
126                 //errorOff = "";
127                 
128                 //if user provided an order file containing the order the shared file should be in read it
129                 if (globaldata->getOrderGroupFile() != "") { readOrderFile(); }
130                 
131                 //read in listfile
132                 read = new ReadOTUFile(globaldata->inputFileName);      
133                 read->read(&*globaldata); 
134                 delete read;
135
136                 input = globaldata->ginput;
137                 SharedList = globaldata->gSharedList;
138                 string lastLabel = SharedList->getLabel();
139                 vector<SharedRAbundVector*> lookup; 
140                 
141                 if (m->control_pressed) { 
142                         delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
143                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
144                         out.close(); remove(filename.c_str()); 
145                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
146                         return 1; 
147                 }
148                                 
149                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
150                         m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
151                         
152                         out.close();
153                         remove(filename.c_str()); //remove blank shared file you made
154                         
155                         createMisMatchFile();
156                         
157                         //delete memory
158                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
159                                 delete it3->second;
160                         }
161                         delete input;
162                         globaldata->ginput = NULL;
163                         delete SharedList;
164                         globaldata->gSharedList = NULL;
165                         
166                         return 1; 
167                 }
168                 
169                 //if user has specified groups make new groupfile for them
170                 if (globaldata->Groups.size() != 0) { //make new group file
171                         string groups = "";
172                         for (int i = 0; i < globaldata->Groups.size(); i++) {
173                                 groups += globaldata->Groups[i] + ".";
174                         }
175                 
176                         string newGroupFile = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + groups + "groups";
177                         ofstream outGroups;
178                         m->openOutputFile(newGroupFile, outGroups);
179                 
180                         vector<string> names = groupMap->getNamesSeqs();
181                         string groupName;
182                         for (int i = 0; i < names.size(); i++) {
183                                 groupName = groupMap->getGroup(names[i]);
184                                 if (isValidGroup(groupName, globaldata->Groups)) {
185                                         outGroups << names[i] << '\t' << groupName << endl;
186                                 }
187                         }
188                         outGroups.close();
189                 }
190                 
191                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
192                 set<string> processedLabels;
193                 set<string> userLabels = globaldata->labels;    
194         
195                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
196                         if (m->control_pressed) { 
197                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
198                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
199                                 out.close(); remove(filename.c_str()); 
200                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
201                                 return 1; 
202                         }
203                 
204                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
205                                         
206                                         lookup = SharedList->getSharedRAbundVector();
207                                         
208                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
209                                         if (pickedGroups) { //check for otus with no seqs in them
210                                                 eliminateZeroOTUS(lookup);
211                                         }
212                                         
213                                         if (m->control_pressed) { 
214                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
215                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
216                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
217                                                 out.close(); remove(filename.c_str()); 
218                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
219                                                 return 1; 
220                                         }
221                                         
222                                         printSharedData(lookup); //prints info to the .shared file
223                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
224                                 
225                                         processedLabels.insert(SharedList->getLabel());
226                                         userLabels.erase(SharedList->getLabel());
227                         }
228                         
229                         if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
230                                         string saveLabel = SharedList->getLabel();
231                                         
232                                         delete SharedList;
233                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
234                                         
235                                         lookup = SharedList->getSharedRAbundVector();
236                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
237                                         if (pickedGroups) { //check for otus with no seqs in them
238                                                 eliminateZeroOTUS(lookup);
239                                         }
240                                         
241                                         
242                                         if (m->control_pressed) { 
243                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
244                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
245                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
246                                                 out.close(); remove(filename.c_str()); 
247                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
248                                                 return 1; 
249                                         }
250                                         
251                                         printSharedData(lookup); //prints info to the .shared file
252                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
253                                         
254                                         processedLabels.insert(SharedList->getLabel());
255                                         userLabels.erase(SharedList->getLabel());
256                                         
257                                         //restore real lastlabel to save below
258                                         SharedList->setLabel(saveLabel);
259                         }
260                         
261                 
262                         lastLabel = SharedList->getLabel();
263                                 
264                         delete SharedList;
265                         SharedList = input->getSharedListVector(); //get new list vector to process
266                 }
267                 
268                 //output error messages about any remaining user labels
269                 set<string>::iterator it;
270                 bool needToRun = false;
271                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
272                         if (processedLabels.count(lastLabel) != 1) {
273                                 needToRun = true;
274                         }
275                 }
276                 
277                 //run last label if you need to
278                 if (needToRun == true)  {
279                         if (SharedList != NULL) {       delete SharedList;      }
280                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
281                                         
282                         lookup = SharedList->getSharedRAbundVector();
283                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
284                         if (pickedGroups) { //check for otus with no seqs in them
285                                 eliminateZeroOTUS(lookup);
286                         }
287                         
288                         if (m->control_pressed) { 
289                                         delete input;  globaldata->ginput = NULL; 
290                                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
291                                         out.close(); remove(filename.c_str()); 
292                                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
293                                         return 1; 
294                         }
295                         
296                         printSharedData(lookup); //prints info to the .shared file
297                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
298                         delete SharedList;
299                 }
300                 
301                 globaldata->gSharedList = NULL;
302                 
303                 out.close();
304                 
305                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
306                         delete it3->second;
307                 }
308
309                 
310                 //change format to shared  to speed up commands
311                 globaldata->setFormat("sharedfile");
312                 globaldata->setListFile("");
313                 globaldata->setGroupFile("");
314                 globaldata->setSharedFile(filename);
315                 
316                 
317                 if (m->control_pressed) { 
318                                 delete input;  globaldata->ginput = NULL; 
319                                 remove(filename.c_str()); 
320                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
321                                 return 1; 
322                 }
323                 
324                 m->mothurOutEndLine();
325                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
326                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
327                 m->mothurOut(filename); m->mothurOutEndLine();
328                 m->mothurOutEndLine();
329                 
330                 return 0;
331         }
332         catch(exception& e) {
333                 m->errorOut(e, "SharedCommand", "execute");
334                 exit(1);
335         }
336 }
337 //**********************************************************************************************************************
338 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
339         try {
340                 
341                 if (order.size() == 0) { //user has not specified an order so do aplabetically
342                         sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
343                         
344                         globaldata->Groups.clear();
345                         
346                         //initialize bin values
347                         for (int i = 0; i < thislookup.size(); i++) {
348                                 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
349                                 thislookup[i]->print(out);
350                                 
351                                 globaldata->Groups.push_back(thislookup[i]->getGroup());
352                                 
353                                 RAbundVector rav = thislookup[i]->getRAbundVector();
354                                 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
355                                 rav.print(*(filehandles[thislookup[i]->getGroup()]));
356                                 (*(filehandles[thislookup[i]->getGroup()])).close();
357                         }
358                 }else{
359                         //create a map from groupName to each sharedrabund
360                         map<string, SharedRAbundVector*> myMap;
361                         map<string, SharedRAbundVector*>::iterator myIt;
362                         
363                         for (int i = 0; i < thislookup.size(); i++) {
364                                 myMap[thislookup[i]->getGroup()] = thislookup[i];
365                         }
366                         
367                         globaldata->Groups.clear();
368                         
369                         //loop through ordered list and print the rabund
370                         for (int i = 0; i < order.size(); i++) {
371                                 myIt = myMap.find(order[i]);
372                                 
373                                 if(myIt != myMap.end()) { //we found it
374                                         out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
375                                         (myIt->second)->print(out);
376                                         
377                                         globaldata->Groups.push_back((myIt->second)->getGroup());
378                                 
379                                         RAbundVector rav = (myIt->second)->getRAbundVector();
380                                         m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
381                                         rav.print(*(filehandles[(myIt->second)->getGroup()]));
382                                         (*(filehandles[(myIt->second)->getGroup()])).close();
383                                 }else{
384                                         m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
385                                 }
386                         }
387                 
388                 }
389  
390         }
391         catch(exception& e) {
392                 m->errorOut(e, "SharedCommand", "printSharedData");
393                 exit(1);
394         }
395 }
396 //**********************************************************************************************************************
397 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
398         try {
399                 
400                 vector<SharedRAbundVector*> newLookup;
401                 for (int i = 0; i < thislookup.size(); i++) {
402                         SharedRAbundVector* temp = new SharedRAbundVector();
403                         temp->setLabel(thislookup[i]->getLabel());
404                         temp->setGroup(thislookup[i]->getGroup());
405                         newLookup.push_back(temp);
406                 }
407                 
408                 //for each bin
409                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
410                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
411                 
412                         //look at each sharedRabund and make sure they are not all zero
413                         bool allZero = true;
414                         for (int j = 0; j < thislookup.size(); j++) {
415                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
416                         }
417                         
418                         //if they are not all zero add this bin
419                         if (!allZero) {
420                                 for (int j = 0; j < thislookup.size(); j++) {
421                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
422                                 }
423                         }
424                         //else{  cout << "bin # " << i << " is all zeros" << endl;  }
425                 }
426         
427                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
428                 thislookup = newLookup;
429                 
430                 return 0;
431  
432         }
433         catch(exception& e) {
434                 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
435                 exit(1);
436         }
437 }
438 //**********************************************************************************************************************
439 int SharedCommand::createMisMatchFile() {
440         try {
441                 ofstream outMisMatch;
442                 string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName));
443                 
444                 //you have sequences in your list file that are not in your group file
445                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
446                         outputMisMatchName += "missing.group";
447                         m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
448                         
449                         m->openOutputFile(outputMisMatchName, outMisMatch);
450                         
451                         map<string, string> listNames;
452                         map<string, string>::iterator itList;
453                         
454                         //go through list and if group returns "not found" output it
455                         for (int i = 0; i < SharedList->getNumBins(); i++) {
456                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
457                         
458                                 string names = SharedList->get(i); 
459                                 
460                                 while (names.find_first_of(',') != -1) { 
461                                         string name = names.substr(0,names.find_first_of(','));
462                                         names = names.substr(names.find_first_of(',')+1, names.length());
463                                         string group = groupMap->getGroup(name);
464                                         
465                                         if(group == "not found") {      outMisMatch << name << endl;  }
466                                         
467                                         itList = listNames.find(name);
468                                         if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
469                                         else { listNames[name] = name; }
470                                 }
471                         
472                                 //get last name
473                                 string group = groupMap->getGroup(names);
474                                 if(group == "not found") {      outMisMatch << names << endl;  }        
475                                 
476                                 itList = listNames.find(names);
477                                 if (itList != listNames.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
478                                 else { listNames[names] = names; }
479
480                         }
481                         
482                         outMisMatch.close();
483                         
484                 
485                 }else {//you have sequences in your group file that are not in you list file
486                         
487                         outputMisMatchName += "missing.name";
488                         m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
489                         
490                         map<string, string> namesInList;
491                         map<string, string>::iterator itList;
492                         
493                         //go through listfile and get names
494                         for (int i = 0; i < SharedList->getNumBins(); i++) {
495                                 if (m->control_pressed) {  return 0; } 
496
497                                 
498                                 string names = SharedList->get(i); 
499                 
500                                 while (names.find_first_of(',') != -1) { 
501                                         string name = names.substr(0,names.find_first_of(','));
502                                         names = names.substr(names.find_first_of(',')+1, names.length());
503                                         
504                                         itList = namesInList.find(name);
505                                         if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
506
507                                         namesInList[name] = name;
508                                         
509                                 }
510                                 
511                                 itList = namesInList.find(names);
512                                 if (itList != namesInList.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
513
514                                 //get last name
515                                 namesInList[names] = names;                             
516                         }
517                         
518                         //get names of sequences in groupfile
519                         vector<string> seqNames = groupMap->getNamesSeqs();
520                 
521                         map<string, string>::iterator itMatch;
522                         
523                         m->openOutputFile(outputMisMatchName, outMisMatch);
524                         
525                         //loop through names in seqNames and if they aren't in namesIn list output them
526                         for (int i = 0; i < seqNames.size(); i++) {
527                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
528                                 
529                                 itMatch = namesInList.find(seqNames[i]);
530                                 
531                                 if (itMatch == namesInList.end()) {
532                                 
533                                         outMisMatch << seqNames[i] << endl; 
534                                 }
535                         }               
536                         outMisMatch.close();
537                 }
538                 
539                 return 0;
540         }
541         catch(exception& e) {
542                 m->errorOut(e, "SharedCommand", "createMisMatchFile");
543                 exit(1);
544         }
545 }
546
547 //**********************************************************************************************************************
548
549 SharedCommand::~SharedCommand(){
550         //delete list;
551         
552         
553 }
554 //**********************************************************************************************************************
555 int SharedCommand::readOrderFile() {
556         try {
557                 //remove old names
558                 order.clear();
559                 
560                 ifstream in;
561                 m->openInputFile(globaldata->getOrderGroupFile(), in);
562                 string thisGroup;
563                 
564                 while(!in.eof()){
565                         in >> thisGroup; m->gobble(in);
566                                                 
567                         order.push_back(thisGroup);
568                         
569                         if (m->control_pressed) { order.clear(); break; }
570                 }
571                 in.close();             
572                 
573                 return 0;
574         }
575         catch(exception& e) {
576                 m->errorOut(e, "SharedCommand", "readOrderFile");
577                 exit(1);
578         }
579 }
580 //**********************************************************************************************************************
581
582 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
583         try {
584                 for (int i = 0; i < groups.size(); i++) {
585                         if (groupname == groups[i]) { return true; }
586                 }
587                 
588                 return false;
589         }
590         catch(exception& e) {
591                 m->errorOut(e, "SharedCommand", "isValidGroup");
592                 exit(1);
593         }
594 }
595 /************************************************************/
596
597