]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
added set.current and get.current commands and modified existing commands to update...
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11 //********************************************************************************************************************
12 //sorts lowest to highest
13 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
14         return (left->getGroup() < right->getGroup());  
15
16 //**********************************************************************************************************************
17 vector<string> SharedCommand::getValidParameters(){     
18         try {
19                 vector<string> myArray; 
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "SharedCommand", "getValidParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 SharedCommand::SharedCommand(){ 
29         try {           
30                 
31                 //initialize outputTypes
32                 vector<string> tempOutNames;
33                 outputTypes["rabund"] = tempOutNames;
34                 outputTypes["shared"] = tempOutNames;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "SharedCommand", "SharedCommand");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 vector<string> SharedCommand::getRequiredParameters(){  
43         try {
44                 vector<string> myArray;
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "SharedCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> SharedCommand::getRequiredFiles(){       
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "SharedCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64
65 SharedCommand::SharedCommand(string o) : outputDir(o) {
66         try {
67                 globaldata = GlobalData::getInstance();
68                 
69                 //initialize outputTypes
70                 vector<string> tempOutNames;
71                 outputTypes["rabund"] = tempOutNames;
72                 outputTypes["shared"] = tempOutNames;
73                 
74                 //getting output filename
75                 filename = globaldata->inputFileName;
76                 if (outputDir == "") { outputDir += m->hasPath(filename); }
77                 
78                 filename = outputDir + m->getRootName(m->getSimpleName(filename));
79                 filename = filename + "shared";
80                 outputTypes["shared"].push_back(filename);
81                 
82                 m->openOutputFile(filename, out);
83                 pickedGroups = false;
84                 
85                 groupMap = globaldata->gGroupmap;
86                 
87                 //if hte user has not specified any groups then use them all
88                 if (globaldata->Groups.size() == 0) {
89                         groups = groupMap->namesOfGroups;
90                 }else{ //they have specified groups
91                         groups = globaldata->Groups;
92                         pickedGroups = true;
93                 }
94                 
95                 //fill filehandles with neccessary ofstreams
96                 int i;
97                 ofstream* temp;
98                 for (i=0; i<groups.size(); i++) {
99                         temp = new ofstream;
100                         filehandles[groups[i]] = temp;
101                 }
102                 
103                 //set fileroot
104                 fileroot = outputDir + m->getRootName(m->getSimpleName(globaldata->getListFile()));
105                 
106                 //clears file before we start to write to it below
107                 for (int i=0; i<groups.size(); i++) {
108                         remove((fileroot + groups[i] + ".rabund").c_str());
109                         outputNames.push_back((fileroot + groups[i] + ".rabund"));
110                         outputTypes["rabund"].push_back((fileroot + groups[i] + ".rabund"));
111                 }
112
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "SharedCommand", "SharedCommand");
116                 exit(1);
117         }
118 }
119 //**********************************************************************************************************************
120
121 int SharedCommand::execute(){
122         try {
123                 
124                 //lookup.clear();
125                 string errorOff = "no error";
126                 //errorOff = "";
127                 
128                 //if user provided an order file containing the order the shared file should be in read it
129                 if (globaldata->getOrderGroupFile() != "") { readOrderFile(); }
130                 
131                 //read in listfile
132                 read = new ReadOTUFile(globaldata->inputFileName);      
133                 read->read(&*globaldata); 
134                 delete read;
135
136                 input = globaldata->ginput;
137                 SharedList = globaldata->gSharedList;
138                 string lastLabel = SharedList->getLabel();
139                 vector<SharedRAbundVector*> lookup; 
140                 
141                 if (m->control_pressed) { 
142                         delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
143                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
144                         out.close(); remove(filename.c_str()); 
145                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
146                         return 1; 
147                 }
148                                 
149                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
150                         m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
151                         
152                         out.close();
153                         remove(filename.c_str()); //remove blank shared file you made
154                         
155                         createMisMatchFile();
156                         
157                         //delete memory
158                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
159                                 delete it3->second;
160                         }
161                         delete input;
162                         globaldata->ginput = NULL;
163                         delete SharedList;
164                         globaldata->gSharedList = NULL;
165                         
166                         return 1; 
167                 }
168                 
169                 //if user has specified groups make new groupfile for them
170                 if (globaldata->Groups.size() != 0) { //make new group file
171                         string groups = "";
172                         for (int i = 0; i < globaldata->Groups.size(); i++) {
173                                 groups += globaldata->Groups[i] + ".";
174                         }
175                 
176                         string newGroupFile = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + groups + "groups";
177                         ofstream outGroups;
178                         m->openOutputFile(newGroupFile, outGroups);
179                 
180                         vector<string> names = groupMap->getNamesSeqs();
181                         string groupName;
182                         for (int i = 0; i < names.size(); i++) {
183                                 groupName = groupMap->getGroup(names[i]);
184                                 if (isValidGroup(groupName, globaldata->Groups)) {
185                                         outGroups << names[i] << '\t' << groupName << endl;
186                                 }
187                         }
188                         outGroups.close();
189                 }
190                 
191                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
192                 set<string> processedLabels;
193                 set<string> userLabels = globaldata->labels;    
194         
195                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
196                         if (m->control_pressed) { 
197                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
198                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
199                                 out.close(); remove(filename.c_str()); 
200                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
201                                 return 1; 
202                         }
203                 
204                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
205                                         
206                                         lookup = SharedList->getSharedRAbundVector();
207                                         
208                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
209                                         if (pickedGroups) { //check for otus with no seqs in them
210                                                 eliminateZeroOTUS(lookup);
211                                         }
212                                         
213                                         if (m->control_pressed) { 
214                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
215                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
216                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
217                                                 out.close(); remove(filename.c_str()); 
218                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
219                                                 return 1; 
220                                         }
221                                         
222                                         printSharedData(lookup); //prints info to the .shared file
223                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
224                                 
225                                         processedLabels.insert(SharedList->getLabel());
226                                         userLabels.erase(SharedList->getLabel());
227                         }
228                         
229                         if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
230                                         string saveLabel = SharedList->getLabel();
231                                         
232                                         delete SharedList;
233                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
234                                         
235                                         lookup = SharedList->getSharedRAbundVector();
236                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
237                                         if (pickedGroups) { //check for otus with no seqs in them
238                                                 eliminateZeroOTUS(lookup);
239                                         }
240                                         
241                                         
242                                         if (m->control_pressed) { 
243                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
244                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
245                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
246                                                 out.close(); remove(filename.c_str()); 
247                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
248                                                 return 1; 
249                                         }
250                                         
251                                         printSharedData(lookup); //prints info to the .shared file
252                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
253                                         
254                                         processedLabels.insert(SharedList->getLabel());
255                                         userLabels.erase(SharedList->getLabel());
256                                         
257                                         //restore real lastlabel to save below
258                                         SharedList->setLabel(saveLabel);
259                         }
260                         
261                 
262                         lastLabel = SharedList->getLabel();
263                                 
264                         delete SharedList;
265                         SharedList = input->getSharedListVector(); //get new list vector to process
266                 }
267                 
268                 //output error messages about any remaining user labels
269                 set<string>::iterator it;
270                 bool needToRun = false;
271                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
272                         if (processedLabels.count(lastLabel) != 1) {
273                                 needToRun = true;
274                         }
275                 }
276                 
277                 //run last label if you need to
278                 if (needToRun == true)  {
279                         if (SharedList != NULL) {       delete SharedList;      }
280                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
281                                         
282                         lookup = SharedList->getSharedRAbundVector();
283                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
284                         if (pickedGroups) { //check for otus with no seqs in them
285                                 eliminateZeroOTUS(lookup);
286                         }
287                         
288                         if (m->control_pressed) { 
289                                         delete input;  globaldata->ginput = NULL; 
290                                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
291                                         out.close(); remove(filename.c_str()); 
292                                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
293                                         return 1; 
294                         }
295                         
296                         printSharedData(lookup); //prints info to the .shared file
297                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
298                         delete SharedList;
299                 }
300                 
301                 globaldata->gSharedList = NULL;
302                 
303                 out.close();
304                 
305                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
306                         delete it3->second;
307                 }
308
309                 
310                 //change format to shared  to speed up commands
311                 globaldata->setFormat("sharedfile");
312                 globaldata->setListFile("");
313                 globaldata->setGroupFile("");
314                 globaldata->setSharedFile(filename);
315                 
316                 
317                 if (m->control_pressed) { 
318                                 delete input;  globaldata->ginput = NULL; 
319                                 remove(filename.c_str()); 
320                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
321                                 return 1; 
322                 }
323                 
324                 //set rabund file as new current rabundfile
325                 string current = "";
326                 itTypes = outputTypes.find("rabund");
327                 if (itTypes != outputTypes.end()) {
328                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
329                 }
330                 
331                 itTypes = outputTypes.find("shared");
332                 if (itTypes != outputTypes.end()) {
333                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
334                 }       
335                 
336                 m->mothurOutEndLine();
337                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
338                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
339                 m->mothurOut(filename); m->mothurOutEndLine();
340                 m->mothurOutEndLine();
341                 
342                 return 0;
343         }
344         catch(exception& e) {
345                 m->errorOut(e, "SharedCommand", "execute");
346                 exit(1);
347         }
348 }
349 //**********************************************************************************************************************
350 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
351         try {
352                 
353                 if (order.size() == 0) { //user has not specified an order so do aplabetically
354                         sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
355                         
356                         globaldata->Groups.clear();
357                         
358                         //initialize bin values
359                         for (int i = 0; i < thislookup.size(); i++) {
360                                 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
361                                 thislookup[i]->print(out);
362                                 
363                                 globaldata->Groups.push_back(thislookup[i]->getGroup());
364                                 
365                                 RAbundVector rav = thislookup[i]->getRAbundVector();
366                                 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
367                                 rav.print(*(filehandles[thislookup[i]->getGroup()]));
368                                 (*(filehandles[thislookup[i]->getGroup()])).close();
369                         }
370                 }else{
371                         //create a map from groupName to each sharedrabund
372                         map<string, SharedRAbundVector*> myMap;
373                         map<string, SharedRAbundVector*>::iterator myIt;
374                         
375                         for (int i = 0; i < thislookup.size(); i++) {
376                                 myMap[thislookup[i]->getGroup()] = thislookup[i];
377                         }
378                         
379                         globaldata->Groups.clear();
380                         
381                         //loop through ordered list and print the rabund
382                         for (int i = 0; i < order.size(); i++) {
383                                 myIt = myMap.find(order[i]);
384                                 
385                                 if(myIt != myMap.end()) { //we found it
386                                         out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
387                                         (myIt->second)->print(out);
388                                         
389                                         globaldata->Groups.push_back((myIt->second)->getGroup());
390                                 
391                                         RAbundVector rav = (myIt->second)->getRAbundVector();
392                                         m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
393                                         rav.print(*(filehandles[(myIt->second)->getGroup()]));
394                                         (*(filehandles[(myIt->second)->getGroup()])).close();
395                                 }else{
396                                         m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
397                                 }
398                         }
399                 
400                 }
401  
402         }
403         catch(exception& e) {
404                 m->errorOut(e, "SharedCommand", "printSharedData");
405                 exit(1);
406         }
407 }
408 //**********************************************************************************************************************
409 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
410         try {
411                 
412                 vector<SharedRAbundVector*> newLookup;
413                 for (int i = 0; i < thislookup.size(); i++) {
414                         SharedRAbundVector* temp = new SharedRAbundVector();
415                         temp->setLabel(thislookup[i]->getLabel());
416                         temp->setGroup(thislookup[i]->getGroup());
417                         newLookup.push_back(temp);
418                 }
419                 
420                 //for each bin
421                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
422                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
423                 
424                         //look at each sharedRabund and make sure they are not all zero
425                         bool allZero = true;
426                         for (int j = 0; j < thislookup.size(); j++) {
427                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
428                         }
429                         
430                         //if they are not all zero add this bin
431                         if (!allZero) {
432                                 for (int j = 0; j < thislookup.size(); j++) {
433                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
434                                 }
435                         }
436                         //else{  cout << "bin # " << i << " is all zeros" << endl;  }
437                 }
438         
439                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
440                 thislookup = newLookup;
441                 
442                 return 0;
443  
444         }
445         catch(exception& e) {
446                 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
447                 exit(1);
448         }
449 }
450 //**********************************************************************************************************************
451 int SharedCommand::createMisMatchFile() {
452         try {
453                 ofstream outMisMatch;
454                 string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName));
455                 
456                 //you have sequences in your list file that are not in your group file
457                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
458                         outputMisMatchName += "missing.group";
459                         m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
460                         
461                         m->openOutputFile(outputMisMatchName, outMisMatch);
462                         
463                         map<string, string> listNames;
464                         map<string, string>::iterator itList;
465                         
466                         //go through list and if group returns "not found" output it
467                         for (int i = 0; i < SharedList->getNumBins(); i++) {
468                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
469                         
470                                 string names = SharedList->get(i); 
471                                 
472                                 while (names.find_first_of(',') != -1) { 
473                                         string name = names.substr(0,names.find_first_of(','));
474                                         names = names.substr(names.find_first_of(',')+1, names.length());
475                                         string group = groupMap->getGroup(name);
476                                         
477                                         if(group == "not found") {      outMisMatch << name << endl;  }
478                                         
479                                         itList = listNames.find(name);
480                                         if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
481                                         else { listNames[name] = name; }
482                                 }
483                         
484                                 //get last name
485                                 string group = groupMap->getGroup(names);
486                                 if(group == "not found") {      outMisMatch << names << endl;  }        
487                                 
488                                 itList = listNames.find(names);
489                                 if (itList != listNames.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
490                                 else { listNames[names] = names; }
491
492                         }
493                         
494                         outMisMatch.close();
495                         
496                 
497                 }else {//you have sequences in your group file that are not in you list file
498                         
499                         outputMisMatchName += "missing.name";
500                         m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
501                         
502                         map<string, string> namesInList;
503                         map<string, string>::iterator itList;
504                         
505                         //go through listfile and get names
506                         for (int i = 0; i < SharedList->getNumBins(); i++) {
507                                 if (m->control_pressed) {  return 0; } 
508
509                                 
510                                 string names = SharedList->get(i); 
511                 
512                                 while (names.find_first_of(',') != -1) { 
513                                         string name = names.substr(0,names.find_first_of(','));
514                                         names = names.substr(names.find_first_of(',')+1, names.length());
515                                         
516                                         itList = namesInList.find(name);
517                                         if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
518
519                                         namesInList[name] = name;
520                                         
521                                 }
522                                 
523                                 itList = namesInList.find(names);
524                                 if (itList != namesInList.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
525
526                                 //get last name
527                                 namesInList[names] = names;                             
528                         }
529                         
530                         //get names of sequences in groupfile
531                         vector<string> seqNames = groupMap->getNamesSeqs();
532                 
533                         map<string, string>::iterator itMatch;
534                         
535                         m->openOutputFile(outputMisMatchName, outMisMatch);
536                         
537                         //loop through names in seqNames and if they aren't in namesIn list output them
538                         for (int i = 0; i < seqNames.size(); i++) {
539                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
540                                 
541                                 itMatch = namesInList.find(seqNames[i]);
542                                 
543                                 if (itMatch == namesInList.end()) {
544                                 
545                                         outMisMatch << seqNames[i] << endl; 
546                                 }
547                         }               
548                         outMisMatch.close();
549                 }
550                 
551                 return 0;
552         }
553         catch(exception& e) {
554                 m->errorOut(e, "SharedCommand", "createMisMatchFile");
555                 exit(1);
556         }
557 }
558
559 //**********************************************************************************************************************
560
561 SharedCommand::~SharedCommand(){
562         //delete list;
563         
564         
565 }
566 //**********************************************************************************************************************
567 int SharedCommand::readOrderFile() {
568         try {
569                 //remove old names
570                 order.clear();
571                 
572                 ifstream in;
573                 m->openInputFile(globaldata->getOrderGroupFile(), in);
574                 string thisGroup;
575                 
576                 while(!in.eof()){
577                         in >> thisGroup; m->gobble(in);
578                                                 
579                         order.push_back(thisGroup);
580                         
581                         if (m->control_pressed) { order.clear(); break; }
582                 }
583                 in.close();             
584                 
585                 return 0;
586         }
587         catch(exception& e) {
588                 m->errorOut(e, "SharedCommand", "readOrderFile");
589                 exit(1);
590         }
591 }
592 //**********************************************************************************************************************
593
594 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
595         try {
596                 for (int i = 0; i < groups.size(); i++) {
597                         if (groupname == groups[i]) { return true; }
598                 }
599                 
600                 return false;
601         }
602         catch(exception& e) {
603                 m->errorOut(e, "SharedCommand", "isValidGroup");
604                 exit(1);
605         }
606 }
607 /************************************************************/
608
609