]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
added checks for ^C to quit command instead of program
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11
12 //**********************************************************************************************************************
13
14 SharedCommand::SharedCommand(string o) : outputDir(o) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 //getting output filename
19                 filename = globaldata->inputFileName;
20                 if (outputDir == "") { outputDir += hasPath(filename); }
21                 
22                 filename = outputDir + getRootName(getSimpleName(filename));
23                 filename = filename + "shared";
24                 
25                 openOutputFile(filename, out);
26                 pickedGroups = false;
27                 
28                 groupMap = globaldata->gGroupmap;
29                 
30                 //if hte user has not specified any groups then use them all
31                 if (globaldata->Groups.size() == 0) {
32                         groups = groupMap->namesOfGroups;
33                 }else{ //they have specified groups
34                         groups = globaldata->Groups;
35                         pickedGroups = true;
36                 }
37                 
38                 //fill filehandles with neccessary ofstreams
39                 int i;
40                 ofstream* temp;
41                 for (i=0; i<groups.size(); i++) {
42                         temp = new ofstream;
43                         filehandles[groups[i]] = temp;
44                 }
45                 
46                 //set fileroot
47                 fileroot = outputDir + getRootName(getSimpleName(globaldata->getListFile()));
48                 
49                 //clears file before we start to write to it below
50                 for (int i=0; i<groups.size(); i++) {
51                         remove((fileroot + groups[i] + ".rabund").c_str());
52                         outputNames.push_back((fileroot + groups[i] + ".rabund"));
53                 }
54
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "SharedCommand", "SharedCommand");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62
63 int SharedCommand::execute(){
64         try {
65                 
66                 //lookup.clear();
67                 string errorOff = "no error";
68                 //errorOff = "";
69                 
70                 //read in listfile
71                 read = new ReadOTUFile(globaldata->inputFileName);      
72                 read->read(&*globaldata); 
73                 delete read;
74
75                 input = globaldata->ginput;
76                 SharedList = globaldata->gSharedList;
77                 string lastLabel = SharedList->getLabel();
78                 vector<SharedRAbundVector*> lookup; 
79                 
80                 if (m->control_pressed) { 
81                         delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
82                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
83                         out.close(); remove(filename.c_str()); 
84                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
85                         return 1; 
86                 }
87                                 
88                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
89                         m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
90                         
91                         out.close();
92                         remove(filename.c_str()); //remove blank shared file you made
93                         
94                         createMisMatchFile();
95                         
96                         //delete memory
97                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
98                                 delete it3->second;
99                         }
100                         delete input;
101                         globaldata->ginput = NULL;
102                         delete SharedList;
103                         globaldata->gSharedList = NULL;
104                         
105                         return 1; 
106                 }
107                 
108                 //if user has specified groups make new groupfile for them
109                 if (globaldata->Groups.size() != 0) { //make new group file
110                         string groups = "";
111                         for (int i = 0; i < globaldata->Groups.size(); i++) {
112                                 groups += globaldata->Groups[i] + ".";
113                         }
114                 
115                         string newGroupFile = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + groups + "groups";
116                         ofstream outGroups;
117                         openOutputFile(newGroupFile, outGroups);
118                 
119                         vector<string> names = groupMap->getNamesSeqs();
120                         string groupName;
121                         for (int i = 0; i < names.size(); i++) {
122                                 groupName = groupMap->getGroup(names[i]);
123                                 if (isValidGroup(groupName, globaldata->Groups)) {
124                                         outGroups << names[i] << '\t' << groupName << endl;
125                                 }
126                         }
127                         outGroups.close();
128                 }
129                 
130                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
131                 set<string> processedLabels;
132                 set<string> userLabels = globaldata->labels;    
133         
134                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
135                         if (m->control_pressed) { 
136                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
137                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
138                                 out.close(); remove(filename.c_str()); 
139                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
140                                 return 1; 
141                         }
142                 
143                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
144                                         
145                                         lookup = SharedList->getSharedRAbundVector();
146                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
147                                         if (pickedGroups) { //check for otus with no seqs in them
148                                                 eliminateZeroOTUS(lookup);
149                                         }
150                                         
151                                         if (m->control_pressed) { 
152                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
153                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
154                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
155                                                 out.close(); remove(filename.c_str()); 
156                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
157                                                 return 1; 
158                                         }
159                                         
160                                         printSharedData(lookup); //prints info to the .shared file
161                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
162                                 
163                                         processedLabels.insert(SharedList->getLabel());
164                                         userLabels.erase(SharedList->getLabel());
165                         }
166                         
167                         if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
168                                         string saveLabel = SharedList->getLabel();
169                                         
170                                         delete SharedList;
171                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
172                                         
173                                         lookup = SharedList->getSharedRAbundVector();
174                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
175                                         if (pickedGroups) { //check for otus with no seqs in them
176                                                 eliminateZeroOTUS(lookup);
177                                         }
178                                         
179                                         
180                                         if (m->control_pressed) { 
181                                                 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL; 
182                                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
183                                                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
184                                                 out.close(); remove(filename.c_str()); 
185                                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
186                                                 return 1; 
187                                         }
188                                         
189                                         printSharedData(lookup); //prints info to the .shared file
190                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
191                                         
192                                         processedLabels.insert(SharedList->getLabel());
193                                         userLabels.erase(SharedList->getLabel());
194                                         
195                                         //restore real lastlabel to save below
196                                         SharedList->setLabel(saveLabel);
197                         }
198                         
199                 
200                         lastLabel = SharedList->getLabel();
201                                 
202                         delete SharedList;
203                         SharedList = input->getSharedListVector(); //get new list vector to process
204                 }
205                 
206                 //output error messages about any remaining user labels
207                 set<string>::iterator it;
208                 bool needToRun = false;
209                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
210                         if (processedLabels.count(lastLabel) != 1) {
211                                 needToRun = true;
212                         }
213                 }
214                 
215                 //run last label if you need to
216                 if (needToRun == true)  {
217                         if (SharedList != NULL) {       delete SharedList;      }
218                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
219                                         
220                         lookup = SharedList->getSharedRAbundVector();
221                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
222                         if (pickedGroups) { //check for otus with no seqs in them
223                                 eliminateZeroOTUS(lookup);
224                         }
225                         
226                         if (m->control_pressed) { 
227                                         delete input;  globaldata->ginput = NULL; 
228                                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
229                                         out.close(); remove(filename.c_str()); 
230                                         for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
231                                         return 1; 
232                         }
233                         
234                         printSharedData(lookup); //prints info to the .shared file
235                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
236                         delete SharedList;
237                 }
238                 
239                 globaldata->gSharedList = NULL;
240                 
241                 out.close();
242                 
243                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
244                         delete it3->second;
245                 }
246
247                 
248                 //change format to shared  to speed up commands
249                 globaldata->setFormat("sharedfile");
250                 globaldata->setListFile("");
251                 globaldata->setGroupFile("");
252                 globaldata->setSharedFile(filename);
253                 
254                 if (m->control_pressed) { 
255                                 delete input;  globaldata->ginput = NULL; 
256                                 remove(filename.c_str()); 
257                                 for (int i=0; i<groups.size(); i++) {  remove((fileroot + groups[i] + ".rabund").c_str());              }
258                                 return 1; 
259                 }
260                 
261                 m->mothurOutEndLine();
262                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
263                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
264                 m->mothurOut(filename); m->mothurOutEndLine();
265                 m->mothurOutEndLine();
266                 
267                 return 0;
268         }
269         catch(exception& e) {
270                 m->errorOut(e, "SharedCommand", "execute");
271                 exit(1);
272         }
273 }
274 //**********************************************************************************************************************
275 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
276         try {
277                 
278                 //initialize bin values
279                 for (int i = 0; i < thislookup.size(); i++) {
280 //cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() <<  endl;
281                         out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
282                         thislookup[i]->print(out);
283                         
284                         RAbundVector rav = thislookup[i]->getRAbundVector();
285                         openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
286                         rav.print(*(filehandles[thislookup[i]->getGroup()]));
287                         (*(filehandles[thislookup[i]->getGroup()])).close();
288                 }
289  
290         }
291         catch(exception& e) {
292                 m->errorOut(e, "SharedCommand", "printSharedData");
293                 exit(1);
294         }
295 }
296 //**********************************************************************************************************************
297 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
298         try {
299                 
300                 vector<SharedRAbundVector*> newLookup;
301                 for (int i = 0; i < thislookup.size(); i++) {
302                         SharedRAbundVector* temp = new SharedRAbundVector();
303                         temp->setLabel(thislookup[i]->getLabel());
304                         temp->setGroup(thislookup[i]->getGroup());
305                         newLookup.push_back(temp);
306                 }
307                 
308                 //for each bin
309                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
310                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
311                 
312                         //look at each sharedRabund and make sure they are not all zero
313                         bool allZero = true;
314                         for (int j = 0; j < thislookup.size(); j++) {
315                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
316                         }
317                         
318                         //if they are not all zero add this bin
319                         if (!allZero) {
320                                 for (int j = 0; j < thislookup.size(); j++) {
321                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
322                                 }
323                         }
324                         //else{  cout << "bin # " << i << " is all zeros" << endl;  }
325                 }
326         
327                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
328                 thislookup = newLookup;
329                 
330                 return 0;
331  
332         }
333         catch(exception& e) {
334                 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
335                 exit(1);
336         }
337 }
338 //**********************************************************************************************************************
339 int SharedCommand::createMisMatchFile() {
340         try {
341                 ofstream outMisMatch;
342                 string outputMisMatchName = outputDir + getRootName(getSimpleName(globaldata->inputFileName));
343                 
344                 //you have sequences in your list file that are not in your group file
345                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
346                         outputMisMatchName += "missing.group";
347                         m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
348                         
349                         openOutputFile(outputMisMatchName, outMisMatch);
350                         
351                         map<string, string> listNames;
352                         map<string, string>::iterator itList;
353                         
354                         //go through list and if group returns "not found" output it
355                         for (int i = 0; i < SharedList->getNumBins(); i++) {
356                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
357                         
358                                 string names = SharedList->get(i); 
359                                 
360                                 while (names.find_first_of(',') != -1) { 
361                                         string name = names.substr(0,names.find_first_of(','));
362                                         names = names.substr(names.find_first_of(',')+1, names.length());
363                                         string group = groupMap->getGroup(name);
364                                         
365                                         if(group == "not found") {      outMisMatch << name << endl;  }
366                                         
367                                         itList = listNames.find(name);
368                                         if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
369                                         else { listNames[name] = name; }
370                                 }
371                         
372                                 //get last name
373                                 string group = groupMap->getGroup(names);
374                                 if(group == "not found") {      outMisMatch << names << endl;  }        
375                                 
376                                 itList = listNames.find(names);
377                                 if (itList != listNames.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
378                                 else { listNames[names] = names; }
379
380                         }
381                         
382                         outMisMatch.close();
383                         
384                 
385                 }else {//you have sequences in your group file that are not in you list file
386                         
387                         outputMisMatchName += "missing.name";
388                         m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
389                         
390                         map<string, string> namesInList;
391                         map<string, string>::iterator itList;
392                         
393                         //go through listfile and get names
394                         for (int i = 0; i < SharedList->getNumBins(); i++) {
395                                 if (m->control_pressed) {  return 0; } 
396
397                                 
398                                 string names = SharedList->get(i); 
399                 
400                                 while (names.find_first_of(',') != -1) { 
401                                         string name = names.substr(0,names.find_first_of(','));
402                                         names = names.substr(names.find_first_of(',')+1, names.length());
403                                         
404                                         itList = namesInList.find(name);
405                                         if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
406
407                                         namesInList[name] = name;
408                                         
409                                 }
410                                 
411                                 itList = namesInList.find(names);
412                                 if (itList != namesInList.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
413
414                                 //get last name
415                                 namesInList[names] = names;                             
416                         }
417                         
418                         //get names of sequences in groupfile
419                         vector<string> seqNames = groupMap->getNamesSeqs();
420                 
421                         map<string, string>::iterator itMatch;
422                         
423                         openOutputFile(outputMisMatchName, outMisMatch);
424                         
425                         //loop through names in seqNames and if they aren't in namesIn list output them
426                         for (int i = 0; i < seqNames.size(); i++) {
427                                 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; } 
428                                 
429                                 itMatch = namesInList.find(seqNames[i]);
430                                 
431                                 if (itMatch == namesInList.end()) {
432                                 
433                                         outMisMatch << seqNames[i] << endl; 
434                                 }
435                         }               
436                         outMisMatch.close();
437                 }
438                 
439                 return 0;
440         }
441         catch(exception& e) {
442                 m->errorOut(e, "SharedCommand", "createMisMatchFile");
443                 exit(1);
444         }
445 }
446
447 //**********************************************************************************************************************
448
// Destructor: releases nothing. The ofstream objects stored in filehandles
// are deleted inside execute().
// NOTE(review): if execute() is never called on an instance, those streams
// appear to be leaked - confirm whether that can happen.
SharedCommand::~SharedCommand(){
        //delete list;
        
        
}
454
455 //**********************************************************************************************************************
456
457 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
458         try {
459                 for (int i = 0; i < groups.size(); i++) {
460                         if (groupname == groups[i]) { return true; }
461                 }
462                 
463                 return false;
464         }
465         catch(exception& e) {
466                 m->errorOut(e, "SharedCommand", "isValidGroup");
467                 exit(1);
468         }
469 }
470 /************************************************************/
471
472