]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
finished chimera changes
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11
12 //**********************************************************************************************************************
13
14 SharedCommand::SharedCommand(string o) : outputDir(o) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 //getting output filename
19                 filename = globaldata->inputFileName;
20                 if (outputDir == "") { outputDir += hasPath(filename); }
21                 
22                 filename = outputDir + getRootName(getSimpleName(filename));
23                 filename = filename + "shared";
24                 
25                 openOutputFile(filename, out);
26                 pickedGroups = false;
27                 
28                 groupMap = globaldata->gGroupmap;
29                 
30                 //if hte user has not specified any groups then use them all
31                 if (globaldata->Groups.size() == 0) {
32                         groups = groupMap->namesOfGroups;
33                 }else{ //they have specified groups
34                         groups = globaldata->Groups;
35                         pickedGroups = true;
36                 }
37                 
38                 //fill filehandles with neccessary ofstreams
39                 int i;
40                 ofstream* temp;
41                 for (i=0; i<groups.size(); i++) {
42                         temp = new ofstream;
43                         filehandles[groups[i]] = temp;
44                 }
45                 
46                 //set fileroot
47                 fileroot = outputDir + getRootName(getSimpleName(globaldata->getListFile()));
48                 
49                 //clears file before we start to write to it below
50                 for (int i=0; i<groups.size(); i++) {
51                         remove((fileroot + groups[i] + ".rabund").c_str());
52                 }
53
54         }
55         catch(exception& e) {
56                 errorOut(e, "SharedCommand", "SharedCommand");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61
62 int SharedCommand::execute(){
63         try {
64                 
65                 //lookup.clear();
66                 string errorOff = "no error";
67                 //errorOff = "";
68                 
69                 //read in listfile
70                 read = new ReadOTUFile(globaldata->inputFileName);      
71                 read->read(&*globaldata); 
72                 delete read;
73
74                 input = globaldata->ginput;
75                 SharedList = globaldata->gSharedList;
76                 string lastLabel = SharedList->getLabel();
77                 vector<SharedRAbundVector*> lookup; 
78                 
79                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
80                         mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine(); 
81                         
82                         out.close();
83                         remove(filename.c_str()); //remove blank shared file you made
84                         
85                         createMisMatchFile();
86                         
87                         //delete memory
88                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
89                                 delete it3->second;
90                         }
91                         delete SharedList;
92                         globaldata->gSharedList = NULL;
93                         
94                         return 1; 
95                 }
96                 
97                 //if user has specified groups make new groupfile for them
98                 if (globaldata->Groups.size() != 0) { //make new group file
99                         string groups = "";
100                         for (int i = 0; i < globaldata->Groups.size(); i++) {
101                                 groups += globaldata->Groups[i] + ".";
102                         }
103                 
104                         string newGroupFile = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + groups + "groups";
105                         ofstream outGroups;
106                         openOutputFile(newGroupFile, outGroups);
107                 
108                         vector<string> names = groupMap->getNamesSeqs();
109                         string groupName;
110                         for (int i = 0; i < names.size(); i++) {
111                                 groupName = groupMap->getGroup(names[i]);
112                                 if (isValidGroup(groupName, globaldata->Groups)) {
113                                         outGroups << names[i] << '\t' << groupName << endl;
114                                 }
115                         }
116                         outGroups.close();
117                 }
118                 
119                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
120                 set<string> processedLabels;
121                 set<string> userLabels = globaldata->labels;    
122         
123                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
124                 
125                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
126                         
127                                         lookup = SharedList->getSharedRAbundVector();
128                                         if (pickedGroups) { //check for otus with no seqs in them
129                                                 eliminateZeroOTUS(lookup);
130                                         }
131                                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
132                                         
133                                         printSharedData(lookup); //prints info to the .shared file
134                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
135                                 
136                                         processedLabels.insert(SharedList->getLabel());
137                                         userLabels.erase(SharedList->getLabel());
138                         }
139                         
140                         if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
141                                         string saveLabel = SharedList->getLabel();
142                                         
143                                         delete SharedList;
144                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
145                                         
146                                         lookup = SharedList->getSharedRAbundVector();
147                                         if (pickedGroups) { //check for otus with no seqs in them
148                                                 eliminateZeroOTUS(lookup);
149                                         }
150                                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
151                                         
152                                         printSharedData(lookup); //prints info to the .shared file
153                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
154                                         
155                                         processedLabels.insert(SharedList->getLabel());
156                                         userLabels.erase(SharedList->getLabel());
157                                         
158                                         //restore real lastlabel to save below
159                                         SharedList->setLabel(saveLabel);
160                         }
161                         
162                 
163                         lastLabel = SharedList->getLabel();
164                                 
165                         delete SharedList;
166                         SharedList = input->getSharedListVector(); //get new list vector to process
167                 }
168                 
169                 //output error messages about any remaining user labels
170                 set<string>::iterator it;
171                 bool needToRun = false;
172                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
173                         if (processedLabels.count(lastLabel) != 1) {
174                                 needToRun = true;
175                         }
176                 }
177                 
178                 //run last label if you need to
179                 if (needToRun == true)  {
180                         if (SharedList != NULL) {       delete SharedList;      }
181                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
182                                         
183                         lookup = SharedList->getSharedRAbundVector();
184                         if (pickedGroups) { //check for otus with no seqs in them
185                                 eliminateZeroOTUS(lookup);
186                         }
187                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
188                         
189                         printSharedData(lookup); //prints info to the .shared file
190                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
191                         delete SharedList;
192                 }
193                 
194                 globaldata->gSharedList = NULL;
195                 
196                 out.close();
197                 
198                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
199                         delete it3->second;
200                 }
201
202                 
203                 //change format to shared  to speed up commands
204                 globaldata->setFormat("sharedfile");
205                 globaldata->setListFile("");
206                 globaldata->setGroupFile("");
207                 globaldata->setSharedFile(filename);
208
209                 
210                 return 0;
211         }
212         catch(exception& e) {
213                 errorOut(e, "SharedCommand", "execute");
214                 exit(1);
215         }
216 }
217 //**********************************************************************************************************************
218 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
219         try {
220                 
221                 //initialize bin values
222                 for (int i = 0; i < thislookup.size(); i++) {
223 //cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() <<  endl;
224                         out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
225                         thislookup[i]->print(out);
226                         
227                         RAbundVector rav = thislookup[i]->getRAbundVector();
228                         openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
229                         rav.print(*(filehandles[thislookup[i]->getGroup()]));
230                         (*(filehandles[thislookup[i]->getGroup()])).close();
231                 }
232  
233         }
234         catch(exception& e) {
235                 errorOut(e, "SharedCommand", "printSharedData");
236                 exit(1);
237         }
238 }
239 //**********************************************************************************************************************
240 void SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
241         try {
242                 
243                 vector<SharedRAbundVector*> newLookup;
244                 for (int i = 0; i < thislookup.size(); i++) {
245                         SharedRAbundVector* temp = new SharedRAbundVector();
246                         temp->setLabel(thislookup[i]->getLabel());
247                         temp->setGroup(thislookup[i]->getGroup());
248                         newLookup.push_back(temp);
249                 }
250                 
251                 //for each bin
252                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
253                 
254                         //look at each sharedRabund and make sure they are not all zero
255                         bool allZero = true;
256                         for (int j = 0; j < thislookup.size(); j++) {
257                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
258                         }
259                         
260                         //if they are not all zero add this bin
261                         if (!allZero) {
262                                 for (int j = 0; j < thislookup.size(); j++) {
263                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
264                                 }
265                         }
266                         //else{  cout << "bin # " << i << " is all zeros" << endl;  }
267                 }
268         
269                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
270                 thislookup = newLookup;
271         
272  
273         }
274         catch(exception& e) {
275                 errorOut(e, "SharedCommand", "eliminateZeroOTUS");
276                 exit(1);
277         }
278 }
279 //**********************************************************************************************************************
280 void SharedCommand::createMisMatchFile() {
281         try {
282                 ofstream outMisMatch;
283                 string outputMisMatchName = outputDir + getRootName(getSimpleName(globaldata->inputFileName));
284                 
285                 //you have sequences in your list file that are not in your group file
286                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
287                         outputMisMatchName += "missing.group";
288                         mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
289                         
290                         openOutputFile(outputMisMatchName, outMisMatch);
291                         
292                         map<string, string> listNames;
293                         map<string, string>::iterator itList;
294                         
295                         //go through list and if group returns "not found" output it
296                         for (int i = 0; i < SharedList->getNumBins(); i++) {
297                         
298                                 string names = SharedList->get(i); 
299                                 
300                                 while (names.find_first_of(',') != -1) { 
301                                         string name = names.substr(0,names.find_first_of(','));
302                                         names = names.substr(names.find_first_of(',')+1, names.length());
303                                         string group = groupMap->getGroup(name);
304                                         
305                                         if(group == "not found") {      outMisMatch << name << endl;  }
306                                         
307                                         itList = listNames.find(name);
308                                         if (itList != listNames.end()) {  mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); mothurOutEndLine(); }
309                                         else { listNames[name] = name; }
310                                 }
311                         
312                                 //get last name
313                                 string group = groupMap->getGroup(names);
314                                 if(group == "not found") {      outMisMatch << names << endl;  }        
315                                 
316                                 itList = listNames.find(names);
317                                 if (itList != listNames.end()) {  mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); mothurOutEndLine(); }
318                                 else { listNames[names] = names; }
319
320                         }
321                         
322                         outMisMatch.close();
323                         
324                 
325                 }else {//you have sequences in your group file that are not in you list file
326                         
327                         outputMisMatchName += "missing.name";
328                         mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
329                         
330                         map<string, string> namesInList;
331                         map<string, string>::iterator itList;
332                         
333                         //go through listfile and get names
334                         for (int i = 0; i < SharedList->getNumBins(); i++) {
335                                 
336                                 string names = SharedList->get(i); 
337                 
338                                 while (names.find_first_of(',') != -1) { 
339                                         string name = names.substr(0,names.find_first_of(','));
340                                         names = names.substr(names.find_first_of(',')+1, names.length());
341                                         
342                                         itList = namesInList.find(name);
343                                         if (itList != namesInList.end()) {  mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); mothurOutEndLine(); }
344
345                                         namesInList[name] = name;
346                                         
347                                 }
348                                 
349                                 itList = namesInList.find(names);
350                                 if (itList != namesInList.end()) {  mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); mothurOutEndLine(); }
351
352                                 //get last name
353                                 namesInList[names] = names;                             
354                         }
355                         
356                         //get names of sequences in groupfile
357                         vector<string> seqNames = groupMap->getNamesSeqs();
358                 
359                         map<string, string>::iterator itMatch;
360                         
361                         openOutputFile(outputMisMatchName, outMisMatch);
362                         
363                         //loop through names in seqNames and if they aren't in namesIn list output them
364                         for (int i = 0; i < seqNames.size(); i++) {
365                                 
366                                 itMatch = namesInList.find(seqNames[i]);
367                                 
368                                 if (itMatch == namesInList.end()) {
369                                 
370                                         outMisMatch << seqNames[i] << endl; 
371                                 }
372                         }               
373                         outMisMatch.close();
374                 }
375  
376         }
377         catch(exception& e) {
378                 errorOut(e, "SharedCommand", "createMisMatchFile");
379                 exit(1);
380         }
381 }
382
383 //**********************************************************************************************************************
384
385 SharedCommand::~SharedCommand(){
386         //delete list;
387         
388         
389 }
390
391 //**********************************************************************************************************************
392
393 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
394         try {
395                 for (int i = 0; i < groups.size(); i++) {
396                         if (groupname == groups[i]) { return true; }
397                 }
398                 
399                 return false;
400         }
401         catch(exception& e) {
402                 errorOut(e, "SharedCommand", "isValidGroup");
403                 exit(1);
404         }
405 }
406 /************************************************************/
407
408