]> git.donarmstrong.com Git - mothur.git/blob - sharedcommand.cpp
added groups option to read.otu, added qtrim option to trim.seqs, fixed bug in get...
[mothur.git] / sharedcommand.cpp
1 /*
2  *  sharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedcommand.h"
11
12 //**********************************************************************************************************************
13
14 SharedCommand::SharedCommand(){
15         try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 //getting output filename
19                 filename = globaldata->inputFileName;
20                 filename = getRootName(filename);
21                 filename = filename + "shared";
22                 openOutputFile(filename, out);
23                 
24                 groupMap = globaldata->gGroupmap;
25                 
26                 //if hte user has not specified any groups then use them all
27                 if (globaldata->Groups.size() == 0) {
28                         groups = groupMap->namesOfGroups;
29                 }else{ //they have specified groups
30                         groups = globaldata->Groups;
31                 }
32                 
33                 //fill filehandles with neccessary ofstreams
34                 int i;
35                 ofstream* temp;
36                 for (i=0; i<groups.size(); i++) {
37                         temp = new ofstream;
38                         filehandles[groups[i]] = temp;
39                 }
40                 
41                 //set fileroot
42                 fileroot = getRootName(globaldata->getListFile());
43                 
44                 //clears file before we start to write to it below
45                 for (int i=0; i<groups.size(); i++) {
46                         remove((fileroot + groups[i] + ".rabund").c_str());
47                 }
48
49         }
50         catch(exception& e) {
51                 errorOut(e, "SharedCommand", "SharedCommand");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56
57 int SharedCommand::execute(){
58         try {
59                 
60                 //lookup.clear();
61                 string errorOff = "no error";
62                 //errorOff = "";
63                         
64                 //read in listfile
65                 read = new ReadOTUFile(globaldata->inputFileName);      
66                 read->read(&*globaldata); 
67                 delete read;
68
69                 input = globaldata->ginput;
70                 SharedList = globaldata->gSharedList;
71                 string lastLabel = SharedList->getLabel();
72                 vector<SharedRAbundVector*> lookup; 
73                 
74                 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
75                         mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine(); 
76                         
77                         out.close();
78                         remove(filename.c_str()); //remove blank shared file you made
79                         
80                         createMisMatchFile();
81                         
82                         //delete memory
83                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
84                                 delete it3->second;
85                         }
86                         delete SharedList;
87                         globaldata->gSharedList = NULL;
88                         
89                         return 1; 
90                 }
91                 
92                 //if user has specified groups make new groupfile for them
93                 if (globaldata->Groups.size() != 0) { //make new group file
94                         string groups = "";
95                         for (int i = 0; i < globaldata->Groups.size(); i++) {
96                                 groups += globaldata->Groups[i] + ".";
97                         }
98                 
99                         string newGroupFile = getRootName(globaldata->inputFileName) + groups + "groups";
100                         ofstream outGroups;
101                         openOutputFile(newGroupFile, outGroups);
102                 
103                         vector<string> names = groupMap->getNamesSeqs();
104                         string groupName;
105                         for (int i = 0; i < names.size(); i++) {
106                                 groupName = groupMap->getGroup(names[i]);
107                                 if (isValidGroup(groupName, globaldata->Groups)) {
108                                         outGroups << names[i] << '\t' << groupName << endl;
109                                 }
110                         }
111                         outGroups.close();
112                 }
113                 
114                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
115                 set<string> processedLabels;
116                 set<string> userLabels = globaldata->labels;    
117                 
118                 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
119                         
120                         if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
121                         
122                                         lookup = SharedList->getSharedRAbundVector();
123                                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
124                                         
125                                         printSharedData(lookup); //prints info to the .shared file
126                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
127                                 
128                                         processedLabels.insert(SharedList->getLabel());
129                                         userLabels.erase(SharedList->getLabel());
130                         }
131                         
132                         if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
133                                         string saveLabel = SharedList->getLabel();
134                                         
135                                         delete SharedList;
136                                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
137                                         
138                                         lookup = SharedList->getSharedRAbundVector();
139                                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
140                                         
141                                         printSharedData(lookup); //prints info to the .shared file
142                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
143                                         
144                                         processedLabels.insert(SharedList->getLabel());
145                                         userLabels.erase(SharedList->getLabel());
146                                         
147                                         //restore real lastlabel to save below
148                                         SharedList->setLabel(saveLabel);
149                         }
150                         
151                 
152                         lastLabel = SharedList->getLabel();
153                                 
154                         delete SharedList;
155                         SharedList = input->getSharedListVector(); //get new list vector to process
156                 }
157                 
158                 //output error messages about any remaining user labels
159                 set<string>::iterator it;
160                 bool needToRun = false;
161                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
162                         if (processedLabels.count(lastLabel) != 1) {
163                                 needToRun = true;
164                         }
165                 }
166                 
167                 //run last label if you need to
168                 if (needToRun == true)  {
169                         if (SharedList != NULL) {       delete SharedList;      }
170                         SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
171                                         
172                         lookup = SharedList->getSharedRAbundVector();
173                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
174                         
175                         printSharedData(lookup); //prints info to the .shared file
176                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
177                         delete SharedList;
178                 }
179                 
180                 globaldata->gSharedList = NULL;
181                 
182                 out.close();
183                 
184                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
185                         delete it3->second;
186                 }
187
188                 
189                 return 0;
190         }
191         catch(exception& e) {
192                 errorOut(e, "SharedCommand", "execute");
193                 exit(1);
194         }
195 }
196 //**********************************************************************************************************************
197 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
198         try {
199                 
200                 //initialize bin values
201                 for (int i = 0; i < thislookup.size(); i++) {
202 //cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() <<  endl;
203                         out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
204                         thislookup[i]->print(out);
205                         
206                         RAbundVector rav = thislookup[i]->getRAbundVector();
207                         openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
208                         rav.print(*(filehandles[thislookup[i]->getGroup()]));
209                         (*(filehandles[thislookup[i]->getGroup()])).close();
210                 }
211  
212         }
213         catch(exception& e) {
214                 errorOut(e, "SharedCommand", "printSharedData");
215                 exit(1);
216         }
217 }
218 //**********************************************************************************************************************
219 void SharedCommand::createMisMatchFile() {
220         try {
221                 ofstream outMisMatch;
222                 string outputMisMatchName = getRootName(globaldata->inputFileName);
223                 
224                 //you have sequences in your list file that are not in your group file
225                 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
226                         outputMisMatchName += "missing.group";
227                         mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
228                         
229                         openOutputFile(outputMisMatchName, outMisMatch);
230                         
231                         //go through list and if group returns "not found" output it
232                         for (int i = 0; i < SharedList->getNumBins(); i++) {
233                         
234                                 string names = SharedList->get(i); 
235                                 
236                                 while (names.find_first_of(',') != -1) { 
237                                         string name = names.substr(0,names.find_first_of(','));
238                                         names = names.substr(names.find_first_of(',')+1, names.length());
239                                         string group = groupMap->getGroup(name);
240                                         
241                                         if(group == "not found") {      outMisMatch << name << endl;  }
242                                 }
243                                 
244                                 //get last name
245                                 string group = groupMap->getGroup(names);
246                                 if(group == "not found") {      outMisMatch << names << endl;  }                                
247                         }
248                         
249                         outMisMatch.close();
250                         
251                 
252                 }else {//you have sequences in your group file that are not in you list file
253                         
254                         outputMisMatchName += "missing.name";
255                         mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
256                         
257                         map<string, string> namesInList;
258                         
259                         //go through listfile and get names
260                         for (int i = 0; i < SharedList->getNumBins(); i++) {
261                                 
262                                 string names = SharedList->get(i); 
263                 
264                                 while (names.find_first_of(',') != -1) { 
265                                         string name = names.substr(0,names.find_first_of(','));
266                                         names = names.substr(names.find_first_of(',')+1, names.length());
267                                         
268                                         namesInList[name] = name;
269                                 }
270                                 
271                                 //get last name
272                                 namesInList[names] = names;                             
273                         }
274                         
275                         //get names of sequences in groupfile
276                         vector<string> seqNames = groupMap->getNamesSeqs();
277                 
278                         map<string, string>::iterator itMatch;
279                         
280                         openOutputFile(outputMisMatchName, outMisMatch);
281                         
282                         //loop through names in seqNames and if they aren't in namesIn list output them
283                         for (int i = 0; i < seqNames.size(); i++) {
284                                 
285                                 itMatch = namesInList.find(seqNames[i]);
286                                 
287                                 if (itMatch == namesInList.end()) {
288                                 
289                                         outMisMatch << seqNames[i] << endl; 
290                                 }
291                         }               
292                         outMisMatch.close();
293                 }
294  
295         }
296         catch(exception& e) {
297                 errorOut(e, "SharedCommand", "createMisMatchFile");
298                 exit(1);
299         }
300 }
301
302 //**********************************************************************************************************************
303
304 SharedCommand::~SharedCommand(){
305         //delete list;
306         
307         
308 }
309
310 //**********************************************************************************************************************
311
312 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
313         try {
314                 for (int i = 0; i < groups.size(); i++) {
315                         if (groupname == groups[i]) { return true; }
316                 }
317                 
318                 return false;
319         }
320         catch(exception& e) {
321                 errorOut(e, "SharedCommand", "isValidGroup");
322                 exit(1);
323         }
324 }
325 /************************************************************/
326
327