5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sharedcommand.h"
12 //**********************************************************************************************************************
// Constructor: prepares the ".shared" output file and the per-group ".rabund" outputs.
//   o - output directory; when it is "", the path of globaldata->inputFileName is used instead.
// Any std::exception is reported through m->errorOut.
// NOTE(review): several lines of this definition (e.g. the try/catch framing and the
// declaration of temp) are not visible in this excerpt.
14 SharedCommand::SharedCommand(string o) : outputDir(o) {
16 globaldata = GlobalData::getInstance();
18 //getting output filename
19 filename = globaldata->inputFileName;
// fall back to the input file's own directory when no output directory was given
20 if (outputDir == "") { outputDir += hasPath(filename); }
// output name = outputDir + root of the input file name + "shared"
// (presumably getRootName keeps the trailing '.', giving "<root>.shared" - TODO confirm)
22 filename = outputDir + getRootName(getSimpleName(filename));
23 filename = filename + "shared";
25 openOutputFile(filename, out);
28 groupMap = globaldata->gGroupmap;
30 //if the user has not specified any groups then use them all
31 if (globaldata->Groups.size() == 0) {
32 groups = groupMap->namesOfGroups;
33 }else{ //they have specified groups
34 groups = globaldata->Groups;
38 //fill filehandles with necessary ofstreams
41 for (i=0; i<groups.size(); i++) {
// temp is set on a line not visible here; filehandles values are later dereferenced as
// streams and deleted in execute(), so presumably a newly allocated ofstream* - TODO confirm
43 filehandles[groups[i]] = temp;
// prefix shared by every per-group file: "<fileroot><group>.rabund"
47 fileroot = outputDir + getRootName(getSimpleName(globaldata->getListFile()));
49 //clears file before we start to write to it below
50 for (int i=0; i<groups.size(); i++) {
51 remove((fileroot + groups[i] + ".rabund").c_str());
// remember each .rabund name so execute() can list it under "Output File Names"
52 outputNames.push_back((fileroot + groups[i] + ".rabund"));
57 m->errorOut(e, "SharedCommand", "SharedCommand");
61 //**********************************************************************************************************************
// Runs the command: reads the OTU input named by globaldata->inputFileName, then for every
// requested label writes the shared data to the ".shared" file and to one ".rabund" file per group.
// When the user selected specific groups, a new "<root><group1>.<group2>.groups" file listing
// only the sequences of those groups is also written.
// Each m->control_pressed check is a cancellation point: it frees what it can, closes `out`
// and removes the partially written output files.
// Any std::exception is reported through m->errorOut.
// NOTE(review): this excerpt omits a number of lines (try framing, returns, closing braces and
// some declarations), so return values are not documented here.
63 int SharedCommand::execute(){
67 string errorOff = "no error";
// read the input file into globaldata; the results are picked up from ginput / gSharedList below
71 read = new ReadOTUFile(globaldata->inputFileName);
72 read->read(&*globaldata);
75 input = globaldata->ginput;
76 SharedList = globaldata->gSharedList;
77 string lastLabel = SharedList->getLabel();
78 vector<SharedRAbundVector*> lookup;
// cancellation point: release the input objects and file handles, remove partial outputs
80 if (m->control_pressed) {
81 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL;
82 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
83 out.close(); remove(filename.c_str());
84 for (int i=0; i<groups.size(); i++) { remove((fileroot + groups[i] + ".rabund").c_str()); }
88 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error
89 m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine();
92 remove(filename.c_str()); //remove blank shared file you made
97 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
101 globaldata->ginput = NULL;
103 globaldata->gSharedList = NULL;
108 //if user has specified groups make new groupfile for them
109 if (globaldata->Groups.size() != 0) { //make new group file
111 for (int i = 0; i < globaldata->Groups.size(); i++) {
// NOTE(review): `groups` here is appended to like a string, so it is presumably a local
// string declared on a line not visible in this excerpt (shadowing the member) - confirm
112 groups += globaldata->Groups[i] + ".";
115 string newGroupFile = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + groups + "groups";
117 openOutputFile(newGroupFile, outGroups);
119 vector<string> names = groupMap->getNamesSeqs();
// keep only the sequences whose group is one of the user-selected groups
121 for (int i = 0; i < names.size(); i++) {
122 groupName = groupMap->getGroup(names[i]);
123 if (isValidGroup(groupName, globaldata->Groups)) {
124 outGroups << names[i] << '\t' << groupName << endl;
130 //if the user enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// processedLabels: labels already written; userLabels: requested labels still outstanding
131 set<string> processedLabels;
132 set<string> userLabels = globaldata->labels;
// main loop: continue while there is a list vector and either all lines are wanted
// or some requested labels remain
134 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
135 if (m->control_pressed) {
136 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL;
137 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
138 out.close(); remove(filename.c_str());
139 for (int i=0; i<groups.size(); i++) { remove((fileroot + groups[i] + ".rabund").c_str()); }
// case 1: the current label is one that should be processed
143 if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
145 lookup = SharedList->getSharedRAbundVector();
146 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
147 if (pickedGroups) { //check for otus with no seqs in them
148 eliminateZeroOTUS(lookup);
// this cancel path also frees the lookup vectors obtained above
151 if (m->control_pressed) {
152 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL;
153 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
154 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
155 out.close(); remove(filename.c_str());
156 for (int i=0; i<groups.size(); i++) { remove((fileroot + groups[i] + ".rabund").c_str()); }
160 printSharedData(lookup); //prints info to the .shared file
161 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
163 processedLabels.insert(SharedList->getLabel());
164 userLabels.erase(SharedList->getLabel());
// case 2: anyLabelsToProcess reports true for the current label and the previous label
// (lastLabel) has not been processed yet, so re-read and process lastLabel instead
167 if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
168 string saveLabel = SharedList->getLabel();
171 SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
173 lookup = SharedList->getSharedRAbundVector();
174 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
175 if (pickedGroups) { //check for otus with no seqs in them
176 eliminateZeroOTUS(lookup);
180 if (m->control_pressed) {
181 delete input; delete SharedList; globaldata->ginput = NULL; globaldata->gSharedList = NULL;
182 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
183 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
184 out.close(); remove(filename.c_str());
185 for (int i=0; i<groups.size(); i++) { remove((fileroot + groups[i] + ".rabund").c_str()); }
189 printSharedData(lookup); //prints info to the .shared file
190 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
192 processedLabels.insert(SharedList->getLabel());
193 userLabels.erase(SharedList->getLabel());
195 //restore real lastlabel to save below
196 SharedList->setLabel(saveLabel);
// remember the label just seen, then advance to the next list vector in the input
200 lastLabel = SharedList->getLabel();
203 SharedList = input->getSharedListVector(); //get new list vector to process
206 //output error messages about any remaining user labels
207 set<string>::iterator it;
208 bool needToRun = false;
209 for (it = userLabels.begin(); it != userLabels.end(); it++) {
// the body of this check is not visible here; presumably it sets needToRun - TODO confirm
210 if (processedLabels.count(lastLabel) != 1) {
215 //run last label if you need to
216 if (needToRun == true) {
217 if (SharedList != NULL) { delete SharedList; }
218 SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
220 lookup = SharedList->getSharedRAbundVector();
221 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
222 if (pickedGroups) { //check for otus with no seqs in them
223 eliminateZeroOTUS(lookup);
// NOTE(review): unlike the cancel paths inside the loop, the visible lines of this one
// do not delete lookup[i] or SharedList - possible leak on cancel, confirm
226 if (m->control_pressed) {
227 delete input; globaldata->ginput = NULL;
228 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
229 out.close(); remove(filename.c_str());
230 for (int i=0; i<groups.size(); i++) { remove((fileroot + groups[i] + ".rabund").c_str()); }
234 printSharedData(lookup); //prints info to the .shared file
235 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
239 globaldata->gSharedList = NULL;
243 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
248 //change format to shared to speed up commands
// point globaldata at the new shared file and clear the list/group file settings
249 globaldata->setFormat("sharedfile");
250 globaldata->setListFile("");
251 globaldata->setGroupFile("");
252 globaldata->setSharedFile(filename);
// final cancellation point: remove everything that was written
254 if (m->control_pressed) {
255 delete input; globaldata->ginput = NULL;
256 remove(filename.c_str());
257 for (int i=0; i<groups.size(); i++) { remove((fileroot + groups[i] + ".rabund").c_str()); }
// report the output files: every name in outputNames, then the shared file
261 m->mothurOutEndLine();
262 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
263 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
264 m->mothurOut(filename); m->mothurOutEndLine();
265 m->mothurOutEndLine();
269 catch(exception& e) {
270 m->errorOut(e, "SharedCommand", "execute");
274 //**********************************************************************************************************************
// Writes one label's data for every group.
//   thislookup - one SharedRAbundVector per group (passed by value; the pointed-to vectors are not freed here).
// For each vector: a "<label>\t<group>\t" prefix plus the vector itself go to the shared
// output stream `out`, and its RAbundVector is appended to "<fileroot><group>.rabund".
// Any std::exception is reported through m->errorOut.
275 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
278 //initialize bin values
279 for (int i = 0; i < thislookup.size(); i++) {
280 //cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << endl;
281 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
282 thislookup[i]->print(out);
// the group's .rabund file is opened in append mode, written and closed again on every
// call, using the stream stored for that group in filehandles
284 RAbundVector rav = thislookup[i]->getRAbundVector();
285 openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
286 rav.print(*(filehandles[thislookup[i]->getGroup()]));
287 (*(filehandles[thislookup[i]->getGroup()])).close();
291 catch(exception& e) {
292 m->errorOut(e, "SharedCommand", "printSharedData");
296 //**********************************************************************************************************************
// Drops every bin (OTU) whose abundance is zero in all of the given vectors.
//   thislookup - in/out: the old vectors are deleted and replaced by newly allocated ones
//                that carry the same label and group but only the surviving bins.
// If m->control_pressed is set during the scan, the new vectors are freed and 0 is returned
// with thislookup left untouched. Any std::exception is reported through m->errorOut.
// NOTE(review): thislookup[0] is read without a size check - assumes a non-empty vector; confirm.
297 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
// start with one empty replacement vector per input vector, copying label and group
300 vector<SharedRAbundVector*> newLookup;
301 for (int i = 0; i < thislookup.size(); i++) {
302 SharedRAbundVector* temp = new SharedRAbundVector();
303 temp->setLabel(thislookup[i]->getLabel());
304 temp->setGroup(thislookup[i]->getGroup());
305 newLookup.push_back(temp);
// walk the bins; the bin count is taken from the first vector
309 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
310 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
312 //look at each sharedRabund and make sure they are not all zero
// (allZero is declared on a line not visible in this excerpt)
314 for (int j = 0; j < thislookup.size(); j++) {
315 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
318 //if they are not all zero add this bin
320 for (int j = 0; j < thislookup.size(); j++) {
321 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
324 //else{ cout << "bin # " << i << " is all zeros" << endl; }
// free the old vectors and hand the filtered ones back through the reference parameter
327 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
328 thislookup = newLookup;
333 catch(exception& e) {
334 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
338 //**********************************************************************************************************************
// Writes a file listing the sequence names on which the list file and the group file disagree.
//  - list file has more sequences than the group file: "<root>missing.group" receives every
//    list name for which groupMap->getGroup returns "not found";
//  - otherwise: "<root>missing.name" receives every group-file name that is absent from the list file.
// In both branches a list name seen more than once triggers a warning through m->mothurOut.
// If m->control_pressed is set, any mismatch file already opened is closed and removed, and 0 is returned.
// Any std::exception is reported through m->errorOut.
339 int SharedCommand::createMisMatchFile() {
341 ofstream outMisMatch;
342 string outputMisMatchName = outputDir + getRootName(getSimpleName(globaldata->inputFileName));
344 //you have sequences in your list file that are not in your group file
345 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) {
346 outputMisMatchName += "missing.group";
347 m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
349 openOutputFile(outputMisMatchName, outMisMatch);
// listNames records every name seen so far so that duplicates can be reported
351 map<string, string> listNames;
352 map<string, string>::iterator itList;
354 //go through list and if group returns "not found" output it
355 for (int i = 0; i < SharedList->getNumBins(); i++) {
356 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; }
// each bin is a comma-separated string of sequence names
358 string names = SharedList->get(i);
// peel names off the front one comma at a time; the "!= -1" test relies on
// string::npos being the unsigned representation of -1
360 while (names.find_first_of(',') != -1) {
361 string name = names.substr(0,names.find_first_of(','));
362 names = names.substr(names.find_first_of(',')+1, names.length());
363 string group = groupMap->getGroup(name);
365 if(group == "not found") { outMisMatch << name << endl; }
367 itList = listNames.find(name);
368 if (itList != listNames.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
369 else { listNames[name] = name; }
// what remains in `names` after the loop is the last (or only) name of the bin
373 string group = groupMap->getGroup(names);
374 if(group == "not found") { outMisMatch << names << endl; }
376 itList = listNames.find(names);
377 if (itList != listNames.end()) { m->mothurOut(names + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
378 else { listNames[names] = names; }
385 }else {//you have sequences in your group file that are not in your list file
387 outputMisMatchName += "missing.name";
388 m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
390 map<string, string> namesInList;
391 map<string, string>::iterator itList;
393 //go through listfile and get names
394 for (int i = 0; i < SharedList->getNumBins(); i++) {
// the mismatch file has not been opened yet in this branch, so there is nothing to remove
395 if (m->control_pressed) { return 0; }
398 string names = SharedList->get(i);
// same comma-splitting scheme as in the branch above
400 while (names.find_first_of(',') != -1) {
401 string name = names.substr(0,names.find_first_of(','));
402 names = names.substr(names.find_first_of(',')+1, names.length());
404 itList = namesInList.find(name);
405 if (itList != namesInList.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
407 namesInList[name] = name;
// last (or only) name of the bin
411 itList = namesInList.find(names);
412 if (itList != namesInList.end()) { m->mothurOut(names + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
415 namesInList[names] = names;
418 //get names of sequences in groupfile
419 vector<string> seqNames = groupMap->getNamesSeqs();
421 map<string, string>::iterator itMatch;
423 openOutputFile(outputMisMatchName, outMisMatch);
425 //loop through names in seqNames and if they aren't in namesInList output them
426 for (int i = 0; i < seqNames.size(); i++) {
427 if (m->control_pressed) { outMisMatch.close(); remove(outputMisMatchName.c_str()); return 0; }
429 itMatch = namesInList.find(seqNames[i]);
431 if (itMatch == namesInList.end()) {
433 outMisMatch << seqNames[i] << endl;
441 catch(exception& e) {
442 m->errorOut(e, "SharedCommand", "createMisMatchFile");
447 //**********************************************************************************************************************
// Destructor.
// NOTE(review): the body is not visible in this excerpt, so what it releases cannot be stated here.
449 SharedCommand::~SharedCommand(){
455 //**********************************************************************************************************************
// Tests whether a group name is in a list of groups.
//   groupname - name to look for
//   groups    - candidate group names (passed by value)
// Returns true as soon as an exact string match is found.
// NOTE(review): the return for the no-match case is on a line not visible in this excerpt
// (presumably false) - confirm.
// Any std::exception is reported through m->errorOut.
457 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
// linear scan over the candidates
459 for (int i = 0; i < groups.size(); i++) {
460 if (groupname == groups[i]) { return true; }
465 catch(exception& e) {
466 m->errorOut(e, "SharedCommand", "isValidGroup");
470 /************************************************************/