5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sharedcommand.h"
12 //**********************************************************************************************************************
14 SharedCommand::SharedCommand(string o) : outputDir(o) {
16 globaldata = GlobalData::getInstance();
18 //getting output filename
19 filename = globaldata->inputFileName;
20 if (outputDir == "") { outputDir += hasPath(filename); }
22 filename = outputDir + getRootName(getSimpleName(filename));
23 filename = filename + "shared";
25 openOutputFile(filename, out);
28 groupMap = globaldata->gGroupmap;
30 //if hte user has not specified any groups then use them all
31 if (globaldata->Groups.size() == 0) {
32 groups = groupMap->namesOfGroups;
33 }else{ //they have specified groups
34 groups = globaldata->Groups;
38 //fill filehandles with neccessary ofstreams
41 for (i=0; i<groups.size(); i++) {
43 filehandles[groups[i]] = temp;
47 fileroot = outputDir + getRootName(getSimpleName(globaldata->getListFile()));
49 //clears file before we start to write to it below
50 for (int i=0; i<groups.size(); i++) {
51 remove((fileroot + groups[i] + ".rabund").c_str());
56 errorOut(e, "SharedCommand", "SharedCommand");
60 //**********************************************************************************************************************
62 int SharedCommand::execute(){
66 string errorOff = "no error";
68 cout << globaldata->inputFileName << endl;
70 read = new ReadOTUFile(globaldata->inputFileName);
71 read->read(&*globaldata);
74 input = globaldata->ginput;
75 SharedList = globaldata->gSharedList;
76 string lastLabel = SharedList->getLabel();
77 vector<SharedRAbundVector*> lookup;
79 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error
80 mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine();
83 remove(filename.c_str()); //remove blank shared file you made
88 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
92 globaldata->gSharedList = NULL;
97 //if user has specified groups make new groupfile for them
98 if (globaldata->Groups.size() != 0) { //make new group file
100 for (int i = 0; i < globaldata->Groups.size(); i++) {
101 groups += globaldata->Groups[i] + ".";
104 string newGroupFile = getRootName(globaldata->inputFileName) + groups + "groups";
106 openOutputFile(newGroupFile, outGroups);
108 vector<string> names = groupMap->getNamesSeqs();
110 for (int i = 0; i < names.size(); i++) {
111 groupName = groupMap->getGroup(names[i]);
112 if (isValidGroup(groupName, globaldata->Groups)) {
113 outGroups << names[i] << '\t' << groupName << endl;
119 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
120 set<string> processedLabels;
121 set<string> userLabels = globaldata->labels;
123 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
125 if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
127 lookup = SharedList->getSharedRAbundVector();
128 if (pickedGroups) { //check for otus with no seqs in them
129 eliminateZeroOTUS(lookup);
131 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
133 printSharedData(lookup); //prints info to the .shared file
134 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
136 processedLabels.insert(SharedList->getLabel());
137 userLabels.erase(SharedList->getLabel());
140 if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
141 string saveLabel = SharedList->getLabel();
144 SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
146 lookup = SharedList->getSharedRAbundVector();
147 if (pickedGroups) { //check for otus with no seqs in them
148 eliminateZeroOTUS(lookup);
150 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
152 printSharedData(lookup); //prints info to the .shared file
153 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
155 processedLabels.insert(SharedList->getLabel());
156 userLabels.erase(SharedList->getLabel());
158 //restore real lastlabel to save below
159 SharedList->setLabel(saveLabel);
163 lastLabel = SharedList->getLabel();
166 SharedList = input->getSharedListVector(); //get new list vector to process
169 //output error messages about any remaining user labels
170 set<string>::iterator it;
171 bool needToRun = false;
172 for (it = userLabels.begin(); it != userLabels.end(); it++) {
173 if (processedLabels.count(lastLabel) != 1) {
178 //run last label if you need to
179 if (needToRun == true) {
180 if (SharedList != NULL) { delete SharedList; }
181 SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
183 lookup = SharedList->getSharedRAbundVector();
184 if (pickedGroups) { //check for otus with no seqs in them
185 eliminateZeroOTUS(lookup);
187 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
189 printSharedData(lookup); //prints info to the .shared file
190 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
194 globaldata->gSharedList = NULL;
198 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
202 globaldata->setSharedFile(filename);
206 catch(exception& e) {
207 errorOut(e, "SharedCommand", "execute");
211 //**********************************************************************************************************************
212 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
215 //initialize bin values
216 for (int i = 0; i < thislookup.size(); i++) {
217 //cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << endl;
218 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
219 thislookup[i]->print(out);
221 RAbundVector rav = thislookup[i]->getRAbundVector();
222 openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
223 rav.print(*(filehandles[thislookup[i]->getGroup()]));
224 (*(filehandles[thislookup[i]->getGroup()])).close();
228 catch(exception& e) {
229 errorOut(e, "SharedCommand", "printSharedData");
233 //**********************************************************************************************************************
234 void SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
237 vector<SharedRAbundVector*> newLookup;
238 for (int i = 0; i < thislookup.size(); i++) {
239 SharedRAbundVector* temp = new SharedRAbundVector();
240 temp->setLabel(thislookup[i]->getLabel());
241 temp->setGroup(thislookup[i]->getGroup());
242 newLookup.push_back(temp);
246 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
248 //look at each sharedRabund and make sure they are not all zero
250 for (int j = 0; j < thislookup.size(); j++) {
251 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
254 //if they are not all zero add this bin
256 for (int j = 0; j < thislookup.size(); j++) {
257 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
260 //else{ cout << "bin # " << i << " is all zeros" << endl; }
263 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
264 thislookup = newLookup;
268 catch(exception& e) {
269 errorOut(e, "SharedCommand", "eliminateZeroOTUS");
273 //**********************************************************************************************************************
274 void SharedCommand::createMisMatchFile() {
276 ofstream outMisMatch;
277 string outputMisMatchName = getRootName(globaldata->inputFileName);
279 //you have sequences in your list file that are not in your group file
280 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) {
281 outputMisMatchName += "missing.group";
282 mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
284 openOutputFile(outputMisMatchName, outMisMatch);
286 //go through list and if group returns "not found" output it
287 for (int i = 0; i < SharedList->getNumBins(); i++) {
289 string names = SharedList->get(i);
291 while (names.find_first_of(',') != -1) {
292 string name = names.substr(0,names.find_first_of(','));
293 names = names.substr(names.find_first_of(',')+1, names.length());
294 string group = groupMap->getGroup(name);
296 if(group == "not found") { outMisMatch << name << endl; }
300 string group = groupMap->getGroup(names);
301 if(group == "not found") { outMisMatch << names << endl; }
307 }else {//you have sequences in your group file that are not in you list file
309 outputMisMatchName += "missing.name";
310 mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
312 map<string, string> namesInList;
314 //go through listfile and get names
315 for (int i = 0; i < SharedList->getNumBins(); i++) {
317 string names = SharedList->get(i);
319 while (names.find_first_of(',') != -1) {
320 string name = names.substr(0,names.find_first_of(','));
321 names = names.substr(names.find_first_of(',')+1, names.length());
323 namesInList[name] = name;
327 namesInList[names] = names;
330 //get names of sequences in groupfile
331 vector<string> seqNames = groupMap->getNamesSeqs();
333 map<string, string>::iterator itMatch;
335 openOutputFile(outputMisMatchName, outMisMatch);
337 //loop through names in seqNames and if they aren't in namesIn list output them
338 for (int i = 0; i < seqNames.size(); i++) {
340 itMatch = namesInList.find(seqNames[i]);
342 if (itMatch == namesInList.end()) {
344 outMisMatch << seqNames[i] << endl;
351 catch(exception& e) {
352 errorOut(e, "SharedCommand", "createMisMatchFile");
357 //**********************************************************************************************************************
359 SharedCommand::~SharedCommand(){
365 //**********************************************************************************************************************
367 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
369 for (int i = 0; i < groups.size(); i++) {
370 if (groupname == groups[i]) { return true; }
375 catch(exception& e) {
376 errorOut(e, "SharedCommand", "isValidGroup");
380 /************************************************************/