5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sharedcommand.h"
12 //**********************************************************************************************************************
14 SharedCommand::SharedCommand(string o) : outputDir(o) {
16 globaldata = GlobalData::getInstance();
18 //getting output filename
19 filename = globaldata->inputFileName;
20 if (outputDir == "") { outputDir += hasPath(filename); }
22 filename = outputDir + getRootName(getSimpleName(filename));
23 filename = filename + "shared";
25 openOutputFile(filename, out);
28 groupMap = globaldata->gGroupmap;
30 //if hte user has not specified any groups then use them all
31 if (globaldata->Groups.size() == 0) {
32 groups = groupMap->namesOfGroups;
33 }else{ //they have specified groups
34 groups = globaldata->Groups;
38 //fill filehandles with neccessary ofstreams
41 for (i=0; i<groups.size(); i++) {
43 filehandles[groups[i]] = temp;
47 fileroot = outputDir + getRootName(getSimpleName(globaldata->getListFile()));
49 //clears file before we start to write to it below
50 for (int i=0; i<groups.size(); i++) {
51 remove((fileroot + groups[i] + ".rabund").c_str());
56 errorOut(e, "SharedCommand", "SharedCommand");
60 //**********************************************************************************************************************
62 int SharedCommand::execute(){
66 string errorOff = "no error";
70 read = new ReadOTUFile(globaldata->inputFileName);
71 read->read(&*globaldata);
74 input = globaldata->ginput;
75 SharedList = globaldata->gSharedList;
76 string lastLabel = SharedList->getLabel();
77 vector<SharedRAbundVector*> lookup;
79 if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error
80 mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine();
83 remove(filename.c_str()); //remove blank shared file you made
88 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
92 globaldata->gSharedList = NULL;
97 //if user has specified groups make new groupfile for them
98 if (globaldata->Groups.size() != 0) { //make new group file
100 for (int i = 0; i < globaldata->Groups.size(); i++) {
101 groups += globaldata->Groups[i] + ".";
104 string newGroupFile = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + groups + "groups";
106 openOutputFile(newGroupFile, outGroups);
108 vector<string> names = groupMap->getNamesSeqs();
110 for (int i = 0; i < names.size(); i++) {
111 groupName = groupMap->getGroup(names[i]);
112 if (isValidGroup(groupName, globaldata->Groups)) {
113 outGroups << names[i] << '\t' << groupName << endl;
119 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
120 set<string> processedLabels;
121 set<string> userLabels = globaldata->labels;
123 while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
125 if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
127 lookup = SharedList->getSharedRAbundVector();
128 if (pickedGroups) { //check for otus with no seqs in them
129 eliminateZeroOTUS(lookup);
131 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
133 printSharedData(lookup); //prints info to the .shared file
134 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
136 processedLabels.insert(SharedList->getLabel());
137 userLabels.erase(SharedList->getLabel());
140 if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
141 string saveLabel = SharedList->getLabel();
144 SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
146 lookup = SharedList->getSharedRAbundVector();
147 if (pickedGroups) { //check for otus with no seqs in them
148 eliminateZeroOTUS(lookup);
150 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
152 printSharedData(lookup); //prints info to the .shared file
153 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
155 processedLabels.insert(SharedList->getLabel());
156 userLabels.erase(SharedList->getLabel());
158 //restore real lastlabel to save below
159 SharedList->setLabel(saveLabel);
163 lastLabel = SharedList->getLabel();
166 SharedList = input->getSharedListVector(); //get new list vector to process
169 //output error messages about any remaining user labels
170 set<string>::iterator it;
171 bool needToRun = false;
172 for (it = userLabels.begin(); it != userLabels.end(); it++) {
173 if (processedLabels.count(lastLabel) != 1) {
178 //run last label if you need to
179 if (needToRun == true) {
180 if (SharedList != NULL) { delete SharedList; }
181 SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
183 lookup = SharedList->getSharedRAbundVector();
184 if (pickedGroups) { //check for otus with no seqs in them
185 eliminateZeroOTUS(lookup);
187 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
189 printSharedData(lookup); //prints info to the .shared file
190 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
194 globaldata->gSharedList = NULL;
198 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
203 //change format to shared to speed up commands
204 globaldata->setFormat("sharedfile");
205 globaldata->setListFile("");
206 globaldata->setGroupFile("");
207 globaldata->setSharedFile(filename);
212 catch(exception& e) {
213 errorOut(e, "SharedCommand", "execute");
217 //**********************************************************************************************************************
218 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
221 //initialize bin values
222 for (int i = 0; i < thislookup.size(); i++) {
223 //cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << endl;
224 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
225 thislookup[i]->print(out);
227 RAbundVector rav = thislookup[i]->getRAbundVector();
228 openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
229 rav.print(*(filehandles[thislookup[i]->getGroup()]));
230 (*(filehandles[thislookup[i]->getGroup()])).close();
234 catch(exception& e) {
235 errorOut(e, "SharedCommand", "printSharedData");
239 //**********************************************************************************************************************
240 void SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
243 vector<SharedRAbundVector*> newLookup;
244 for (int i = 0; i < thislookup.size(); i++) {
245 SharedRAbundVector* temp = new SharedRAbundVector();
246 temp->setLabel(thislookup[i]->getLabel());
247 temp->setGroup(thislookup[i]->getGroup());
248 newLookup.push_back(temp);
252 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
254 //look at each sharedRabund and make sure they are not all zero
256 for (int j = 0; j < thislookup.size(); j++) {
257 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
260 //if they are not all zero add this bin
262 for (int j = 0; j < thislookup.size(); j++) {
263 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
266 //else{ cout << "bin # " << i << " is all zeros" << endl; }
269 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
270 thislookup = newLookup;
274 catch(exception& e) {
275 errorOut(e, "SharedCommand", "eliminateZeroOTUS");
279 //**********************************************************************************************************************
280 void SharedCommand::createMisMatchFile() {
282 ofstream outMisMatch;
283 string outputMisMatchName = outputDir + getRootName(getSimpleName(globaldata->inputFileName));
285 //you have sequences in your list file that are not in your group file
286 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) {
287 outputMisMatchName += "missing.group";
288 mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
290 openOutputFile(outputMisMatchName, outMisMatch);
292 //go through list and if group returns "not found" output it
293 for (int i = 0; i < SharedList->getNumBins(); i++) {
295 string names = SharedList->get(i);
297 while (names.find_first_of(',') != -1) {
298 string name = names.substr(0,names.find_first_of(','));
299 names = names.substr(names.find_first_of(',')+1, names.length());
300 string group = groupMap->getGroup(name);
302 if(group == "not found") { outMisMatch << name << endl; }
306 string group = groupMap->getGroup(names);
307 if(group == "not found") { outMisMatch << names << endl; }
313 }else {//you have sequences in your group file that are not in you list file
315 outputMisMatchName += "missing.name";
316 mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
318 map<string, string> namesInList;
320 //go through listfile and get names
321 for (int i = 0; i < SharedList->getNumBins(); i++) {
323 string names = SharedList->get(i);
325 while (names.find_first_of(',') != -1) {
326 string name = names.substr(0,names.find_first_of(','));
327 names = names.substr(names.find_first_of(',')+1, names.length());
329 namesInList[name] = name;
333 namesInList[names] = names;
336 //get names of sequences in groupfile
337 vector<string> seqNames = groupMap->getNamesSeqs();
339 map<string, string>::iterator itMatch;
341 openOutputFile(outputMisMatchName, outMisMatch);
343 //loop through names in seqNames and if they aren't in namesIn list output them
344 for (int i = 0; i < seqNames.size(); i++) {
346 itMatch = namesInList.find(seqNames[i]);
348 if (itMatch == namesInList.end()) {
350 outMisMatch << seqNames[i] << endl;
357 catch(exception& e) {
358 errorOut(e, "SharedCommand", "createMisMatchFile");
363 //**********************************************************************************************************************
365 SharedCommand::~SharedCommand(){
371 //**********************************************************************************************************************
373 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
375 for (int i = 0; i < groups.size(); i++) {
376 if (groupname == groups[i]) { return true; }
381 catch(exception& e) {
382 errorOut(e, "SharedCommand", "isValidGroup");
386 /************************************************************/