X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=parselistcommand.cpp;h=0fdcf9ad2dd3e76ed04630096895cf9c81385d81;hb=510b1cfc25cd79391d6973ca20c5ec25fb1bb3b2;hp=5379052d6dd4f1bbfe51b3b346874def78a7b841;hpb=4761e165b4a196fefa57755d3176d9ced19df6b1;p=mothur.git diff --git a/parselistcommand.cpp b/parselistcommand.cpp index 5379052..0fdcf9a 100644 --- a/parselistcommand.cpp +++ b/parselistcommand.cpp @@ -15,68 +15,66 @@ ParseListCommand::ParseListCommand(){ globaldata = GlobalData::getInstance(); //read in group map info. - groupMap = new GroupMap(globaldata->getGroupFile()); - groupMap->readMap(); - + //groupMap = new GroupMap(globaldata->getGroupFile()); + //groupMap->readMap(); + groupMap = globaldata->gGroupmap; + //fill filehandles with neccessary ofstreams int i; ofstream* temp; + SharedListVector* templist; for (i=0; igetNumGroups(); i++) { temp = new ofstream; + templist = new SharedListVector(); filehandles[groupMap->namesOfGroups[i]] = temp; + mapOfLists[groupMap->namesOfGroups[i]] = templist; } //set fileroot fileroot = getRootName(globaldata->getListFile()); - //open output list files - for (i=0; igetNumGroups(); i++) {//opens an output file for each group - openOutputFile(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]])); + //clears file before we start to write to it below + for (int i=0; igetNumGroups(); i++) { + remove((fileroot + groupMap->namesOfGroups[i] + ".list").c_str()); } + } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ParseListCommand class function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + errorOut(e, "ParseListCommand", "ParseListCommand"); exit(1); } + } /***********************************************************************/ -void ParseListCommand::parse(int index) { +void ParseListCommand::parse(int index, SharedListVector* list) { try { - string prefix, suffix, groupsName; - suffix = list->get(index); - - while (suffix.find_first_of(',') != -1) {//while you still have sequences - prefix = suffix.substr(0,suffix.find_first_of(',')); - if ((suffix.find_first_of(',')+1) <= suffix.length()) { //checks to make sure you don't have comma at end of string - suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length()); + string member, bin, groupName; + bin = list->get(index); + + while (bin.find_first_of(',') != -1) {//while you still have sequences + member = bin.substr(0,bin.find_first_of(',')); + if ((bin.find_first_of(',')+1) <= bin.length()) { //checks to make sure you don't have comma at end of string + bin = bin.substr(bin.find_first_of(',')+1, bin.length()); } - groupsName = groupMap->getGroup(prefix); - if (groupsName != "not found") { - listGroups[groupsName] = listGroups[groupsName] + "," + prefix; //adds prefix to the correct group. + groupName = groupMap->getGroup(member); + if (groupName != "not found") { + listGroups[groupName] = listGroups[groupName] + "," + member; //adds prefix to the correct group. }else { - cerr << "Error: Sequence '" << prefix << "' was not found in the group file, please correct\n"; + mothurOut("Error: Sequence '" + toString(member) + "' was not found in the group file, please correct\n"); } } //save last name after comma - groupsName = groupMap->getGroup(suffix); - if (groupsName != "not found") { - listGroups[groupsName] = listGroups[groupsName] + "," + suffix; //adds prefix to the correct group. + groupName = groupMap->getGroup(bin); + if (groupName != "not found") { + listGroups[groupName] = listGroups[groupName] + "," + bin; //adds prefix to the correct group. }else { - cerr << "Error: Sequence '" << suffix << "' was not found in the group file, please correct\n"; + mothurOut("Error: Sequence '" + toString(bin) + "' was not found in the group file, please correct\n"); } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ParseListCommand class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + errorOut(e, "ParseListCommand", "parse"); exit(1); } } @@ -85,66 +83,128 @@ void ParseListCommand::parse(int index) { int ParseListCommand::execute(){ try{ - globaldata = GlobalData::getInstance(); + + int count = 1; //read in listfile - read = new ReadPhilFile(globaldata->inputFileName); + read = new ReadOTUFile(globaldata->inputFileName); read->read(&*globaldata); input = globaldata->ginput; list = globaldata->gSharedList; - - //read in group map info. - groupMap = new GroupMap(globaldata->getGroupFile()); - groupMap->readMap(); - - string seq, label; - int i; - //create new list vectors to fill with parsed data - for (i=0; igetNumGroups(); i++) { - groupOfLists[groupMap->namesOfGroups[i]] = new SharedListVector(); - } - + string lastLabel = list->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; + set userLines = globaldata->lines; + //parses and sets each groups listvector - while(list != NULL){ - label = list->getLabel(); - for(i=0; isize(); i++) { - parse(i); //parses data[i] list of sequence names - for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors - seq = it->second; - seq = seq.substr(1, seq.length()); //rips off extra comma - groupOfLists[it->first]->push_back(seq); //sets new listvector for each group - } - listGroups.clear(); + //as long as you are not at the end of the file or done wih the lines you want + while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lastLabel) == 1){ + mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine(); + process(list); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + userLines.erase(count); } - //prints each new list file - for (i=0; igetNumGroups(); i++) { - groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label); - groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]])); - groupOfLists[groupMap->namesOfGroups[i]]->clear(); + + if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + delete list; + list = input->getSharedListVector(lastLabel); + + mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine(); + process(list); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); } + + + lastLabel = list->getLabel(); + + delete list; list = input->getSharedListVector(); + count++; } - //set groupmap for .shared commands - globaldata->gGroupmap = groupMap; + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine(); + needToRun = true; + }else { + mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine(); + } + } + + //run last line if you need to + if (needToRun == true) { + delete list; + list = input->getSharedListVector(lastLabel); + + mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine(); + process(list); + delete list; + } + + globaldata->gSharedList = NULL; + //delete list vectors to fill with parsed data + for (it2 = mapOfLists.begin(); it2 != mapOfLists.end(); it2++) { + delete it2->second; + } + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + delete it2->second; + } + + delete input; globaldata->ginput = NULL; + delete read; + return 0; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + errorOut(e, "ParseListCommand", "execute"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the ParseListCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - } //********************************************************************************************************************** ParseListCommand::~ParseListCommand(){ - delete list; - delete input; - delete read; + + } //********************************************************************************************************************** +void ParseListCommand::process(SharedListVector* thisList) { + try { + string seq; + + for(int i=0; isize(); i++) { + parse(i, thisList); //parses data[i] list of sequence names + for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors + seq = it->second; + seq = seq.substr(1, seq.length()); //rips off extra comma + mapOfLists[it->first]->push_back(seq); //sets new listvector for each group + } + listGroups.clear(); + } + //prints each new list file + for (int i=0; igetNumGroups(); i++) { + openOutputFileAppend(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]])); + mapOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel()); + mapOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]])); + mapOfLists[groupMap->namesOfGroups[i]]->clear(); + (*(filehandles[groupMap->namesOfGroups[i]])).close(); + } + + } + catch(exception& e) { + errorOut(e, "ParseListCommand", "process"); + exit(1); + } +}