5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parselistcommand.h"
12 //**********************************************************************************************************************
// Constructor: prepares per-group state for splitting a list file by group.
// Visible steps:
//   1. fetch the GlobalData singleton and take its group map (gGroupmap);
//   2. for every group name, store an entry in filehandles and a fresh
//      SharedListVector in mapOfLists, both keyed by the group name;
//   3. derive fileroot from the list file name;
//   4. open and immediately close "<fileroot><group>.list" for every group.
// NOTE(review): this listing appears to omit some original lines (for example
// the declarations of `i` and `temp`, the allocation of `temp`, and the
// try/catch scaffolding around the two error messages at the end) - confirm
// against the full file before changing anything here.
13 ParseListCommand::ParseListCommand(){
15 globaldata = GlobalData::getInstance();
17 //read in group map info.
18 //groupMap = new GroupMap(globaldata->getGroupFile());
19 //groupMap->readMap();
// The group map is taken from globaldata instead of being re-read from disk
// (the commented-out lines above are the previous approach).
20 groupMap = globaldata->gGroupmap;
22 //fill filehandles with necessary ofstreams
25 SharedListVector* templist;
26 for (i=0; i<groupMap->getNumGroups(); i++) {
// One heap-allocated list vector per group; nothing in this constructor frees it.
28 templist = new SharedListVector();
// `temp` is not declared or assigned in the visible lines - presumably a
// newly allocated ofstream*; TODO confirm.
29 filehandles[groupMap->namesOfGroups[i]] = temp;
30 mapOfLists[groupMap->namesOfGroups[i]] = templist;
// Prefix shared by every per-group output file name.
34 fileroot = getRootName(globaldata->getListFile());
36 //clears file before we start to write to it below
37 for (int i=0; i<groupMap->getNumGroups(); i++) {
38 openOutputFile(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
39 (*(filehandles[groupMap->namesOfGroups[i]])).close();
// Error reporting: the first message uses `e.what()`, so it presumably sits in
// a catch(exception& e) handler and the second in a catch-all handler (handlers
// not visible here).
45 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49 cout << "An unknown error has occurred in the ParseListCommand class function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
53 /***********************************************************************/
// Splits one bin of `list` into its sequence names and files each name under
// its group in listGroups.
//   index - position of the bin inside `list` (passed to list->get()).
//   list  - list vector whose bin is a comma-separated string of names.
// For every name, groupMap->getGroup() is consulted; when the result is not
// "not found", "," + name is appended to listGroups[group], otherwise an
// error is written to cerr.
// NOTE(review): closing braces, the `else` keywords and the try/catch
// scaffolding are not visible in this listing.
54 void ParseListCommand::parse(int index, SharedListVector* list) {
56 string member, bin, groupName;
57 bin = list->get(index);
// find_first_of returns an unsigned size_type; the -1 is converted to that
// type for the comparison, which is the value of std::string::npos.
59 while (bin.find_first_of(',') != -1) {//while you still have sequences
// Take everything before the first comma as the next sequence name.
60 member = bin.substr(0,bin.find_first_of(','));
// NOTE(review): when a comma exists at position p < length, p+1 <= length
// always holds, so this guard does not appear to filter out a trailing comma.
61 if ((bin.find_first_of(',')+1) <= bin.length()) { //checks to make sure you don't have comma at end of string
// Drop the consumed name and its comma; substr clamps the over-long count.
62 bin = bin.substr(bin.find_first_of(',')+1, bin.length());
65 groupName = groupMap->getGroup(member);
66 if (groupName != "not found") {
// Each entry is stored with a leading comma; process() strips the first one.
67 listGroups[groupName] = listGroups[groupName] + "," + member; //adds prefix to the correct group.
69 cerr << "Error: Sequence '" << member << "' was not found in the group file, please correct\n";
73 //save last name after comma
// After the loop `bin` holds the final name (no comma left in it).
74 groupName = groupMap->getGroup(bin);
75 if (groupName != "not found") {
76 listGroups[groupName] = listGroups[groupName] + "," + bin; //adds prefix to the correct group.
78 cerr << "Error: Sequence '" << bin << "' was not found in the group file, please correct\n";
// Error reporting: presumably inside catch handlers that are not visible here.
82 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
86 cout << "An unknown error has occurred in the ParseListCommand class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
91 //**********************************************************************************************************************
// Runs the command: reads the input file through ReadOTUFile, walks the list
// vectors it yields, and handles those selected by globaldata (allLines,
// lines, labels). Labels the user asked for that never appear are reported at
// the end.
// NOTE(review): this listing appears to omit several original lines - the
// declaration/increment of `count`, the calls that actually process a list,
// any reassignment of `lastList`, the assignment to `needToRun`, the bodies
// of the two cleanup loops, and the return statement are not visible. The
// comments below describe only what can be seen.
93 int ParseListCommand::execute(){
99 read = new ReadOTUFile(globaldata->inputFileName);
100 read->read(&*globaldata);
// read() is expected to have populated these globaldata members - TODO confirm.
101 input = globaldata->ginput;
102 list = globaldata->gSharedList;
// Starts out pointing at the same object as `list`.
103 SharedListVector* lastList = list;
105 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// processedLabels: labels already handled. userLabels/userLines: local copies
// of the requested labels/lines, shrunk as requests are satisfied.
106 set<string> processedLabels;
107 set<string> userLabels = globaldata->labels;
108 set<int> userLines = globaldata->lines;
110 //parses and sets each groups listvector
111 //as long as you are not at the end of the file or done with the lines you want
112 while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
// The current list is selected when all lines are wanted, or its line number
// or its label was requested.
114 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
115 cout << list->getLabel() << '\t' << count << endl;
// Mark this label/line as done so the loop condition can eventually stop.
118 processedLabels.insert(list->getLabel());
119 userLabels.erase(list->getLabel());
120 userLines.erase(count);
// Fallback to the previous list when anyLabelsToProcess() reports true and
// that previous label has not been handled yet (see the comment above about
// using the next lowest label; exact semantics of anyLabelsToProcess are not
// visible here).
123 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) {
124 cout << lastList->getLabel() << '\t' << count << endl;
127 processedLabels.insert(lastList->getLabel());
128 userLabels.erase(lastList->getLabel());
// Presumably skipped when count is 1 because lastList still aliases `list`
// at that point - TODO confirm.
131 if (count != 1) { delete lastList; }
// Advance to the next list vector; the loop tests it against NULL.
134 list = input->getSharedListVector();
138 //output error messages about any remaining user labels
139 set<string>::iterator it;
140 bool needToRun = false;
141 for (it = userLabels.begin(); it != userLabels.end(); it++) {
142 cout << "Your file does not include the label "<< *it;
// If the last list read was not processed it is offered as the substitute;
// the second message is presumably the else branch (keyword not visible).
143 if (processedLabels.count(lastList->getLabel()) != 1) {
144 cout << ". I will use " << lastList->getLabel() << "." << endl;
147 cout << ". Please refer to " << lastList->getLabel() << "." << endl;
151 //run last line if you need to
152 if (needToRun == true) {
153 cout << lastList->getLabel() << '\t' << count << endl;
// Cleanup: free the last list and clear the shared pointer in globaldata.
157 delete lastList; globaldata->gSharedList = NULL;
158 //delete list vectors to fill with parsed data
// Loop bodies are not visible; they iterate over every mapOfLists and
// filehandles entry.
159 for (it2 = mapOfLists.begin(); it2 != mapOfLists.end(); it2++) {
162 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
166 delete input; globaldata->ginput = NULL;
// Standard exceptions are reported on cout with the message from e.what().
172 catch(exception& e) {
173 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Presumably inside a catch-all handler that is not visible here.
177 cout << "An unknown error has occurred in the ParseListCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
182 //**********************************************************************************************************************
// Destructor. NOTE(review): only the signature line is visible in this
// listing; the body (if any) cannot be described from here.
184 ParseListCommand::~ParseListCommand(){
188 //**********************************************************************************************************************
// Splits every bin of `thisList` by group and appends the per-group result to
// each group's ".list" file.
//   thisList - the list vector to split; its label is copied onto every
//              per-group list before printing.
// NOTE(review): the assignment to `seq`, any reset of listGroups between bins
// and the try/catch scaffolding are not visible in this listing.
189 void ParseListCommand::process(SharedListVector* thisList) {
193 for(int i=0; i<thisList->size(); i++) {
194 parse(i, thisList); //parses data[i] list of sequence names
195 for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors
// `seq` is presumably it->second (assignment not visible); parse() stores
// each group's names with a leading comma, removed here.
197 seq = seq.substr(1, seq.length()); //rips off extra comma
198 mapOfLists[it->first]->push_back(seq); //sets new listvector for each group
202 //prints each new list file
203 for (int i=0; i<groupMap->getNumGroups(); i++) {
// Reopen the group's file via openOutputFileAppend, write the labelled list,
// then clear the list and close the stream.
204 openOutputFileAppend(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
205 mapOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel());
206 mapOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
207 mapOfLists[groupMap->namesOfGroups[i]]->clear();
208 (*(filehandles[groupMap->namesOfGroups[i]])).close();
// Standard exceptions are reported on cout with the message from e.what().
212 catch(exception& e) {
213 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Presumably inside a catch-all handler that is not visible here.
217 cout << "An unknown error has occurred in the ParseListCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";