5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parselistcommand.h"
12 //**********************************************************************************************************************
// Constructor: sets up one output stream per group listed in the group file.
// It fetches the GlobalData instance, builds a GroupMap from the group file
// name stored there, fills `filehandles` (keyed by group name), derives
// `fileroot` from the list file name, and opens "<fileroot><group>.list" for
// every group.
// NOTE(review): this extract is missing interior lines (the embedded original
// line numbers skip). The try/catch headers, the closing braces and the
// statement that assigns `temp` are not visible here.
13 ParseListCommand::ParseListCommand(){
15 globaldata = GlobalData::getInstance();
17 //read in group map info.
// Heap-allocated; execute() later stores this same pointer in globaldata->gGroupmap.
18 groupMap = new GroupMap(globaldata->getGroupFile());
21 //fill filehandles with necessary ofstreams
24 for (i=0; i<groupMap->getNumGroups(); i++) {
// `temp` is presumably a newly allocated ofstream* (the map value is dereferenced
// below and read as ofstream* in execute()) -- TODO confirm against the missing line.
26 filehandles[groupMap->namesOfGroups[i]] = temp;
// Output file names are built by appending the group name and ".list" to this root.
30 fileroot = getRootName(globaldata->getListFile());
32 //open output list files
33 for (i=0; i<groupMap->getNumGroups(); i++) {//opens an output file for each group
34 openOutputFile(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
// The two messages below are presumably emitted from catch(exception&) and
// catch(...) handlers whose headers are not visible in this extract.
38 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
42 cout << "An unknown error has occurred in the ParseListCommand class function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
46 /***********************************************************************/
// Splits the comma-separated sequence names stored at position `index` of
// `list` and appends each name to the listGroups entry of the group that
// groupMap reports for it.
//   index - position within `list` whose name string is parsed
//   list  - list vector supplying the comma-separated names via get(index)
// Every name is appended as "," + name, so each listGroups value starts with
// a comma; process() removes that leading comma.
// NOTE(review): interior lines (try header, else branches, closing braces)
// are missing from this extract.
47 void ParseListCommand::parse(int index, SharedListVector* list) {
49 string prefix, suffix, groupsName;
50 suffix = list->get(index);
// NOTE(review): find_first_of returns an unsigned size_type; comparing with -1
// only works through conversion to npos. std::string::npos states the intent.
52 while (suffix.find_first_of(',') != -1) {//while you still have sequences
// prefix = text before the first comma, i.e. the next sequence name.
53 prefix = suffix.substr(0,suffix.find_first_of(','));
// NOTE(review): once a comma has been found its position is < length(), so this
// condition is always true; a trailing comma leaves suffix empty and the
// empty name is then looked up after the loop.
54 if ((suffix.find_first_of(',')+1) <= suffix.length()) { //checks to make sure you don't have comma at end of string
55 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
58 groupsName = groupMap->getGroup(prefix);
// "not found" is the sentinel string compared against here for an unknown sequence.
59 if (groupsName != "not found") {
60 listGroups[groupsName] = listGroups[groupsName] + "," + prefix; //adds prefix to the correct group.
// Presumably the else branch of the check above (the else line is not visible).
62 cerr << "Error: Sequence '" << prefix << "' was not found in the group file, please correct\n";
66 //save last name after comma
67 groupsName = groupMap->getGroup(suffix);
68 if (groupsName != "not found") {
69 listGroups[groupsName] = listGroups[groupsName] + "," + suffix; //adds the final name (suffix) to the correct group.
// Presumably the else branch of the check above (the else line is not visible).
71 cerr << "Error: Sequence '" << suffix << "' was not found in the group file, please correct\n";
// The two messages below are presumably emitted from catch handlers whose
// headers are not visible in this extract.
75 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
79 cout << "An unknown error has occurred in the ParseListCommand class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
84 //**********************************************************************************************************************
// Runs the command: reads the input file, walks its list vectors one label at
// a time, and handles the ones selected by globaldata (all lines, specific
// line numbers, or specific labels). Afterwards it reports user labels that
// were never matched, installs this command's GroupMap in globaldata, and
// iterates the file handles and per-group list vectors for cleanup.
// Returns an int; the return statement is not visible in this extract.
// NOTE(review): interior lines are missing here (the embedded original line
// numbers skip), including the try header, the calls that presumably invoke
// process(), the updates of `count` and `lastList`, and the bodies of the two
// cleanup loops.
86 int ParseListCommand::execute(){
91 read = new ReadOTUFile(globaldata->inputFileName);
92 read->read(&*globaldata);
93 input = globaldata->ginput;
94 list = globaldata->gSharedList;
// lastList starts out aliasing the first list read.
95 SharedListVector* lastList = list;
97 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
98 set<string> processedLabels;
// Local copies: entries are erased from these as they are satisfied, leaving
// globaldata->labels and globaldata->lines untouched.
99 set<string> userLabels = globaldata->labels;
100 set<int> userLines = globaldata->lines;
102 //create new list vectors to fill with parsed data
103 for (int i=0; i<groupMap->getNumGroups(); i++) {
104 groupOfLists[groupMap->namesOfGroups[i]] = new SharedListVector();
108 //parses and sets each groups listvector
109 //as long as you are not at the end of the file or done with the lines you want
110 while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
// The selection test reads the full sets in globaldata, not the shrinking local copies.
112 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
113 cout << list->getLabel() << '\t' << count << endl;
// Presumably the current list is processed on the missing line before this
// bookkeeping -- TODO confirm.
116 processedLabels.insert(list->getLabel());
117 userLabels.erase(list->getLabel());
118 userLines.erase(count);
// Fallback to the previous list when anyLabelsToProcess (defined elsewhere)
// returns true and the previous label has not been handled yet.
121 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) {
122 cout << lastList->getLabel() << '\t' << count << endl;
125 processedLabels.insert(lastList->getLabel());
126 userLabels.erase(lastList->getLabel());
// Skips the delete when count is 1, where lastList still aliases the first list
// (assuming count starts at 1 -- its initialisation is not visible).
129 if (count != 1) { delete lastList; }
132 list = input->getSharedListVector();
136 //output error messages about any remaining user labels
137 set<string>::iterator it;
// NOTE(review): the statement that sets needToRun to true is not visible here;
// presumably it sits in the "I will use" branch below.
138 bool needToRun = false;
139 for (it = userLabels.begin(); it != userLabels.end(); it++) {
140 cout << "Your file does not include the label "<< *it;
141 if (processedLabels.count(lastList->getLabel()) != 1) {
142 cout << ". I will use " << lastList->getLabel() << "." << endl;
// Presumably the else branch: the last label was already processed.
145 cout << ". Please refer to " << lastList->getLabel() << "." << endl;
149 //run last line if you need to
150 if (needToRun == true) {
151 cout << lastList->getLabel() << '\t' << count << endl;
156 //set groupmap for .shared commands
// NOTE(review): if globaldata->gGroupmap already equals groupMap (e.g. execute()
// called twice on one object), this deletes the object and then stores a
// dangling pointer -- confirm that cannot happen.
157 if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
158 globaldata->gGroupmap = groupMap;
// Walks every output stream; the loop body beyond fetching the pointer is not
// visible (presumably it closes and/or deletes the stream).
161 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
162 ofstream* temp = it3->second;
167 //delete list vectors to fill with parsed data
168 for (it2 = groupOfLists.begin(); it2 != groupOfLists.end(); it2++) {
174 catch(exception& e) {
175 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Presumably inside a catch(...) handler whose header is not visible.
179 cout << "An unknown error has occurred in the ParseListCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
184 //**********************************************************************************************************************
// Destructor. NOTE(review): only the header line is present in this extract;
// the body is not visible, so what it releases cannot be stated from here.
186 ParseListCommand::~ParseListCommand(){
191 //**********************************************************************************************************************
// Splits one list vector by group and writes the per-group result.
//   thisList - the list vector to split; its label is copied to every output
// For each entry of thisList, parse() fills listGroups with the names that
// belong to each group; those strings are pushed onto the matching vector in
// groupOfLists. Each group's vector is then labelled, printed to that
// group's file handle, and cleared.
// NOTE(review): interior lines are missing from this extract, including the
// try header, the assignment of `seq` and any reset of listGroups between
// entries.
192 void ParseListCommand::process(SharedListVector* thisList) {
196 for(int i=0; i<thisList->size(); i++) {
197 parse(i, thisList); //parses data[i] list of sequence names
198 for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors
// `seq` is presumably it->second (assignment not visible). parse() prepends a
// comma to every name, so the first character is dropped here.
200 seq = seq.substr(1, seq.length()); //rips off extra comma
201 groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
205 //prints each new list file
206 for (int i=0; i<groupMap->getNumGroups(); i++) {
207 groupOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel());
208 groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
// Cleared after printing so the same vector can be refilled on the next call.
209 groupOfLists[groupMap->namesOfGroups[i]]->clear();
213 catch(exception& e) {
214 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Presumably inside a catch(...) handler whose header is not visible.
218 cout << "An unknown error has occurred in the ParseListCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";