5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parselistcommand.h"
12 //**********************************************************************************************************************
// Constructor: caches the GlobalData instance and its group map, registers one
// entry per group in `filehandles` and `mapOfLists`, sets `fileroot` from the
// list file reported by globaldata, and removes any existing per-group
// "<fileroot><group>.list" files so later appends start from empty files.
// NOTE(review): several original lines are not visible in this view (e.g. the
// declarations of `i` and `temp`, and the try/catch scaffolding) - confirm
// against the full file before changing anything here.
13 ParseListCommand::ParseListCommand(){
15 globaldata = GlobalData::getInstance();
17 //read in group map info.
18 //groupMap = new GroupMap(globaldata->getGroupFile());
19 //groupMap->readMap();
// The group map is taken from globaldata instead of being re-read (see the disabled code above).
20 groupMap = globaldata->gGroupmap;
22 //fill filehandles with necessary ofstreams
25 SharedListVector* templist;
26 for (i=0; i<groupMap->getNumGroups(); i++) {
// One heap-allocated SharedListVector per group, keyed by group name.
28 templist = new SharedListVector();
// `temp` is assigned on a line not visible here; presumably an ofstream* - TODO confirm.
29 filehandles[groupMap->namesOfGroups[i]] = temp;
30 mapOfLists[groupMap->namesOfGroups[i]] = templist;
// Output file names are built as fileroot + group name + ".list" (see remove() below).
34 fileroot = getRootName(globaldata->getListFile());
36 //clears file before we start to write to it below
37 for (int i=0; i<groupMap->getNumGroups(); i++) {
38 remove((fileroot + groupMap->namesOfGroups[i] + ".list").c_str());
// Reports the exception `e` via errorOut, tagged with the class and function name;
// presumably inside a catch handler that is not visible in this view.
43 errorOut(e, "ParseListCommand", "ParseListCommand");
48 /***********************************************************************/
// Splits the comma-separated entry at position `index` of `list` into individual
// names, looks up each name's group through groupMap, and appends "," + name to
// listGroups[group]. Names whose lookup returns "not found" are reported through
// mothurOut.
//   index - position of the entry to read via list->get(index)
//   list  - list vector the entry is read from
// NOTE(review): the `else` lines and closing braces of the original are not
// visible in this view; the error messages below presumably sit in else branches.
49 void ParseListCommand::parse(int index, SharedListVector* list) {
51 string member, bin, groupName;
52 bin = list->get(index);
// NOTE(review): compares the result of find_first_of against -1; if `string` is
// std::string this relies on -1 converting to npos - consider string::npos.
54 while (bin.find_first_of(',') != -1) {//while you still have sequences
// Name before the first comma.
55 member = bin.substr(0,bin.find_first_of(','));
// NOTE(review): inside this loop a comma was found, so its position + 1 is
// always <= bin.length(); this condition appears to be always true.
56 if ((bin.find_first_of(',')+1) <= bin.length()) { //checks to make sure you don't have comma at end of string
// Drop the consumed name and its comma from the front of bin.
57 bin = bin.substr(bin.find_first_of(',')+1, bin.length());
60 groupName = groupMap->getGroup(member);
61 if (groupName != "not found") {
// Each appended name is preceded by a comma, so the accumulated string starts with ",".
62 listGroups[groupName] = listGroups[groupName] + "," + member; //adds prefix to the correct group.
64 mothurOut("Error: Sequence '" + toString(member) + "' was not found in the group file, please correct\n");
68 //save last name after comma
69 groupName = groupMap->getGroup(bin);
70 if (groupName != "not found") {
71 listGroups[groupName] = listGroups[groupName] + "," + bin; //adds prefix to the correct group.
73 mothurOut("Error: Sequence '" + toString(bin) + "' was not found in the group file, please correct\n");
// Reports the exception `e` via errorOut; presumably inside a catch handler not visible here.
77 errorOut(e, "ParseListCommand", "parse");
82 //**********************************************************************************************************************
// Reads the OTU input file named by globaldata->inputFileName, then walks its
// list vectors one at a time, selecting those requested through
// globaldata->allLines, globaldata->lines or globaldata->labels. Labels the user
// asked for but the file lacks are reported at the end. Finally the shared list
// pointer and input pointer held by globaldata are reset to NULL.
// NOTE(review): many original lines are missing from this view (the declaration
// of `count`, the per-list work done after each label is printed, the loop
// bodies of the cleanup section, and the return statement) - confirm against the
// full file.
84 int ParseListCommand::execute(){
90 read = new ReadOTUFile(globaldata->inputFileName);
91 read->read(&*globaldata);
// After read(), the input object and the first list vector are taken from globaldata.
92 input = globaldata->ginput;
93 list = globaldata->gSharedList;
94 string lastLabel = list->getLabel();
96 //if the user enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// processedLabels: labels handled so far; userLabels/userLines: local copies of
// the requested labels/lines, erased from as they are satisfied.
97 set<string> processedLabels;
98 set<string> userLabels = globaldata->labels;
99 set<int> userLines = globaldata->lines;
101 //parses and sets each groups listvector
102 //as long as you are not at the end of the file or done with the lines you want
103 while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
// NOTE(review): the label test uses lastLabel, not list->getLabel(); lastLabel is
// updated below before `list` advances - confirm this is intended.
105 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lastLabel) == 1){
106 mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
// Record the label as done and remove it from the outstanding requests.
109 processedLabels.insert(list->getLabel());
110 userLabels.erase(list->getLabel());
111 userLines.erase(count);
// If anyLabelsToProcess() reports true for the current label and lastLabel has
// not been processed yet, fall back to the list stored under lastLabel.
114 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
116 list = input->getSharedListVector(lastLabel);
118 mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
121 processedLabels.insert(list->getLabel());
122 userLabels.erase(list->getLabel());
126 lastLabel = list->getLabel();
// Advance to the next list vector in the input.
129 list = input->getSharedListVector();
133 //output error messages about any remaining user labels
134 set<string>::iterator it;
135 bool needToRun = false;
136 for (it = userLabels.begin(); it != userLabels.end(); it++) {
137 mothurOut("Your file does not include the label " + *it);
138 if (processedLabels.count(lastLabel) != 1) {
139 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
142 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
146 //run last line if you need to
// needToRun is presumably set to true in the loop above on a line not visible here - TODO confirm.
147 if (needToRun == true) {
149 list = input->getSharedListVector(lastLabel);
151 mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
156 globaldata->gSharedList = NULL;
157 //delete list vectors to fill with parsed data
// The bodies of the two cleanup loops below are not visible in this view.
158 for (it2 = mapOfLists.begin(); it2 != mapOfLists.end(); it2++) {
161 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
165 delete input; globaldata->ginput = NULL;
// Any std exception raised above is reported through errorOut with the class and function name.
171 catch(exception& e) {
172 errorOut(e, "ParseListCommand", "execute");
176 //**********************************************************************************************************************
// Destructor. NOTE(review): its body is not visible in this view, so which
// members (if any) it releases cannot be determined from here.
178 ParseListCommand::~ParseListCommand(){
182 //**********************************************************************************************************************
// Splits every entry of `thisList` by group (via parse()), pushes each group's
// names into that group's list vector in mapOfLists, then appends each group's
// list vector, labelled with thisList's label, to "<fileroot><group>.list".
//   thisList - the list vector to split across the per-group output files
// NOTE(review): some original lines are missing from this view (the assignment
// of `seq`, loop closing braces and nesting, any reset of listGroups) - confirm
// against the full file.
183 void ParseListCommand::process(SharedListVector* thisList) {
187 for(int i=0; i<thisList->size(); i++) {
188 parse(i, thisList); //parses data[i] list of sequence names
189 for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors
// parse() prefixes every name with ",", so the first character is dropped here.
191 seq = seq.substr(1, seq.length()); //rips off extra comma
192 mapOfLists[it->first]->push_back(seq); //sets new listvector for each group
196 //prints each new list file
197 for (int i=0; i<groupMap->getNumGroups(); i++) {
// Open in append mode, write this label's list, then clear the list vector and
// close the stream before moving on to the next group.
198 openOutputFileAppend(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
199 mapOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel());
200 mapOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
201 mapOfLists[groupMap->namesOfGroups[i]]->clear();
202 (*(filehandles[groupMap->namesOfGroups[i]])).close();
// Any std exception raised above is reported through errorOut with the class and function name.
206 catch(exception& e) {
207 errorOut(e, "ParseListCommand", "process");