]> git.donarmstrong.com Git - mothur.git/blob - parselistcommand.cpp
added logfile feature
[mothur.git] / parselistcommand.cpp
1 /*
2  *  parselistcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parselistcommand.h"
11
12 //**********************************************************************************************************************
13 ParseListCommand::ParseListCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //read in group map info.
18                 //groupMap = new GroupMap(globaldata->getGroupFile());
19                 //groupMap->readMap();
20                 groupMap = globaldata->gGroupmap;
21
22                 //fill filehandles with neccessary ofstreams
23                 int i;
24                 ofstream* temp;
25                 SharedListVector* templist;
26                 for (i=0; i<groupMap->getNumGroups(); i++) {
27                         temp = new ofstream;
28                         templist = new SharedListVector();
29                         filehandles[groupMap->namesOfGroups[i]] = temp;
30                         mapOfLists[groupMap->namesOfGroups[i]] = templist;
31                 }
32                 
33                 //set fileroot
34                 fileroot = getRootName(globaldata->getListFile());
35                 
36                 //clears file before we start to write to it below
37                 for (int i=0; i<groupMap->getNumGroups(); i++) {
38                         remove((fileroot + groupMap->namesOfGroups[i] + ".list").c_str());
39                 }
40         
41         }
42         catch(exception& e) {
43                 errorOut(e, "ParseListCommand", "ParseListCommand");
44                 exit(1);
45         }
46         
47 }
48 /***********************************************************************/
49 void ParseListCommand::parse(int index, SharedListVector* list) {
50         try {
51                 string member, bin, groupName;
52                 bin = list->get(index);
53                 
54                 while (bin.find_first_of(',') != -1) {//while you still have sequences
55                         member = bin.substr(0,bin.find_first_of(','));
56                         if ((bin.find_first_of(',')+1) <= bin.length()) {  //checks to make sure you don't have comma at end of string
57                                 bin = bin.substr(bin.find_first_of(',')+1, bin.length());
58                         }
59                         
60                         groupName = groupMap->getGroup(member);
61                         if (groupName != "not found") {
62                                 listGroups[groupName] = listGroups[groupName] + "," + member; //adds prefix to the correct group.
63                         }else {
64                                 mothurOut("Error: Sequence '" + toString(member) + "' was not found in the group file, please correct\n");
65                         }
66                 }
67                 
68                 //save last name after comma
69                 groupName = groupMap->getGroup(bin);
70                 if (groupName != "not found") {
71                         listGroups[groupName] = listGroups[groupName] + "," + bin; //adds prefix to the correct group.
72                 }else {
73                         mothurOut("Error: Sequence '" + toString(bin) + "' was not found in the group file, please correct\n");
74                 }
75         }
76         catch(exception& e) {
77                 errorOut(e, "ParseListCommand", "parse");
78                 exit(1);
79         }
80 }
81
82 //**********************************************************************************************************************
83
84 int ParseListCommand::execute(){
85         try{
86                 
87                         int count = 1;
88                         
89                         //read in listfile
90                         read = new ReadOTUFile(globaldata->inputFileName);      
91                         read->read(&*globaldata); 
92                         input = globaldata->ginput;
93                         list = globaldata->gSharedList;
94                         string lastLabel = list->getLabel();
95                 
96                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
97                         set<string> processedLabels;
98                         set<string> userLabels = globaldata->labels;
99                         set<int> userLines = globaldata->lines;
100                 
101                         //parses and sets each groups listvector
102                         //as long as you are not at the end of the file or done wih the lines you want
103                         while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
104                                                                 
105                                 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lastLabel) == 1){
106                                         mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
107                                         process(list);
108                                         
109                                         processedLabels.insert(list->getLabel());
110                                         userLabels.erase(list->getLabel());
111                                         userLines.erase(count);
112                                 }
113                                 
114                                 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
115                                         delete list;
116                                         list = input->getSharedListVector(lastLabel);
117                                         
118                                         mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
119                                         process(list);
120                                         
121                                         processedLabels.insert(list->getLabel());
122                                         userLabels.erase(list->getLabel());
123                                 }
124
125                                 
126                                 lastLabel = list->getLabel();                   
127                                 
128                                 delete list;
129                                 list = input->getSharedListVector();
130                                 count++;
131                         }
132                         
133                         //output error messages about any remaining user labels
134                         set<string>::iterator it;
135                         bool needToRun = false;
136                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
137                                 mothurOut("Your file does not include the label " + *it); 
138                                 if (processedLabels.count(lastLabel) != 1) {
139                                         mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
140                                         needToRun = true;
141                                 }else {
142                                         mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
143                                 }
144                         }
145                 
146                         //run last line if you need to
147                         if (needToRun == true)  {
148                                 delete list;
149                                 list = input->getSharedListVector(lastLabel);
150                                         
151                                 mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
152                                 process(list);
153                                 delete list;
154                         }
155                         
156                         globaldata->gSharedList = NULL;
157                         //delete list vectors to fill with parsed data
158                         for (it2 = mapOfLists.begin(); it2 != mapOfLists.end(); it2++) {
159                                 delete it2->second;
160                         }
161                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
162                                 delete it2->second;
163                         }
164                         
165                         delete input;  globaldata->ginput = NULL;
166                         delete read;
167
168                         
169                         return 0;
170         }
171         catch(exception& e) {
172                 errorOut(e, "ParseListCommand", "execute");
173                 exit(1);
174         }
175 }
176 //**********************************************************************************************************************
177
178 ParseListCommand::~ParseListCommand(){
179
180                         
181 }
182 //**********************************************************************************************************************
183 void ParseListCommand::process(SharedListVector* thisList) {
184         try {
185                         string seq;
186
187                         for(int i=0; i<thisList->size(); i++) {
188                                 parse(i, thisList); //parses data[i] list of sequence names
189                                 for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
190                                         seq = it->second;
191                                         seq = seq.substr(1, seq.length()); //rips off extra comma
192                                         mapOfLists[it->first]->push_back(seq); //sets new listvector for each group
193                                 }
194                                 listGroups.clear();
195                         }
196                         //prints each new list file
197                         for (int i=0; i<groupMap->getNumGroups(); i++) {
198                                 openOutputFileAppend(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
199                                 mapOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel());
200                                 mapOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
201                                 mapOfLists[groupMap->namesOfGroups[i]]->clear();
202                                 (*(filehandles[groupMap->namesOfGroups[i]])).close();
203                         }
204
205         }
206         catch(exception& e) {
207                 errorOut(e, "ParseListCommand", "process");
208                 exit(1);
209         }
210 }