]> git.donarmstrong.com Git - mothur.git/blob - parselistcommand.cpp
This is v.1.4.0
[mothur.git] / parselistcommand.cpp
1 /*
2  *  parselistcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parselistcommand.h"
11
12 //**********************************************************************************************************************
13 ParseListCommand::ParseListCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //read in group map info.
18                 groupMap = new GroupMap(globaldata->getGroupFile());
19                 groupMap->readMap();
20
21                 //fill filehandles with neccessary ofstreams
22                 int i;
23                 ofstream* temp;
24                 for (i=0; i<groupMap->getNumGroups(); i++) {
25                         temp = new ofstream;
26                         filehandles[groupMap->namesOfGroups[i]] = temp;
27                 }
28                 
29                 //set fileroot
30                 fileroot = getRootName(globaldata->getListFile());
31                 
32                 //open output list files
33                 for (i=0; i<groupMap->getNumGroups(); i++) {//opens an output file for each group
34                         openOutputFile(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
35                 }
36         }
37         catch(exception& e) {
38                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
39                 exit(1);
40         }
41         catch(...) {
42                 cout << "An unknown error has occurred in the ParseListCommand class function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
43                 exit(1);
44         }
45 }
46 /***********************************************************************/
47 void ParseListCommand::parse(int index, SharedListVector* list) {
48         try {
49                 string prefix, suffix, groupsName;
50                 suffix = list->get(index);
51         
52                 while (suffix.find_first_of(',') != -1) {//while you still have sequences
53                         prefix = suffix.substr(0,suffix.find_first_of(','));
54                         if ((suffix.find_first_of(',')+1) <= suffix.length()) {  //checks to make sure you don't have comma at end of string
55                                 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
56                         }
57                         
58                         groupsName = groupMap->getGroup(prefix);
59                         if (groupsName != "not found") {
60                                 listGroups[groupsName] = listGroups[groupsName] + "," + prefix; //adds prefix to the correct group.
61                         }else {
62                                 cerr << "Error: Sequence '" << prefix << "' was not found in the group file, please correct\n";
63                         }
64                 }
65                 
66                 //save last name after comma
67                 groupsName = groupMap->getGroup(suffix);
68                 if (groupsName != "not found") {
69                         listGroups[groupsName] = listGroups[groupsName] + "," + suffix; //adds prefix to the correct group.
70                 }else {
71                         cerr << "Error: Sequence '" << suffix << "' was not found in the group file, please correct\n";
72                 }
73         }
74         catch(exception& e) {
75                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
76                 exit(1);
77         }
78         catch(...) {
79                 cout << "An unknown error has occurred in the ParseListCommand class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
80                 exit(1);
81         }
82 }
83
84 //**********************************************************************************************************************
85
86 int ParseListCommand::execute(){
87         try{
88                         int count = 1;
89                         
90                         //read in listfile
91                         read = new ReadOTUFile(globaldata->inputFileName);      
92                         read->read(&*globaldata); 
93                         input = globaldata->ginput;
94                         list = globaldata->gSharedList;
95                         SharedListVector* lastList = list;
96                 
97                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
98                         set<string> processedLabels;
99                         set<string> userLabels = globaldata->labels;
100                         set<int> userLines = globaldata->lines;
101                         
102                         //create new list vectors to fill with parsed data
103                         for (int i=0; i<groupMap->getNumGroups(); i++) {
104                                 groupOfLists[groupMap->namesOfGroups[i]] = new SharedListVector();
105                         }
106                         
107                                                 
108                         //parses and sets each groups listvector
109                         //as long as you are not at the end of the file or done wih the lines you want
110                         while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
111                                                                 
112                                 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
113                                         cout << list->getLabel() << '\t' << count << endl;
114                                         process(list);
115                                         
116                                         processedLabels.insert(list->getLabel());
117                                         userLabels.erase(list->getLabel());
118                                         userLines.erase(count);
119                                 }
120                                 
121                                 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) {
122                                         cout << lastList->getLabel() << '\t' << count << endl;
123                                         process(lastList);
124                                         
125                                         processedLabels.insert(lastList->getLabel());
126                                         userLabels.erase(lastList->getLabel());
127                                 }
128
129                                 if (count != 1) { delete lastList; }
130                                 lastList = list;                        
131
132                                 list = input->getSharedListVector();
133                                 count++;
134                         }
135                         
136                         //output error messages about any remaining user labels
137                         set<string>::iterator it;
138                         bool needToRun = false;
139                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
140                                 cout << "Your file does not include the label "<< *it; 
141                                 if (processedLabels.count(lastList->getLabel()) != 1) {
142                                         cout << ". I will use " << lastList->getLabel() << "." << endl;
143                                         needToRun = true;
144                                 }else {
145                                         cout << ". Please refer to " << lastList->getLabel() << "." << endl;
146                                 }
147                         }
148                 
149                         //run last line if you need to
150                         if (needToRun == true)  {
151                                 cout << lastList->getLabel() << '\t' << count << endl;
152                                 process(lastList);
153                         }
154                         delete lastList;
155
156                         //set groupmap for .shared commands
157                         if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
158                         globaldata->gGroupmap = groupMap; 
159                         
160                         //close files
161                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
162                                 ofstream* temp = it3->second;
163                                 (*temp).close(); 
164                                 delete it3->second;
165                         }
166                         
167                         //delete list vectors to fill with parsed data
168                         for (it2 = groupOfLists.begin(); it2 != groupOfLists.end(); it2++) {
169                                 delete it2->second;
170                         }
171
172                         return 0;
173         }
174         catch(exception& e) {
175                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
176                 exit(1);
177         }
178         catch(...) {
179                 cout << "An unknown error has occurred in the ParseListCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
180                 exit(1);
181         }
182
183 }
184 //**********************************************************************************************************************
185
186 ParseListCommand::~ParseListCommand(){
187         
188                 globaldata->gSharedList = NULL;
189                 delete input;  globaldata->ginput = NULL;
190                 delete read;
191         
192 }
193 //**********************************************************************************************************************
194 void ParseListCommand::process(SharedListVector* thisList) {
195         try {
196                         string seq;
197
198                         for(int i=0; i<thisList->size(); i++) {
199                                 parse(i, thisList); //parses data[i] list of sequence names
200                                 for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
201                                         seq = it->second;
202                                         seq = seq.substr(1, seq.length()); //rips off extra comma
203                                         groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
204                                 }
205                                 listGroups.clear();
206                         }
207                         //prints each new list file
208                         for (int i=0; i<groupMap->getNumGroups(); i++) {
209                                 groupOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel());
210                                 groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
211                                 groupOfLists[groupMap->namesOfGroups[i]]->clear();
212                         }
213
214         }
215         catch(exception& e) {
216                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
217                 exit(1);
218         }
219         catch(...) {
220                 cout << "An unknown error has occurred in the ParseListCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
221                 exit(1);
222         }
223 }