]> git.donarmstrong.com Git - mothur.git/blob - parselistcommand.cpp
5de3b72daa63b962ff9be84b5665e7230fb2470b
[mothur.git] / parselistcommand.cpp
1 /*
2  *  parselistcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parselistcommand.h"
11
12 //**********************************************************************************************************************
13 ParseListCommand::ParseListCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //read in group map info.
18                 groupMap = globaldata->gGroupmap;
19
20                 //fill filehandles with neccessary ofstreams
21                 int i;
22                 ofstream* temp;
23                 for (i=0; i<groupMap->getNumGroups(); i++) {
24                         temp = new ofstream;
25                         filehandles[groupMap->namesOfGroups[i]] = temp;
26                 }
27                 
28                 //set fileroot
29                 fileroot = getRootName(globaldata->getListFile());
30                 
31                 //open output list files
32                 for (i=0; i<groupMap->getNumGroups(); i++) {//opens an output file for each group
33                         openOutputFile(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
34                 }
35         }
36         catch(exception& e) {
37                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
38                 exit(1);
39         }
40         catch(...) {
41                 cout << "An unknown error has occurred in the ParseListCommand class function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
42                 exit(1);
43         }
44 }
45 /***********************************************************************/
46 void ParseListCommand::parse(int index, SharedListVector* list) {
47         try {
48                 string prefix, suffix, groupsName;
49                 suffix = list->get(index);
50         
51                 while (suffix.find_first_of(',') != -1) {//while you still have sequences
52                         prefix = suffix.substr(0,suffix.find_first_of(','));
53                         if ((suffix.find_first_of(',')+1) <= suffix.length()) {  //checks to make sure you don't have comma at end of string
54                                 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
55                         }
56                         
57                         groupsName = groupMap->getGroup(prefix);
58                         if (groupsName != "not found") {
59                                 listGroups[groupsName] = listGroups[groupsName] + "," + prefix; //adds prefix to the correct group.
60                         }else {
61                                 cerr << "Error: Sequence '" << prefix << "' was not found in the group file, please correct\n";
62                         }
63                 }
64                 
65                 //save last name after comma
66                 groupsName = groupMap->getGroup(suffix);
67                 if (groupsName != "not found") {
68                         listGroups[groupsName] = listGroups[groupsName] + "," + suffix; //adds prefix to the correct group.
69                 }else {
70                         cerr << "Error: Sequence '" << suffix << "' was not found in the group file, please correct\n";
71                 }
72         }
73         catch(exception& e) {
74                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
75                 exit(1);
76         }
77         catch(...) {
78                 cout << "An unknown error has occurred in the ParseListCommand class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
79                 exit(1);
80         }
81 }
82
83 //**********************************************************************************************************************
84
85 int ParseListCommand::execute(){
86         try{
87                         int count = 1;
88                         
89                         //read in listfile
90                         read = new ReadOTUFile(globaldata->inputFileName);      
91                         read->read(&*globaldata); 
92                         input = globaldata->ginput;
93                         list = globaldata->gSharedList;
94                         SharedListVector* lastList = list;
95                 
96                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
97                         set<string> processedLabels;
98                         set<string> userLabels = globaldata->labels;
99                         set<int> userLines = globaldata->lines;
100                         
101                         //create new list vectors to fill with parsed data
102                         for (int i=0; i<groupMap->getNumGroups(); i++) {
103                                 groupOfLists[groupMap->namesOfGroups[i]] = new SharedListVector();
104                         }
105                         
106                                                 
107                         //parses and sets each groups listvector
108                         //as long as you are not at the end of the file or done wih the lines you want
109                         while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
110                                                                 
111                                 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
112                                         cout << list->getLabel() << '\t' << count << endl;
113                                         process(list);
114                                         
115                                         processedLabels.insert(list->getLabel());
116                                         userLabels.erase(list->getLabel());
117                                         userLines.erase(count);
118                                 }
119                                 
120                                 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) {
121                                         cout << lastList->getLabel() << '\t' << count << endl;
122                                         process(lastList);
123                                         
124                                         processedLabels.insert(lastList->getLabel());
125                                         userLabels.erase(lastList->getLabel());
126                                 }
127
128                                 if (count != 1) { delete lastList; }
129                                 lastList = list;                        
130
131                                 list = input->getSharedListVector();
132                                 count++;
133                         }
134                         
135                         //output error messages about any remaining user labels
136                         set<string>::iterator it;
137                         bool needToRun = false;
138                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
139                                 cout << "Your file does not include the label "<< *it; 
140                                 if (processedLabels.count(lastList->getLabel()) != 1) {
141                                         cout << ". I will use " << lastList->getLabel() << "." << endl;
142                                         needToRun = true;
143                                 }else {
144                                         cout << ". Please refer to " << lastList->getLabel() << "." << endl;
145                                 }
146                         }
147                 
148                         //run last line if you need to
149                         if (needToRun == true)  {
150                                 cout << lastList->getLabel() << '\t' << count << endl;
151                                 process(lastList);
152                         }
153                         delete lastList;
154
155                         //set groupmap for .shared commands
156                         if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
157                         globaldata->gGroupmap = groupMap; 
158                         
159                         //close files
160                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
161                                 ofstream* temp = it3->second;
162                                 (*temp).close(); 
163                                 delete it3->second;
164                         }
165                         
166                         //delete list vectors to fill with parsed data
167                         for (it2 = groupOfLists.begin(); it2 != groupOfLists.end(); it2++) {
168                                 delete it2->second;
169                         }
170
171                         return 0;
172         }
173         catch(exception& e) {
174                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
175                 exit(1);
176         }
177         catch(...) {
178                 cout << "An unknown error has occurred in the ParseListCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179                 exit(1);
180         }
181
182 }
183 //**********************************************************************************************************************
184
185 ParseListCommand::~ParseListCommand(){
186         delete list;
187         delete input;
188         delete read;    
189 }
190 //**********************************************************************************************************************
191 void ParseListCommand::process(SharedListVector* thisList) {
192         try {
193                         string seq;
194
195                         for(int i=0; i<thisList->size(); i++) {
196                                 parse(i, thisList); //parses data[i] list of sequence names
197                                 for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
198                                         seq = it->second;
199                                         seq = seq.substr(1, seq.length()); //rips off extra comma
200                                         groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
201                                 }
202                                 listGroups.clear();
203                         }
204                         //prints each new list file
205                         for (int i=0; i<groupMap->getNumGroups(); i++) {
206                                 groupOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel());
207                                 groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
208                                 groupOfLists[groupMap->namesOfGroups[i]]->clear();
209                         }
210
211         }
212         catch(exception& e) {
213                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
214                 exit(1);
215         }
216         catch(...) {
217                 cout << "An unknown error has occurred in the ParseListCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
218                 exit(1);
219         }
220 }