]> git.donarmstrong.com Git - mothur.git/blob - parselistcommand.cpp
8173bdc3086ccd5cc88da03aabef294411d98f07
[mothur.git] / parselistcommand.cpp
1 /*
2  *  parselistcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parselistcommand.h"
11
12 //**********************************************************************************************************************
13 ParseListCommand::ParseListCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //read in group map info.
18                 //groupMap = new GroupMap(globaldata->getGroupFile());
19                 //groupMap->readMap();
20                 groupMap = globaldata->gGroupmap;
21
22                 //fill filehandles with neccessary ofstreams
23                 int i;
24                 ofstream* temp;
25                 SharedListVector* templist;
26                 for (i=0; i<groupMap->getNumGroups(); i++) {
27                         temp = new ofstream;
28                         templist = new SharedListVector();
29                         filehandles[groupMap->namesOfGroups[i]] = temp;
30                         mapOfLists[groupMap->namesOfGroups[i]] = templist;
31                 }
32                 
33                 //set fileroot
34                 fileroot = getRootName(globaldata->getListFile());
35                 
36                 //clears file before we start to write to it below
37                 for (int i=0; i<groupMap->getNumGroups(); i++) {
38                         openOutputFile(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
39                         (*(filehandles[groupMap->namesOfGroups[i]])).close();
40                 }
41
42                 
43         }
44         catch(exception& e) {
45                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
46                 exit(1);
47         }
48         catch(...) {
49                 cout << "An unknown error has occurred in the ParseListCommand class function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
50                 exit(1);
51         }
52 }
53 /***********************************************************************/
54 void ParseListCommand::parse(int index, SharedListVector* list) {
55         try {
56                 string member, bin, groupName;
57                 bin = list->get(index);
58                 
59                 while (bin.find_first_of(',') != -1) {//while you still have sequences
60                         member = bin.substr(0,bin.find_first_of(','));
61                         if ((bin.find_first_of(',')+1) <= bin.length()) {  //checks to make sure you don't have comma at end of string
62                                 bin = bin.substr(bin.find_first_of(',')+1, bin.length());
63                         }
64                         
65                         groupName = groupMap->getGroup(member);
66                         if (groupName != "not found") {
67                                 listGroups[groupName] = listGroups[groupName] + "," + member; //adds prefix to the correct group.
68                         }else {
69                                 cerr << "Error: Sequence '" << member << "' was not found in the group file, please correct\n";
70                         }
71                 }
72                 
73                 //save last name after comma
74                 groupName = groupMap->getGroup(bin);
75                 if (groupName != "not found") {
76                         listGroups[groupName] = listGroups[groupName] + "," + bin; //adds prefix to the correct group.
77                 }else {
78                         cerr << "Error: Sequence '" << bin << "' was not found in the group file, please correct\n";
79                 }
80         }
81         catch(exception& e) {
82                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
83                 exit(1);
84         }
85         catch(...) {
86                 cout << "An unknown error has occurred in the ParseListCommand class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
87                 exit(1);
88         }
89 }
90
91 //**********************************************************************************************************************
92
93 int ParseListCommand::execute(){
94         try{
95                 
96                         int count = 1;
97                         
98                         //read in listfile
99                         read = new ReadOTUFile(globaldata->inputFileName);      
100                         read->read(&*globaldata); 
101                         input = globaldata->ginput;
102                         list = globaldata->gSharedList;
103                         SharedListVector* lastList = list;
104                 
105                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
106                         set<string> processedLabels;
107                         set<string> userLabels = globaldata->labels;
108                         set<int> userLines = globaldata->lines;
109                         
110                         //parses and sets each groups listvector
111                         //as long as you are not at the end of the file or done wih the lines you want
112                         while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
113                                                                 
114                                 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
115                                         cout << list->getLabel() << '\t' << count << endl;
116                                         process(list);
117                                         
118                                         processedLabels.insert(list->getLabel());
119                                         userLabels.erase(list->getLabel());
120                                         userLines.erase(count);
121                                 }
122                                 
123                                 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) {
124                                         cout << lastList->getLabel() << '\t' << count << endl;
125                                         process(lastList);
126                                         
127                                         processedLabels.insert(lastList->getLabel());
128                                         userLabels.erase(lastList->getLabel());
129                                 }
130
131                                 if (count != 1) { delete lastList; }
132                                 lastList = list;                        
133
134                                 list = input->getSharedListVector();
135                                 count++;
136                         }
137                         
138                         //output error messages about any remaining user labels
139                         set<string>::iterator it;
140                         bool needToRun = false;
141                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
142                                 cout << "Your file does not include the label "<< *it; 
143                                 if (processedLabels.count(lastList->getLabel()) != 1) {
144                                         cout << ". I will use " << lastList->getLabel() << "." << endl;
145                                         needToRun = true;
146                                 }else {
147                                         cout << ". Please refer to " << lastList->getLabel() << "." << endl;
148                                 }
149                         }
150                 
151                         //run last line if you need to
152                         if (needToRun == true)  {
153                                 cout << lastList->getLabel() << '\t' << count << endl;
154                                 process(lastList);
155                         }
156                         
157                         delete lastList;  globaldata->gSharedList = NULL;
158                         //delete list vectors to fill with parsed data
159                         for (it2 = mapOfLists.begin(); it2 != mapOfLists.end(); it2++) {
160                                 delete it2->second;
161                         }
162                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
163                                 delete it2->second;
164                         }
165                         
166                         delete input;  globaldata->ginput = NULL;
167                         delete read;
168
169                         
170                         return 0;
171         }
172         catch(exception& e) {
173                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174                 exit(1);
175         }
176         catch(...) {
177                 cout << "An unknown error has occurred in the ParseListCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178                 exit(1);
179         }
180
181 }
182 //**********************************************************************************************************************
183
184 ParseListCommand::~ParseListCommand(){
185
186                         
187 }
188 //**********************************************************************************************************************
189 void ParseListCommand::process(SharedListVector* thisList) {
190         try {
191                         string seq;
192
193                         for(int i=0; i<thisList->size(); i++) {
194                                 parse(i, thisList); //parses data[i] list of sequence names
195                                 for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
196                                         seq = it->second;
197                                         seq = seq.substr(1, seq.length()); //rips off extra comma
198                                         mapOfLists[it->first]->push_back(seq); //sets new listvector for each group
199                                 }
200                                 listGroups.clear();
201                         }
202                         //prints each new list file
203                         for (int i=0; i<groupMap->getNumGroups(); i++) {
204                                 openOutputFileAppend(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
205                                 mapOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel());
206                                 mapOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
207                                 mapOfLists[groupMap->namesOfGroups[i]]->clear();
208                                 (*(filehandles[groupMap->namesOfGroups[i]])).close();
209                         }
210
211         }
212         catch(exception& e) {
213                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
214                 exit(1);
215         }
216         catch(...) {
217                 cout << "An unknown error has occurred in the ParseListCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
218                 exit(1);
219         }
220 }