]> git.donarmstrong.com Git - mothur.git/blob - parselistcommand.cpp
added get.rabund and get.sabund command and fixed bug introduced by line by line...
[mothur.git] / parselistcommand.cpp
1 /*
2  *  parselistcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parselistcommand.h"
11
12 //**********************************************************************************************************************
13 ParseListCommand::ParseListCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //read in group map info.
18                 groupMap = new GroupMap(globaldata->getGroupFile());
19                 groupMap->readMap();
20
21                 
22                 //fill filehandles with neccessary ofstreams
23                 int i;
24                 ofstream* temp;
25                 for (i=0; i<groupMap->getNumGroups(); i++) {
26                         temp = new ofstream;
27                         filehandles[groupMap->namesOfGroups[i]] = temp;
28                 }
29                 
30                 //set fileroot
31                 fileroot = getRootName(globaldata->getListFile());
32                 
33                 //open output list files
34                 for (i=0; i<groupMap->getNumGroups(); i++) {//opens an output file for each group
35                         openOutputFile(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
36                 }
37         }
38         catch(exception& e) {
39                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
40                 exit(1);
41         }
42         catch(...) {
43                 cout << "An unknown error has occurred in the ParseListCommand class function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
44                 exit(1);
45         }
46 }
47 /***********************************************************************/
48 void ParseListCommand::parse(int index, SharedListVector* list) {
49         try {
50                 string prefix, suffix, groupsName;
51                 suffix = list->get(index);
52         
53                 while (suffix.find_first_of(',') != -1) {//while you still have sequences
54                         prefix = suffix.substr(0,suffix.find_first_of(','));
55                         if ((suffix.find_first_of(',')+1) <= suffix.length()) {  //checks to make sure you don't have comma at end of string
56                                 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
57                         }
58                         
59                         groupsName = groupMap->getGroup(prefix);
60                         if (groupsName != "not found") {
61                                 listGroups[groupsName] = listGroups[groupsName] + "," + prefix; //adds prefix to the correct group.
62                         }else {
63                                 cerr << "Error: Sequence '" << prefix << "' was not found in the group file, please correct\n";
64                         }
65                 }
66                 
67                 //save last name after comma
68                 groupsName = groupMap->getGroup(suffix);
69                 if (groupsName != "not found") {
70                         listGroups[groupsName] = listGroups[groupsName] + "," + suffix; //adds prefix to the correct group.
71                 }else {
72                         cerr << "Error: Sequence '" << suffix << "' was not found in the group file, please correct\n";
73                 }
74         }
75         catch(exception& e) {
76                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
77                 exit(1);
78         }
79         catch(...) {
80                 cout << "An unknown error has occurred in the ParseListCommand class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
81                 exit(1);
82         }
83 }
84
85 //**********************************************************************************************************************
86
87 int ParseListCommand::execute(){
88         try{
89                         globaldata = GlobalData::getInstance();
90                         int count = 1;
91                         
92                         //read in listfile
93                         read = new ReadOTUFile(globaldata->inputFileName);      
94                         read->read(&*globaldata); 
95                         input = globaldata->ginput;
96                         list = globaldata->gSharedList;
97                         SharedListVector* lastList = list;
98                 
99                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
100                         set<string> processedLabels;
101                         set<string> userLabels = globaldata->labels;
102                         set<int> userLines = globaldata->lines;
103
104                         //read in group map info.
105                         groupMap = new GroupMap(globaldata->getGroupFile());
106                         groupMap->readMap();
107                         
108                         //create new list vectors to fill with parsed data
109                         for (int i=0; i<groupMap->getNumGroups(); i++) {
110                                 groupOfLists[groupMap->namesOfGroups[i]] = new SharedListVector();
111                         }
112                         
113                                                 
114                         //parses and sets each groups listvector
115                         //as long as you are not at the end of the file or done wih the lines you want
116                         while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
117                                                                 
118                                 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
119                                         cout << list->getLabel() << '\t' << count << endl;
120                                         process(list);
121                                         
122                                         processedLabels.insert(list->getLabel());
123                                         userLabels.erase(list->getLabel());
124                                         userLines.erase(count);
125                                 }
126                                 
127                                 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) {
128                                         cout << lastList->getLabel() << '\t' << count << endl;
129                                         process(lastList);
130                                         
131                                         processedLabels.insert(lastList->getLabel());
132                                         userLabels.erase(lastList->getLabel());
133                                 }
134
135                                 if (count != 1) { delete lastList; }
136                                 lastList = list;                        
137
138                                 list = input->getSharedListVector();
139                                 count++;
140                         }
141                         
142                         //output error messages about any remaining user labels
143                         set<string>::iterator it;
144                         bool needToRun = false;
145                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
146                                 cout << "Your file does not include the label "<< *it; 
147                                 if (processedLabels.count(lastList->getLabel()) != 1) {
148                                         cout << ". I will use " << lastList->getLabel() << "." << endl;
149                                         needToRun = true;
150                                 }else {
151                                         cout << ". Please refer to " << lastList->getLabel() << "." << endl;
152                                 }
153                         }
154                 
155                         //run last line if you need to
156                         if (needToRun == true)  {
157                                 cout << lastList->getLabel() << '\t' << count << endl;
158                                 process(lastList);
159                         }
160                         delete lastList;
161
162                         //set groupmap for .shared commands
163                         if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
164                         globaldata->gGroupmap = groupMap; 
165                         
166                         return 0;
167         }
168         catch(exception& e) {
169                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
170                 exit(1);
171         }
172         catch(...) {
173                 cout << "An unknown error has occurred in the ParseListCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174                 exit(1);
175         }
176
177 }
178 //**********************************************************************************************************************
179
180 ParseListCommand::~ParseListCommand(){
181         delete list;
182         delete input;
183         delete read;    
184 }
185 //**********************************************************************************************************************
186 void ParseListCommand::process(SharedListVector* thisList) {
187         try {
188                         string seq;
189
190                         for(int i=0; i<thisList->size(); i++) {
191                                 parse(i, thisList); //parses data[i] list of sequence names
192                                 for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
193                                         seq = it->second;
194                                         seq = seq.substr(1, seq.length()); //rips off extra comma
195                                         groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
196                                 }
197                                 listGroups.clear();
198                         }
199                         //prints each new list file
200                         for (int i=0; i<groupMap->getNumGroups(); i++) {
201                                 groupOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel());
202                                 groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
203                                 groupOfLists[groupMap->namesOfGroups[i]]->clear();
204                         }
205
206         }
207         catch(exception& e) {
208                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
209                 exit(1);
210         }
211         catch(...) {
212                 cout << "An unknown error has occurred in the ParseListCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
213                 exit(1);
214         }
215 }