]> git.donarmstrong.com Git - mothur.git/blob - parselistcommand.cpp
Initial revision
[mothur.git] / parselistcommand.cpp
1 /*
2  *  parselistcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parselistcommand.h"
11
12 //**********************************************************************************************************************
13 ParseListCommand::ParseListCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //read in group map info.
18                 groupMap = new GroupMap(globaldata->getGroupFile());
19                 groupMap->readMap();
20                         
21                 //fill filehandles with neccessary ofstreams
22                 int i;
23                 ofstream* temp;
24                 for (i=0; i<groupMap->getNumGroups(); i++) {
25                         temp = new ofstream;
26                         filehandles[groupMap->namesOfGroups[i]] = temp;
27                 }
28                 
29                 //set fileroot
30                 if(globaldata->getFileRoot() != ""){
31                         fileroot = globaldata->getFileRoot();
32                 }
33                 else{
34                         fileroot = getRootName(globaldata->getDistFile());
35                 }
36                 
37                 //open output list files
38                 for (i=0; i<groupMap->getNumGroups(); i++) {//opens an output file for each group
39                         openOutputFile(fileroot + groupMap->namesOfGroups[i] + ".list", *(filehandles[groupMap->namesOfGroups[i]]));
40                 }
41         }
42         catch(exception& e) {
43                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
44                 exit(1);
45         }
46         catch(...) {
47                 cout << "An unknown error has occurred in the ParseListCommand class function ParseListCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
48                 exit(1);
49         }
50 }
51 /***********************************************************************/
52 void ParseListCommand::parse(int index) {
53         try {
54                 string prefix, suffix, groupsName;
55                 suffix = list->get(index);
56         
57                 while (suffix.find_first_of(',') != -1) {//while you still have sequences
58                         prefix = suffix.substr(0,suffix.find_first_of(','));
59                         if ((suffix.find_first_of(',')+1) <= suffix.length()) {  //checks to make sure you don't have comma at end of string
60                                 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
61                         }
62                         
63                         groupsName = groupMap->getGroup(prefix);
64                         if (groupsName != "not found") {
65                                 listGroups[groupsName] = listGroups[groupsName] + "," + prefix; //adds prefix to the correct group.
66                         }else {
67                                 cerr << "Error: Sequence '" << prefix << "' was not found in the group file, please correct\n";
68                         }
69                 }
70                 
71                 //save last name after comma
72                 groupsName = groupMap->getGroup(suffix);
73                 if (groupsName != "not found") {
74                         listGroups[groupsName] = listGroups[groupsName] + "," + suffix; //adds prefix to the correct group.
75                 }else {
76                         cerr << "Error: Sequence '" << suffix << "' was not found in the group file, please correct\n";
77                 }
78         }
79         catch(exception& e) {
80                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
81                 exit(1);
82         }
83         catch(...) {
84                 cout << "An unknown error has occurred in the ParseListCommand class function parse. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
85                 exit(1);
86         }
87 }
88
89 //**********************************************************************************************************************
90
91 int ParseListCommand::execute(){
92         try{
93                         globaldata = GlobalData::getInstance();
94                         
95                         //read in listfile
96                         read = new ReadPhilFile(globaldata->inputFileName);     
97                         read->read(&*globaldata); 
98                         input = globaldata->ginput;
99                         //list = input->getListVector();
100                         list = globaldata->glist;
101
102                         //read in group map info.
103                         groupMap = new GroupMap(globaldata->getGroupFile());
104                         groupMap->readMap();
105                         
106                         string seq, label;
107                         int i;
108                         //create new list vectors to fill with parsed data
109                         for (i=0; i<groupMap->getNumGroups(); i++) {
110                                 groupOfLists[groupMap->namesOfGroups[i]] = new ListVector();
111                         }
112                         
113                         //parses and sets each groups listvector
114                         while(list != NULL){
115                                 label = list->getLabel();
116                                 for(i=0; i<list->size(); i++) {
117                                         parse(i); //parses data[i] list of sequence names
118                                         for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
119                                                 seq = it->second;
120                                                 seq = seq.substr(1, seq.length()); //rips off extra comma
121                                                 groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
122                                         }
123                                         listGroups.clear();
124                                 }
125                                 //prints each new list file
126                                 for (i=0; i<groupMap->getNumGroups(); i++) {
127                                         groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label);
128                                         groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
129                                         groupOfLists[groupMap->namesOfGroups[i]]->clear();
130                                 }
131                                 list = input->getListVector();
132                         }
133                         return 0;
134         }
135         catch(exception& e) {
136                 cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
137                 exit(1);
138         }
139         catch(...) {
140                 cout << "An unknown error has occurred in the ParseListCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
141                 exit(1);
142         }
143
144 }
145 //**********************************************************************************************************************
146
147 ParseListCommand::~ParseListCommand(){
148         delete list;
149         delete groupMap;
150         delete input;
151         delete read;    
152 }
153 //**********************************************************************************************************************