]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
fixed bug with get.oturep and fixed problem with errorcheckor that did not allow...
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11
12 //**********************************************************************************************************************
13 GetOTURepCommand::GetOTURepCommand(){
14         try{
15                 globaldata = GlobalData::getInstance();
16         
17                 if(globaldata->gSparseMatrix != NULL)   {       matrix = new SparseMatrix(*globaldata->gSparseMatrix);          }
18                 
19                 //listOfNames bin 0 = first name read in distance matrix, listOfNames bin 1 = second name read in distance matrix
20                 if(globaldata->gListVector != NULL)             {       
21                         listOfNames = new ListVector(*globaldata->gListVector); 
22                         
23                         vector<string> names;
24                         string binnames;
25                         //map names to rows in sparsematrix
26                         for (int i = 0; i < listOfNames->size(); i++) {
27                                 names.clear();
28                                 binnames = listOfNames->get(i);
29                                 splitAtComma(binnames, names);
30                                 
31                                 for (int j = 0; j < names.size(); j++) {
32                                         nameToIndex[names[j]] = i;
33                                 }
34                         }
35                 }else { cout << "error, no listvector." << endl; }
36
37                 
38                 fastafile = globaldata->getFastaFile();
39                 namesfile = globaldata->getNameFile();
40                 openInputFile(fastafile, in);
41                 
42                 fasta = new FastaMap();
43
44         }
45         catch(exception& e) {
46                 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
47                 exit(1);
48         }
49         catch(...) {
50                 cout << "An unknown error has occurred in the GetOTURepCommand class function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55
56 GetOTURepCommand::~GetOTURepCommand(){
57         delete matrix;
58         delete list;
59         delete input;
60         delete read;
61         delete fasta;
62 }
63
64 //**********************************************************************************************************************
65
66 int GetOTURepCommand::execute(){
67         try {
68                 int count = 1;
69                 string nameRep, name, sequence;
70                 
71                 //read fastafile
72                 fasta->readFastaFile(in);
73                 
74                 //set format to list so input can get listvector
75                 globaldata->setFormat("list");
76                 
77                 //if user gave a namesfile then use it
78                 if (namesfile != "") {
79                         readNamesFile();
80                 }
81                 
82                 //read list file
83                 read = new ReadPhilFile(globaldata->getListFile());     
84                 read->read(&*globaldata); 
85                 
86                 input = globaldata->ginput;
87                 list = globaldata->gListVector;
88                 
89                 while(list != NULL){
90                         
91                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
92                                 
93                                 //create output file
94                                 string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".rep.fasta";
95                                 openOutputFile(outputFileName, out);
96
97                                 cout << list->getLabel() << '\t' << count << endl;
98                                 
99                                 //for each bin in the list vector
100                                 for (int i = 0; i < list->size(); i++) {
101                                         nameRep = FindRep(i);
102                                         
103                                         //print out name and sequence for that bin
104                                         sequence = fasta->getSequence(nameRep);
105
106                                         if (sequence != "not found") {
107                                                 nameRep = nameRep + "|" + toString(i+1);
108                                                 out << ">" << nameRep << endl;
109                                                 out << sequence << endl;
110                                         }else { 
111                                                 cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl; 
112                                                 remove(outputFileName.c_str());
113                                                 return 0;
114                                         }
115                                 }
116                                 
117                                 out.close();
118                         }
119                         
120                         list = input->getListVector();
121                         count++;
122                 }
123
124                 
125                 return 0;
126         }
127         catch(exception& e) {
128                 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
129                 exit(1);
130         }
131         catch(...) {
132                 cout << "An unknown error has occurred in the GetOTURepCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
133                 exit(1);
134         }
135
136 }
137
138 //**********************************************************************************************************************
139 void GetOTURepCommand::readNamesFile() {
140         try {
141                 vector<string> dupNames;
142                 openInputFile(namesfile, inNames);
143                 
144                 string name, names, sequence;
145         
146                 while(inNames){
147                         inNames >> name;                        //read from first column  A
148                         inNames >> names;               //read from second column  A,B,C,D
149                         
150                         dupNames.clear();
151                         
152                         //parse names into vector
153                         splitAtComma(names, dupNames);
154                         
155                         //store names in fasta map
156                         sequence = fasta->getSequence(name);
157                         for (int i = 0; i < dupNames.size(); i++) {
158                                 fasta->push_back(dupNames[i], sequence);
159                         }
160                 
161                         gobble(inNames);
162                 }
163                 inNames.close();
164
165         }
166         catch(exception& e) {
167                 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
168                 exit(1);
169         }
170         catch(...) {
171                 cout << "An unknown error has occurred in the GetOTURepCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
172                 exit(1);
173         }       
174 }
175 //**********************************************************************************************************************
176 string GetOTURepCommand::FindRep(int bin) {
177         try{
178                 vector<string> names;
179                 map<string, float> sums;
180                 map<string, float>::iterator it4;
181                 map<int, string> binMap; //subset of namesToIndex - just member of this bin
182                 string binnames;
183                 float min = 10000;
184                 string minName;
185                 
186                 binnames = list->get(bin);
187         
188                 //parse names into vector
189                 splitAtComma(binnames, names);
190                 
191                 //if only 1 sequence in bin then that's the rep
192                 if (names.size() == 1) { return names[0]; }
193                 else {
194                         //fill binMap
195                         for (int i = 0; i < names.size(); i++) {
196                                 for (it3 = nameToIndex.begin(); it3 != nameToIndex.end(); it3++) {
197
198                                         if (it3->first == names[i]) {  
199                                                 binMap[it3->second] = it3->first;
200
201                                                 //initialize sums map
202                                                 sums[it3->first] = 0.0;
203                                                 break;
204                                         }
205                                 }
206                         }
207                         
208                         //go through each cell in the sparsematrix
209                         for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
210                                 //is this a distance between 2 members of this bin
211                                 it = binMap.find(currentCell->row);
212                                 it2 = binMap.find(currentCell->column);
213                                 
214                                 //sum the distance of the sequences in the bin to eachother
215                                 if ((it != binMap.end()) && (it2 != binMap.end())) {
216                                         //this is a cell that repesents the distance between to of this bins members
217                                         sums[it->second] += currentCell->dist;
218                                         sums[it2->second] += currentCell->dist;
219                                 }
220                         }
221                         
222                         //smallest sum is the representative
223                         for (it4 = sums.begin(); it4 != sums.end(); it4++) {
224                                 if (it4->second < min) {
225                                         min = it4->second;
226                                         minName = it4->first;
227                                 }
228
229                         }
230                         
231                         return minName;
232                 }
233         
234         }
235         catch(exception& e) {
236                 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
237                 exit(1);
238         }
239         catch(...) {
240                 cout << "An unknown error has occurred in the GetOTURepCommand class function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
241                 exit(1);
242         }       
243 }
244
245
246
247
248