git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
removed "shared" from some of the calculator names and classes
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11
12 //**********************************************************************************************************************
13 GetOTURepCommand::GetOTURepCommand(){
14         try{
15                 globaldata = GlobalData::getInstance();
16         
17                 if(globaldata->gSparseMatrix != NULL)   {       matrix = new SparseMatrix(*globaldata->gSparseMatrix);          }
18                 
19                 //listOfNames bin 0 = first name read in distance matrix, listOfNames bin 1 = second name read in distance matrix
20                 if(globaldata->gListVector != NULL)             {       
21                         listOfNames = new ListVector(*globaldata->gListVector); 
22                         
23                         //map names to rows in sparsematrix
24                         for (int i = 0; i < listOfNames->size(); i++) {
25                                 nameToIndex[listOfNames->get(i)] = i;
26                         }
27                 }else { cout << "error" << endl; }
28
29                 
30                 fastafile = globaldata->getFastaFile();
31                 namesfile = globaldata->getNameFile();
32                 openInputFile(fastafile, in);
33                 
34                 fasta = new FastaMap();
35
36         }
37         catch(exception& e) {
38                 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
39                 exit(1);
40         }
41         catch(...) {
42                 cout << "An unknown error has occurred in the GetOTURepCommand class function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
43                 exit(1);
44         }
45 }
46 //**********************************************************************************************************************
47
48 GetOTURepCommand::~GetOTURepCommand(){
49         delete matrix;
50         delete list;
51         delete input;
52         delete read;
53         delete fasta;
54 }
55
56 //**********************************************************************************************************************
57
58 int GetOTURepCommand::execute(){
59         try {
60                 int count = 1;
61                 string nameRep, name, sequence;
62                 
63                 //read fastafile
64                 fasta->readFastaFile(in);
65                 
66                 //set format to list so input can get listvector
67                 globaldata->setFormat("list");
68                 
69                 //if user gave a namesfile then use it
70                 if (namesfile != "") {
71                         readNamesFile();
72                 }
73                 
74                 //read list file
75                 read = new ReadPhilFile(globaldata->getListFile());     
76                 read->read(&*globaldata); 
77                 
78                 input = globaldata->ginput;
79                 list = globaldata->gListVector;
80                 
81                 while(list != NULL){
82                         
83                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
84                                 
85                                 //create output file
86                                 string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".rep.fasta";
87                                 openOutputFile(outputFileName, out);
88
89                                 cout << list->getLabel() << '\t' << count << endl;
90                                 
91                                 //for each bin in the list vector
92                                 for (int i = 0; i < list->size(); i++) {
93                                         nameRep = FindRep(i);
94                                         
95                                         //print out name and sequence for that bin
96                                         sequence = fasta->getSequence(nameRep);
97
98                                         if (sequence != "not found") {
99                                                 nameRep = nameRep + "|" + toString(i+1);
100                                                 out << ">" << nameRep << endl;
101                                                 out << sequence << endl;
102                                         }else { 
103                                                 cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl; 
104                                                 remove(outputFileName.c_str());
105                                                 return 0;
106                                         }
107                                 }
108                                 
109                                 out.close();
110                         }
111                         
112                         list = input->getListVector();
113                         count++;
114                 }
115
116                 
117                 return 0;
118         }
119         catch(exception& e) {
120                 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
121                 exit(1);
122         }
123         catch(...) {
124                 cout << "An unknown error has occurred in the GetOTURepCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
125                 exit(1);
126         }
127
128 }
129
130 //**********************************************************************************************************************
131 void GetOTURepCommand::readNamesFile() {
132         try {
133                 vector<string> dupNames;
134                 openInputFile(namesfile, inNames);
135                 
136                 string name, names, sequence;
137         
138                 while(inNames){
139                         inNames >> name;                        //read from first column  A
140                         inNames >> names;               //read from second column  A,B,C,D
141                         
142                         dupNames.clear();
143                         
144                         //parse names into vector
145                         splitAtComma(names, dupNames);
146                         
147                         //store names in fasta map
148                         sequence = fasta->getSequence(name);
149                         for (int i = 0; i < dupNames.size(); i++) {
150                                 fasta->push_back(dupNames[i], sequence);
151                         }
152                 
153                         gobble(inNames);
154                 }
155                 inNames.close();
156
157         }
158         catch(exception& e) {
159                 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
160                 exit(1);
161         }
162         catch(...) {
163                 cout << "An unknown error has occurred in the GetOTURepCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
164                 exit(1);
165         }       
166 }
167 //**********************************************************************************************************************
168 string GetOTURepCommand::FindRep(int bin) {
169         try{
170                 vector<string> names;
171                 map<string, float> sums;
172                 map<string, float>::iterator it4;
173                 map<int, string> binMap; //subset of namesToIndex - just member of this bin
174                 string binnames;
175                 float min = 10000;
176                 string minName;
177                 
178                 binnames = list->get(bin);
179                 
180                 //parse names into vector
181                 splitAtComma(binnames, names);
182                 
183                 //if only 1 sequence in bin then that's the rep
184                 if (names.size() == 1) { return names[0]; }
185                 else {
186                         //fill binMap
187                         for (int i = 0; i < names.size(); i++) {
188                                 for (it3 = nameToIndex.begin(); it3 != nameToIndex.end(); it3++) {
189                                         if (it3->first == names[i]) {  
190                                                 binMap[it3->second] = it3->first;
191
192                                                 //initialize sums map
193                                                 sums[it3->first] = 0.0;
194                                                 break;
195                                         }
196                                 }
197                         }
198                         
199                         //go through each cell in the sparsematrix
200                         for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
201                                 //is this a distance between 2 members of this bin
202                                 it = binMap.find(currentCell->row);
203                                 it2 = binMap.find(currentCell->column);
204                                 
205                                 //sum the distance of the sequences in the bin to eachother
206                                 if ((it != binMap.end()) && (it2 != binMap.end())) {
207                                         //this is a cell that repesents the distance between to of this bins members
208                                         sums[it->second] += currentCell->dist;
209                                         sums[it2->second] += currentCell->dist;
210                                 }
211                         }
212                         
213                         //smallest sum is the representative
214                         for (it4 = sums.begin(); it4 != sums.end(); it4++) {
215                                 if (it4->second < min) {
216                                         min = it4->second;
217                                         minName = it4->first;
218                                 }
219
220                         }
221                         
222                         return minName;
223                 }
224         
225         }
226         catch(exception& e) {
227                 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
228                 exit(1);
229         }
230         catch(...) {
231                 cout << "An unknown error has occurred in the GetOTURepCommand class function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
232                 exit(1);
233         }       
234 }
235
236
237
238
239