5 * Created by Sarah Westcott on 4/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "getoturepcommand.h"
12 //**********************************************************************************************************************
// Constructor. Pulls its working state from the GlobalData singleton: takes
// private copies of the sparse distance matrix and the list vector, indexes the
// list's names by position, opens the fasta input stream and allocates the
// FastaMap that execute() later fills.
13 GetOTURepCommand::GetOTURepCommand(){
15 globaldata = GlobalData::getInstance();
// NOTE(review): matrix is only assigned when gSparseMatrix is non-NULL, yet
// FindRep dereferences it unconditionally - confirm it is initialised elsewhere.
17 if(globaldata->gSparseMatrix != NULL)	{	matrix = new SparseMatrix(*globaldata->gSparseMatrix);		}
19 //listOfNames bin 0 = first name read in distance matrix, listOfNames bin 1 = second name read in distance matrix
20 if(globaldata->gListVector != NULL)		{
21 listOfNames = new ListVector(*globaldata->gListVector);
23 //map names to rows in sparsematrix
// nameToIndex is keyed by the name stored in bin i and maps to i; FindRep
// compares these indices against the row/column of each matrix cell.
24 for (int i = 0; i < listOfNames->size(); i++) {
25 nameToIndex[listOfNames->get(i)] = i;
// NOTE(review): when no list vector is available only the text "error" is
// printed; listOfNames and nameToIndex are not set up on this path.
27 }else { cout << "error" << endl;	}
// File names come from GlobalData; execute() treats an empty namesfile as
// "no names file supplied".
30 fastafile = globaldata->getFastaFile();
31 namesfile = globaldata->getNameFile();
// The stream opened here is the one execute() hands to fasta->readFastaFile().
32 openInputFile(fastafile, in);
34 fasta = new FastaMap();
// Error reporting for std::exception and for any other exception type.
38 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
42 cout << "An unknown error has occurred in the GetOTURepCommand class function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
46 //**********************************************************************************************************************
// Destructor.
// NOTE(review): the constructor allocates matrix, listOfNames and fasta with
// new, and execute() allocates read - confirm each of them is released here.
48 GetOTURepCommand::~GetOTURepCommand(){
56 //**********************************************************************************************************************
// Runs the command: for every list vector selected by the user's line/label
// settings, writes a "<list file root><label>.rep.fasta" file containing one
// name/sequence record per bin.
58 int GetOTURepCommand::execute(){
61 string nameRep, name, sequence;
// Read the fasta stream that the constructor opened into the FastaMap.
64 fasta->readFastaFile(in);
66 //set format to list so input can get listvector
67 globaldata->setFormat("list");
69 //if user gave a namesfile then use it
70 if (namesfile != "") {
// NOTE(review): read is allocated with new here and no matching delete is
// visible in this function - confirm who owns it.
75 read = new ReadPhilFile(globaldata->getListFile());
76 read->read(&*globaldata);
// read->read() is expected to have populated these GlobalData members
// - TODO confirm against ReadPhilFile.
78 input = globaldata->ginput;
79 list = globaldata->gListVector;
// Handle this list only when all lines were requested, or when its ordinal
// (count) or its label is among the requested lines/labels.
83 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
// One output file per processed label, named after the list file's root.
86 string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".rep.fasta";
87 openOutputFile(outputFileName, out);
// Progress line: label and its ordinal, tab separated.
89 cout << list->getLabel() << '\t' << count << endl;
91 //for each bin in the list vector
92 for (int i = 0; i < list->size(); i++) {
95 //print out name and sequence for that bin
96 sequence = fasta->getSequence(nameRep);
// getSequence() reports an unknown name with the literal string "not found".
98 if (sequence != "not found") {
// Tag the representative's name with its 1-based bin number.
99 nameRep = nameRep + "|" + toString(i+1);
100 out << ">" << nameRep << endl;
101 out << sequence << endl;
// Lookup failed: report the offending name and delete the output file.
// NOTE(review): confirm out is closed and processing stops after the removal.
103 cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl;
104 remove(outputFileName.c_str());
// Advance to the next list vector supplied by the input reader.
112 list = input->getListVector();
// Error reporting for std::exception and for any other exception type.
119 catch(exception& e) {
120 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
124 cout << "An unknown error has occurred in the GetOTURepCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
130 //**********************************************************************************************************************
// Reads the names file (column 1: a sequence name, column 2: a comma-separated
// list of names) and stores every name from column 2 in the fasta map with the
// sequence that belongs to the column 1 name.
131 void GetOTURepCommand::readNamesFile() {
// NOTE(review): dupNames is declared once for the whole function; confirm it is
// emptied between rows (by splitAtComma or explicitly), otherwise names from
// earlier rows would be re-added with a later row's sequence.
133 vector<string> dupNames;
134 openInputFile(namesfile, inNames);
136 string name, names, sequence;
139 inNames >> name;			//read from first column  A
140 inNames >> names;		//read from second column  A,B,C,D
144 //parse names into vector
145 splitAtComma(names, dupNames);
147 //store names in fasta map
// NOTE(review): execute() treats "not found" as getSequence()'s result for an
// unknown name; no such check is made here, so that string would be stored as
// the sequence of every listed name - confirm this is acceptable.
148 sequence = fasta->getSequence(name);
149 for (int i = 0; i < dupNames.size(); i++) {
150 fasta->push_back(dupNames[i], sequence);
// Error reporting for std::exception and for any other exception type.
158 catch(exception& e) {
159 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
163 cout << "An unknown error has occurred in the GetOTURepCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
167 //**********************************************************************************************************************
// Picks the representative sequence name for bin number `bin` of the current
// list: the member whose summed distance to the other members of the bin is the
// smallest. A bin holding a single name returns that name immediately.
168 string GetOTURepCommand::FindRep(int bin) {
170 vector<string> names;
// sums: member name -> running total of its distances to other bin members.
171 map<string, float> sums;
172 map<string, float>::iterator it4;
173 map<int, string> binMap; //subset of nameToIndex - just the members of this bin (matrix index -> name)
// The bin is stored as one comma-separated string of names.
178 binnames = list->get(bin);
180 //parse names into vector
181 splitAtComma(binnames, names);
183 //if only 1 sequence in bin then that's the rep
184 if (names.size() == 1) { return names[0]; }
// For every bin member, locate its entry in nameToIndex and record it in binMap.
// NOTE(review): this is a full scan of nameToIndex per name; a direct keyed
// lookup on nameToIndex would give the same entry without the inner loop.
187 for (int i = 0; i < names.size(); i++) {
188 for (it3 = nameToIndex.begin(); it3 != nameToIndex.end(); it3++) {
189 if (it3->first == names[i]) {
190 binMap[it3->second] = it3->first;
192 //initialize sums map
193 sums[it3->first] = 0.0;
199 //go through each cell in the sparsematrix
200 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
201 //is this a distance between 2 members of this bin
202 it = binMap.find(currentCell->row);
203 it2 = binMap.find(currentCell->column);
205 //sum the distance of the sequences in the bin to each other
206 if ((it != binMap.end()) && (it2 != binMap.end())) {
207 //this is a cell that represents the distance between two of this bin's members
// The distance counts towards the totals of both sequences involved.
208 sums[it->second] += currentCell->dist;
209 sums[it2->second] += currentCell->dist;
213 //smallest sum is the representative
// NOTE(review): a member that has no matrix cell shared with another bin member
// keeps its initial sum of 0.0 and would therefore win this minimum - confirm
// that is intended. The initial value of min is not set in the lines shown here.
214 for (it4 = sums.begin(); it4 != sums.end(); it4++) {
215 if (it4->second < min) {
217 minName = it4->first;
// Error reporting for std::exception and for any other exception type.
226 catch(exception& e) {
227 cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
231 cout << "An unknown error has occurred in the GetOTURepCommand class function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";