5 * Created by westcott on 10/28/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "readcluster.h"
12 /***********************************************************************/
// Constructs a reader for the distance file named by `distfile`. `c` is a
// float, presumably the distance cutoff that convertPhylip2Column() compares
// against -- the lines that store the two arguments are not visible here,
// TODO confirm.
14 ReadCluster::ReadCluster(string distfile, float c){
// keep the GlobalData and MothurOut instances returned by getInstance()
15 globaldata = GlobalData::getInstance();
16 m = MothurOut::getInstance();
21 /***********************************************************************/
// Prepares the distance data for reading. When `format` is "phylip" the file
// is first run through convertPhylip2Column(); otherwise `list` is built from
// nameMap's ListVector. distFile is then handed to sortFile() and the result
// is stored in OutPutFile.
// Returns 0 when m->control_pressed is set after the format step; the other
// return statements are in lines not visible here.
23 int ReadCluster::read(NameAssignment* nameMap){
26 if (format == "phylip") { convertPhylip2Column(nameMap); }
// NOTE(review): nameMap is dereferenced here with no visible null check
27 else { list = new ListVector(nameMap->getListVector()); }
// stop before sorting if the run was cancelled
29 if (m->control_pressed) { return 0; }
31 OutPutFile = sortFile(distFile);
// report the exception `e` through MothurOut, tagged with class and method
// name (presumably inside a catch(exception& e) handler -- not visible here)
37 m->errorOut(e, "ReadCluster", "read");
41 /***********************************************************************/
// Rewrites the phylip-formatted distFile as a column-formatted file and then
// points distFile at the new file. Works in two passes:
//   1. read the matrix from distFile and write "i <tab> j <tab> distance" rows,
//      using row indices, to a temporary file for every distance below cutoff;
//   2. re-read the temporary file and write the same rows with the indices
//      replaced by names looked up in rowToName.
// `list` is (re)allocated along the way, and a fresh NameAssignment built from
// the matrix names is stored in globaldata->nameMap (presumably only when the
// caller supplied no name map -- the guarding condition is not visible here).
// Returns 0 whenever m->control_pressed is seen inside the loops, after
// closing the streams and removing the files written so far.
43 int ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
45 //convert phylip file to column file
// row index -> sequence name, used in the second pass (filled in lines not
// visible here)
46 map<int, string> rowToName;
47 map<int, string>::iterator it;
// first-pass output: index-based rows
51 string tempFile = distFile + ".column.temp";
53 openInputFile(distFile, in);
54 openOutputFile(tempFile, out);
// names in the order they are read from the matrix
59 vector<string> matrixNames;
63 matrixNames.push_back(name);
// `list` is created either with nseqs entries or as a copy of nameMap's
// ListVector (the condition choosing between the two is not visible here)
66 list = new ListVector(nseqs);
70 list = new ListVector(nameMap->getListVector());
// a name missing from the names file is only logged here; no abort is visible
71 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
// consume characters until end of file (loop body not visible; presumably it
// inspects the rest of the first row to tell a square matrix from a
// lower-triangle one -- TODO confirm)
75 while((d=in.get()) != EOF){
80 for(int i=0;i<nseqs;i++){
// rows 1..nseqs-1 with inner loops over j < i: only the entries below the
// diagonal are read for each row
93 for(int i=1;i<nseqs;i++){
96 matrixNames.push_back(name);
98 //there's A LOT of repeated code throughout this method...
102 for(int j=0;j<i;j++){
// on cancel: close both streams, delete the partial temp file, bail out
104 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
// a distance of -1 is replaced by 1000000, so it is written only if cutoff
// exceeds that value
108 if (distance == -1) { distance = 1000000; }
// keep only pairs closer than the cutoff; rows are written as indices
110 if(distance < cutoff){
111 out << i << '\t' << j << '\t' << distance << endl;
// same inner loop again, this time preceded by the names-file check
117 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
119 for(int j=0;j<i;j++){
121 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
125 if (distance == -1) { distance = 1000000; }
127 if(distance < cutoff){
128 out << i << '\t' << j << '\t' << distance << endl;
// rows 1..nseqs-1 with inner loops over all nseqs columns: every entry of the
// row is read, but the `j < i` test below writes only those under the diagonal
135 for(int i=1;i<nseqs;i++){
138 matrixNames.push_back(name);
142 for(int j=0;j<nseqs;j++){
143 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
147 if (distance == -1) { distance = 1000000; }
149 if(distance < cutoff && j < i){
150 out << i << '\t' << j << '\t' << distance << endl;
// full-row variant again, preceded by the names-file check
155 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
157 for(int j=0;j<nseqs;j++){
158 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
162 if (distance == -1) { distance = 1000000; }
164 if(distance < cutoff && j < i){
165 out << i << '\t' << j << '\t' << distance << endl;
// build a NameAssignment from the names read out of the matrix.
// nameMap is a pointer passed by value, so this assignment does not change the
// caller's pointer; the new object is published through globaldata->nameMap.
178 nameMap = new NameAssignment();
179 for(int i=0;i<matrixNames.size();i++){
180 nameMap->push_back(matrixNames[i]);
182 globaldata->nameMap = nameMap;
// second pass: translate the index-based temp file into the final
// name-based column file
189 string outputFile = getRootName(distFile) + "column.dist";
190 openInputFile(tempFile, in2);
191 openOutputFile(outputFile, out2);
// on cancel: close both streams and delete both the temp and the output file
197 if (m->control_pressed) { in2.close(); out2.close(); remove(tempFile.c_str()); remove(outputFile.c_str()); return 0; }
199 in2 >> first >> second >> dist;
// NOTE(review): map::operator[] inserts an empty name for an index that is
// not already in rowToName
200 out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
// the temp file is no longer needed; from here on distFile names the column file
206 remove(tempFile.c_str());
207 distFile = outputFile;
// discard the converted file as well if the run was cancelled
209 if (m->control_pressed) { remove(outputFile.c_str()); }
// report any std::exception through MothurOut, tagged with class and method name
213 catch(exception& e) {
214 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
218 /***********************************************************************/
// Destructor with an empty body: nothing is released here.
// NOTE(review): `list` is allocated with new in read() and
// convertPhylip2Column() and is not deleted here -- presumably ownership is
// handed to a caller; confirm.
220 ReadCluster::~ReadCluster(){}
221 /***********************************************************************/