5 * Created by westcott on 10/28/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "readcluster.h"
12 /***********************************************************************/
// Constructor: takes the distance file path plus a float, a string and a bool.
// NOTE(review): only the first statement of the body is visible in this view; c/o/s
// presumably initialise the cutoff, outputDir and sortWanted members that read() and
// convertPhylip2Column() consult -- confirm against the full body.
14 ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
// Grab the MothurOut instance; the other methods use it for file helpers,
// user messages and the control_pressed cancellation flag.
15 m = MothurOut::getInstance();
23 /***********************************************************************/
// Prepares the distance data for clustering.
//  - format == "phylip": delegates to convertPhylip2Column(nameMap).
//  - otherwise: builds `list` from nameMap->getListVector().
// Afterwards OutPutFile is set to either the result of m->sortFile(distFile, outputDir)
// or distFile itself, depending on sortWanted.
// Returns 0 when m->control_pressed is set after the first step; the remaining
// return/try/catch lines are not visible in this view.
25 int ReadCluster::read(NameAssignment* nameMap){
28 if (format == "phylip") { convertPhylip2Column(nameMap); }
// NOTE(review): nameMap is dereferenced here without a null check.
29 else { list = new ListVector(nameMap->getListVector()); }
// Bail out before sorting if the run was cancelled.
31 if (m->control_pressed) { return 0; }
// Sort only on request; the sorted file's name comes back from m->sortFile.
33 if (sortWanted) { OutPutFile = m->sortFile(distFile, outputDir); }
34 else { OutPutFile = distFile; } //for use by clusters splitMatrix to convert a phylip matrix to column
// Error report tagged with class and method name (the enclosing handler line is not visible here).
40 m->errorOut(e, "ReadCluster", "read");
44 /***********************************************************************/
// Converts the phylip-formatted matrix in distFile into a column-formatted file.
//  Pass 1: reads the matrix and writes "i<TAB>j<TAB>distance" records (integer row/column
//          indices) for distances below `cutoff` to distFile + ".column.temp".
//  Pass 2: re-reads the temp file and writes each record with both indices mapped through
//          rowToName to m->getRootName(distFile) + "column.dist"; the temp file is then
//          removed and distFile is pointed at the new file.
// On every visible cancellation path (m->control_pressed) the open streams are closed,
// partial files are removed and 0 is returned.
// NOTE(review): only part of this method is visible in this view (branch conditions, the
// reads of `name`/`distance`, and the population of rowToName are missing); the comments
// below describe the visible statements only.
46 int ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
48 //convert phylip file to column file
// Index -> string table used in pass 2 to translate the indices written in pass 1.
49 map<int, string> rowToName;
50 map<int, string>::iterator it;
// Intermediate file holding the index-based records.
54 string tempFile = distFile + ".column.temp";
// presumably gobble() skips leading whitespace -- TODO confirm against MothurOut.
56 m->openInputFile(distFile, in); m->gobble(in);
57 m->openOutputFile(tempFile, out);
// Names in the order they are read; used near the end to fill a NameAssignment.
62 vector<string> matrixNames;
66 matrixNames.push_back(name);
// Two alternative constructions of `list` (sized by nseqs vs. built from nameMap);
// the condition selecting between them is not visible here -- presumably whether a
// name map was supplied.
69 list = new ListVector(nseqs);
73 list = new ListVector(nameMap->getListVector());
// NOTE(review): a name missing from nameMap is only logged; no early return is visible.
74 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
// Character-by-character scan of the input until EOF; presumably used to detect whether
// the matrix is square or lower-triangular -- TODO confirm, loop body not visible.
78 while((d=in.get()) != EOF){
83 for(int i=0;i<nseqs;i++){
// Rows 1..nseqs-1 where the inner loops read only j < i entries per row.
96 for(int i=1;i<nseqs;i++){
99 matrixNames.push_back(name);
101 //there's A LOT of repeated code throughout this method...
105 for(int j=0;j<i;j++){
// Cancelled: close both streams, drop the partial temp file and stop.
107 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
// A distance of -1 is replaced by 1000000 (presumably -1 marks a missing value and the
// large number keeps it from passing the cutoff test -- TODO confirm).
111 if (distance == -1) { distance = 1000000; }
// Keep only distances strictly below the cutoff; indices, not names, are written.
113 if(distance < cutoff){
114 out << i << '\t' << j << '\t' << distance << endl;
120 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
122 for(int j=0;j<i;j++){
124 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
128 if (distance == -1) { distance = 1000000; }
130 if(distance < cutoff){
131 out << i << '\t' << j << '\t' << distance << endl;
// Rows 1..nseqs-1 where the inner loops read nseqs entries per row; only entries with
// j < i are written, so the diagonal and the upper half are read but never emitted.
138 for(int i=1;i<nseqs;i++){
141 matrixNames.push_back(name);
145 for(int j=0;j<nseqs;j++){
146 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
150 if (distance == -1) { distance = 1000000; }
152 if(distance < cutoff && j < i){
153 out << i << '\t' << j << '\t' << distance << endl;
158 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
160 for(int j=0;j<nseqs;j++){
161 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
165 if (distance == -1) { distance = 1000000; }
167 if(distance < cutoff && j < i){
168 out << i << '\t' << j << '\t' << distance << endl;
// NOTE(review): nameMap is a pointer passed by value in this signature, so this assignment
// rebinds only the local copy -- the caller never sees the new NameAssignment, and no
// delete of it is visible here. Confirm whether the parameter should be a reference.
181 nameMap = new NameAssignment();
// NOTE(review): int index compared against an unsigned size().
182 for(int i=0;i<matrixNames.size();i++){
183 nameMap->push_back(matrixNames[i]);
// Pass 2: final column file, named from m->getRootName(distFile) plus "column.dist".
191 string outputFile = m->getRootName(distFile) + "column.dist";
192 m->openInputFile(tempFile, in2);
193 m->openOutputFile(outputFile, out2);
// Cancelled during pass 2: remove both the temp file and the partial output.
199 if (m->control_pressed) { in2.close(); out2.close(); remove(tempFile.c_str()); remove(outputFile.c_str()); return 0; }
// Read one index-based record and write it with both indices mapped through rowToName.
201 in2 >> first >> second >> dist;
202 out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
// The temp file is no longer needed; from here on distFile refers to the column file.
208 remove(tempFile.c_str());
209 distFile = outputFile;
// Cancelled after conversion finished: discard the output file as well.
211 if (m->control_pressed) { remove(outputFile.c_str()); }
// Any std::exception is reported with class and method name.
215 catch(exception& e) {
216 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
220 /***********************************************************************/
// Destructor with an empty body: no cleanup is performed here.
// NOTE(review): read() and convertPhylip2Column() allocate `list` with new and it is not
// deleted here -- confirm that ownership of `list` passes to the caller.
222 ReadCluster::~ReadCluster(){}
223 /***********************************************************************/