5 * Created by westcott on 10/28/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "readcluster.h"
12 /***********************************************************************/
// Constructor: takes the distance file name plus a float `c`, a string `o` and a bool `s`.
// NOTE(review): the lines that store these arguments are not visible in this excerpt;
// presumably they initialise the members the other methods read (distFile, cutoff,
// outputDir, sortWanted) - confirm against the full file.
14 ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
// Keep pointers to the shared GlobalData and MothurOut objects returned by getInstance();
// the other methods use `m` for messages/cancellation and `globaldata` to publish the name map.
15 globaldata = GlobalData::getInstance();
16 m = MothurOut::getInstance();
24 /***********************************************************************/
// Prepares the distance file for clustering and sets OutPutFile.
//   nameMap - name assignment used to build `list`; it is dereferenced on the
//             non-phylip path, so it must be non-null there.
// Returns 0 when m->control_pressed is set after the conversion/list step.
// NOTE(review): the normal-path return statement is not visible in this excerpt.
26 int ReadCluster::read(NameAssignment* nameMap){
// A phylip-formatted file is first converted by convertPhylip2Column(); for any other
// format `list` is built directly from the name map.
29 if (format == "phylip") { convertPhylip2Column(nameMap); }
30 else { list = new ListVector(nameMap->getListVector()); }
// Stop early if the user cancelled during the step above.
32 if (m->control_pressed) { return 0; }
// Either hand back a sorted copy produced by sortFile() or the (possibly converted) distFile itself.
34 if (sortWanted) { OutPutFile = sortFile(distFile, outputDir); }
35 else { OutPutFile = distFile; } //for use by clusters splitMatrix to convert a phylip matrix to column
// Report the caught exception `e` through MothurOut, tagged with the class and method name.
41 m->errorOut(e, "ReadCluster", "read");
45 /***********************************************************************/
// Converts the phylip-formatted distance file `distFile` into a column-formatted file
// and points `distFile` at the result.
//
// Works in two passes:
//   1. Reads the matrix and writes "rowIndex<TAB>colIndex<TAB>distance" lines for
//      distances below `cutoff` to a temporary file (distFile + ".column.temp").
//   2. Re-reads the temporary file and writes the same records with the indices
//      replaced by names (via rowToName) to getRootName(distFile) + "column.dist".
//
//   nameMap - name assignment used to build `list` and to check that each sequence
//             name is known. The parameter is a pointer passed by value, so the
//             reassignment further down does not change the caller's pointer.
// Returns 0 whenever m->control_pressed is detected; temporary/output files are
// removed on those paths.
// NOTE(review): many original lines (declarations, stream reads, branch headers,
// closing braces, the normal return) are not visible in this excerpt.
47 int ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
49 //convert phylip file to column file
// rowToName is used in the second pass to translate the numeric indices written in the
// first pass back into names. NOTE(review): the lines that fill it are not visible here.
50 map<int, string> rowToName;
51 map<int, string>::iterator it;
// First-pass output goes to a temp file next to the input.
55 string tempFile = distFile + ".column.temp";
57 openInputFile(distFile, in); gobble(in);
58 openOutputFile(tempFile, out);
// Names in matrix order; used below to build a NameAssignment.
63 vector<string> matrixNames;
67 matrixNames.push_back(name);
// NOTE(review): the two `list = new ...` assignments below presumably sit in alternative
// branches (without / with a name map); the branch headers are not visible - confirm,
// otherwise the first allocation would be overwritten.
70 list = new ListVector(nseqs);
74 list = new ListVector(nameMap->getListVector());
// A name missing from the name map is reported but processing continues.
75 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
// Consume characters one at a time until end of file.
// NOTE(review): the declaration of `d` is not visible; if it is a plain `char`, comparing
// the result of in.get() with EOF is unreliable - verify that `d` is an int.
79 while((d=in.get()) != EOF){
84 for(int i=0;i<nseqs;i++){
// ---- Variant 1: each row i holds i distances (columns 0..i-1). ----
// The two near-identical inner loops below presumably correspond to the without / with
// name-map cases - the branch headers are not visible.
97 for(int i=1;i<nseqs;i++){
100 matrixNames.push_back(name);
102 //there's A LOT of repeated code throughout this method...
106 for(int j=0;j<i;j++){
// On cancellation: close both streams, delete the temp file and bail out.
108 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
// A distance of -1 is replaced by 1000000 (presumably -1 marks a missing value - confirm).
112 if (distance == -1) { distance = 1000000; }
// Only distances below the cutoff are kept; indices (not names) are written in this pass.
114 if(distance < cutoff){
115 out << i << '\t' << j << '\t' << distance << endl;
121 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
123 for(int j=0;j<i;j++){
125 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
129 if (distance == -1) { distance = 1000000; }
131 if(distance < cutoff){
132 out << i << '\t' << j << '\t' << distance << endl;
// ---- Variant 2: each row i holds nseqs distances; only entries with j < i are written,
// so each pair is emitted once. ----
139 for(int i=1;i<nseqs;i++){
142 matrixNames.push_back(name);
146 for(int j=0;j<nseqs;j++){
147 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
151 if (distance == -1) { distance = 1000000; }
153 if(distance < cutoff && j < i){
154 out << i << '\t' << j << '\t' << distance << endl;
159 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
161 for(int j=0;j<nseqs;j++){
162 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
166 if (distance == -1) { distance = 1000000; }
168 if(distance < cutoff && j < i){
169 out << i << '\t' << j << '\t' << distance << endl;
// Build a fresh NameAssignment from the names seen in the matrix and publish it through
// globaldata. This rebinds only the local `nameMap` pointer.
// NOTE(review): presumably guarded by a "no name map supplied" check that is not visible here.
182 nameMap = new NameAssignment();
183 for(int i=0;i<matrixNames.size();i++){
184 nameMap->push_back(matrixNames[i]);
186 globaldata->nameMap = nameMap;
// ---- Second pass: rewrite the temp file with names instead of indices. ----
193 string outputFile = getRootName(distFile) + "column.dist";
194 openInputFile(tempFile, in2);
195 openOutputFile(outputFile, out2);
// On cancellation: close both streams and delete both the temp and the partial output file.
201 if (m->control_pressed) { in2.close(); out2.close(); remove(tempFile.c_str()); remove(outputFile.c_str()); return 0; }
203 in2 >> first >> second >> dist;
// rowToName[...] uses map::operator[], so an index that was never stored yields an empty name.
204 out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
// The temp file is no longer needed; from here on distFile refers to the column-formatted file.
210 remove(tempFile.c_str());
211 distFile = outputFile;
// If the user cancelled in the meantime, do not leave the converted file behind.
213 if (m->control_pressed) { remove(outputFile.c_str()); }
// Any std::exception is reported through MothurOut, tagged with the class and method name.
217 catch(exception& e) {
218 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
222 /***********************************************************************/
// Destructor with an empty body: nothing is released here, including the `list`
// object allocated with `new` in read()/convertPhylip2Column().
// NOTE(review): presumably ownership of `list` passes to the caller - confirm.
224 ReadCluster::~ReadCluster(){}
225 /***********************************************************************/