5 * Created by westcott on 10/28/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "readcluster.h"
12 /***********************************************************************/
// Constructs a ReadCluster from a distance-file path (distfile), a float (c) and a string (o).
// NOTE(review): the statements that store these three arguments are not visible in this
// excerpt -- presumably they initialize distFile, cutoff and outputDir, which the other
// methods read; confirm against the full file.
14 ReadCluster::ReadCluster(string distfile, float c, string o){
// Fetch the shared GlobalData and MothurOut objects through their getInstance() accessors;
// both are used by the other methods (globaldata->nameMap, m->control_pressed, m->errorOut).
15 globaldata = GlobalData::getInstance();
16 m = MothurOut::getInstance();
22 /***********************************************************************/
// Prepares the distance file for clustering.
//   nameMap - name assignment used to build `list` (or handed to the phylip converter).
// Returns 0 if m->control_pressed is set after the format step.
// NOTE(review): the normal-path return value and the try/catch scaffolding are not visible
// in this excerpt.
24 int ReadCluster::read(NameAssignment* nameMap){
// Phylip input is first rewritten as a column-format file; for any other format the
// list is built directly from nameMap's ListVector.
27 if (format == "phylip") { convertPhylip2Column(nameMap); }
28 else { list = new ListVector(nameMap->getListVector()); }
// Bail out early if the user interrupted the run.
30 if (m->control_pressed) { return 0; }
// Pass the (possibly converted) distance file through sortFile() and keep the result.
32 OutPutFile = sortFile(distFile, outputDir);
// Report the failure tagged with this class and method name.
// NOTE(review): `e` is presumably the exception caught by an enclosing catch clause not shown here.
38 m->errorOut(e, "ReadCluster", "read");
42 /***********************************************************************/
// Converts the phylip-formatted distance matrix in distFile into a column-format file.
// Steps visible in this excerpt:
//   1. Read the matrix and write "i<TAB>j<TAB>distance" index triples, for pairs whose
//      distance is below `cutoff`, to a temp file (distFile + ".column.temp").
//   2. Re-read the temp file and write the same triples to
//      getRootName(distFile) + "column.dist", translating indices through rowToName.
//   3. Remove the temp file and point distFile at the new column file.
// Returns 0 when m->control_pressed is seen during either pass (after closing the streams
// and removing the partial files).
// NOTE(review): many lines of this method are missing from the excerpt -- the branch
// conditions (matrix shape, whether a nameMap was supplied), the stream reads and the
// normal-path return are not visible, so the comments below are hedged accordingly.
44 int ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
46 //convert phylip file to column file
// Index -> name lookup used in the second pass to turn indices back into names.
// NOTE(review): the code that fills rowToName is not visible here -- presumably it
// records each row's name as the row is read.
47 map<int, string> rowToName;
48 map<int, string>::iterator it;
// Intermediate file holding index-based triples.
52 string tempFile = distFile + ".column.temp";
54 openInputFile(distFile, in);
55 openOutputFile(tempFile, out);
// Names in the order they appear in the matrix.
60 vector<string> matrixNames;
64 matrixNames.push_back(name);
// NOTE(review): `list` is allocated in two places below; presumably these sit in
// alternative branches (no nameMap vs. nameMap supplied) -- confirm in the full file.
67 list = new ListVector(nseqs);
71 list = new ListVector(nameMap->getListVector());
// Report (but do not abort on) a name that is absent from nameMap.
72 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
// Consume characters from the input until EOF.
// NOTE(review): loop body not visible -- presumably it inspects the rest of the first
// row to decide which matrix layout is being read.
76 while((d=in.get()) != EOF){
81 for(int i=0;i<nseqs;i++){
// Rows 1..nseqs-1, reading only columns j < i for each row.
94 for(int i=1;i<nseqs;i++){
97 matrixNames.push_back(name);
99 //there's A LOT of repeated code throughout this method...
103 for(int j=0;j<i;j++){
// On interrupt: close both streams, delete the partial temp file and stop.
105 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
// A distance of -1 is replaced by 1000000 before the cutoff test.
// NOTE(review): presumably -1 marks a missing distance -- confirm.
109 if (distance == -1) { distance = 1000000; }
// Keep only pairs below the cutoff, written as index triples.
111 if(distance < cutoff){
112 out << i << '\t' << j << '\t' << distance << endl;
// Same loop shape as above, with a nameMap membership check on the row name.
118 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
120 for(int j=0;j<i;j++){
122 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
126 if (distance == -1) { distance = 1000000; }
128 if(distance < cutoff){
129 out << i << '\t' << j << '\t' << distance << endl;
// Rows 1..nseqs-1 again, but here all nseqs columns are iterated per row and only
// entries with j < i are written out.
136 for(int i=1;i<nseqs;i++){
139 matrixNames.push_back(name);
143 for(int j=0;j<nseqs;j++){
144 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
148 if (distance == -1) { distance = 1000000; }
150 if(distance < cutoff && j < i){
151 out << i << '\t' << j << '\t' << distance << endl;
// Same full-row loop shape, with a nameMap membership check on the row name.
156 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
158 for(int j=0;j<nseqs;j++){
159 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
163 if (distance == -1) { distance = 1000000; }
165 if(distance < cutoff && j < i){
166 out << i << '\t' << j << '\t' << distance << endl;
// Build a new NameAssignment from the names read from the matrix and publish it through
// globaldata. The parameter is a pointer passed by value, so reassigning it here does
// not change the caller's pointer; the new object is reachable via globaldata->nameMap.
// NOTE(review): the condition guarding this section is not visible.
179 nameMap = new NameAssignment();
// NOTE(review): `int i` is compared against the unsigned matrixNames.size().
180 for(int i=0;i<matrixNames.size();i++){
181 nameMap->push_back(matrixNames[i]);
183 globaldata->nameMap = nameMap;
// Second pass: translate the index-based temp file into the final column file.
190 string outputFile = getRootName(distFile) + "column.dist";
191 openInputFile(tempFile, in2);
192 openOutputFile(outputFile, out2);
// On interrupt: close both streams and delete both the temp and the partial output file.
198 if (m->control_pressed) { in2.close(); out2.close(); remove(tempFile.c_str()); remove(outputFile.c_str()); return 0; }
// Read one index triple and write it back with indices replaced by names.
200 in2 >> first >> second >> dist;
201 out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
// The temp file is no longer needed; from here on distFile refers to the column file.
207 remove(tempFile.c_str());
208 distFile = outputFile;
// If interrupted at this point, remove the freshly written output file as well.
210 if (m->control_pressed) { remove(outputFile.c_str()); }
// Report any std::exception tagged with this class and method name.
214 catch(exception& e) {
215 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
219 /***********************************************************************/
// Destructor: empty body, performs no explicit cleanup.
// NOTE(review): `list` is allocated with new in read() and convertPhylip2Column() and is
// not deleted here -- confirm that ownership is handed to whoever consumes it.
221 ReadCluster::~ReadCluster(){}
222 /***********************************************************************/