5 * Created by westcott on 10/28/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "readcluster.h"
12 /***********************************************************************/
// Constructs a ReadCluster from a distance-file path and a float `c`.
// NOTE(review): the constructor body is not visible in this excerpt; `c` is
// presumably stored as the `cutoff` that convertPhylip2Column compares
// distances against, and `distfile` as `distFile` -- confirm against the body.
14 ReadCluster::ReadCluster(string distfile, float c){
19 /***********************************************************************/
// Prepares the clustering input according to `format`.
//   - "phylip": the distance file is converted by convertPhylip2Column(nameMap).
//   - anything else: `list` is set to a new ListVector constructed from
//     nameMap->getListVector().
// nameMap is dereferenced without a visible null check on the non-phylip path.
// NOTE(review): `list` is allocated with `new`; where it is released is not
// visible in this excerpt. Lines are missing between the statements shown
// (presumably the try/catch framing around the errorOut call) -- confirm.
21 void ReadCluster::read(NameAssignment* nameMap){
24 if (format == "phylip") { convertPhylip2Column(nameMap); }
25 else { list = new ListVector(nameMap->getListVector()); }
// Reports the failure, tagged with this class and method name.
31 errorOut(e, "ReadCluster", "read");
35 /***********************************************************************/
// Produces a sorted copy of distFile named getRootName(distFile) + "sorted.dist".
// Two strategies are selected at preprocessing time:
//   - first branch: shell out to `sort -n -k +3 <distFile> -o <outfile>`;
//   - #else branch: rewrite each record with the distance first, run the
//     Windows `sort` on that temporary file, then rewrite the sorted records
//     with the distance last again and delete both temporary files.
// NOTE(review): in both branches the file names are concatenated into the
// command line unquoted, so a path containing spaces or shell metacharacters
// will break (or alter) the command, and the return value of system() is
// ignored, so a failed sort is not detected here.
// NOTE(review): interior lines (stream declarations, loops, try/catch framing)
// are missing from this excerpt; only the visible statements are described.
37 void ReadCluster::createHClusterFile(){
// Output name is derived from the root of the distance file name.
39 string outfile = getRootName(distFile) + "sorted.dist";
41 //if you can, use the unix sort since it's been optimized for years
// NOTE(review): only __APPLE__ is tested with `defined`; __MACH__, linux and
// __linux are evaluated by value, so an undefined name counts as 0 and a name
// defined as empty would be a preprocessing error.
42 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// -n requests a numeric sort; the key argument is intended to select the third
// column (the distance), and -o names the output file.
43 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
44 system(command.c_str());
45 #else //you are stuck with my best attempt...
46 //windows sort does not have a way to specify a column, only a character in the line
47 //since we cannot assume that the distance will always be at the same character location on each line
48 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
// NOTE(review): presumably the Windows sort compares lines as text, so the
// resulting order is numeric only if every distance is written with the same
// width/format -- confirm.
50 //read in file line by line and put distance first
51 string tempDistFile = distFile + ".temp";
54 openInputFile(distFile, input);
55 openOutputFile(tempDistFile, output);
57 string firstName, secondName;
// Each record is read as <first name> <second name> <distance> ...
60 input >> firstName >> secondName >> dist;
// ... and written back as <distance> TAB <first name> TAB <second name>.
61 output << dist << '\t' << firstName << '\t' << secondName << endl;
68 //sort using windows sort
69 string tempOutfile = outfile + ".temp";
// /O names the file the sorted output is written to.
70 string command = "sort " + tempDistFile + " /O " + tempOutfile;
71 system(command.c_str());
73 //read in sorted file and put distance at end again
75 openInputFile(tempOutfile, input2);
76 openOutputFile(outfile, output);
79 input2 >> dist >> firstName >> secondName;
80 output << firstName << '\t' << secondName << '\t' << dist << endl;
// Clean up both intermediate files; the results of remove() are not checked.
87 remove(tempDistFile.c_str());
88 remove(tempOutfile.c_str());
// Reports the failure, tagged with this class and method name.
94 errorOut(e, "ReadCluster", "createHClusterFile");
100 /***********************************************************************/
// Converts the phylip-formatted distFile into a three-column file and
// repoints distFile at the result. The visible code works in two passes:
//   1. Read the matrix and write "<row index> TAB <col index> TAB <distance>"
//      to distFile + ".column.temp" for every pair that passes the cutoff test.
//   2. Re-read that temporary file and write the same records to
//      getRootName(distFile) + "column.dist" with the indices replaced by the
//      names stored in rowToName; then delete the temporary file.
// Along the way `list` is allocated (either sized by nseqs or built from
// nameMap->getListVector()) and, in one visible path, every name collected in
// matrixNames is pushed into nameMap.
// NOTE(review): most of this method is missing from the excerpt (declarations,
// the conditions selecting each of the four loop variants, the population of
// rowToName, closing braces). Presumably the variants are {no name map, name
// map} x {lower-triangle, square matrix} -- confirm against the full source.
102 void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
104 //convert phylip file to column file
// Row index -> sequence name, used in the second pass to restore names.
105 map<int, string> rowToName;
106 map<int, string>::iterator it;
110 string tempFile = distFile + ".column.temp";
112 openInputFile(distFile, in);
113 openOutputFile(tempFile, out);
// Sequence names in the order they are read from the matrix.
118 vector<string> matrixNames;
122 matrixNames.push_back(name);
// Two alternative initialisations of `list`; the condition choosing between
// them is not visible here.
125 list = new ListVector(nseqs);
129 list = new ListVector(nameMap->getListVector());
// A name missing from the name map is only reported; no early exit is visible.
130 if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
// Consumes the stream one character at a time until EOF; what is done with
// each character is not visible in this excerpt.
134 while((d=in.get()) != EOF){
139 for(int i=0;i<nseqs;i++){
// Variant: rows 1..nseqs-1, columns 0..i-1.
152 for(int i=1;i<nseqs;i++){
155 matrixNames.push_back(name);
157 //there's A LOT of repeated code throughout this method...
161 for(int j=0;j<i;j++){
// A distance of -1 is replaced by 1000000 before the cutoff comparison
// (presumably -1 marks a missing distance -- confirm).
164 if (distance == -1) { distance = 1000000; }
// Only pairs strictly below the cutoff are written, as row/column indices.
166 if(distance < cutoff){
167 out << i << '\t' << j << '\t' << distance << endl;
// Same row/column traversal, with the name-map membership check.
173 if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
175 for(int j=0;j<i;j++){
178 if (distance == -1) { distance = 1000000; }
180 if(distance < cutoff){
181 out << i << '\t' << j << '\t' << distance << endl;
// Variant: every column 0..nseqs-1 is visited per row, but only entries with
// j < i are written.
189 for(int i=1;i<nseqs;i++){
192 matrixNames.push_back(name);
196 for(int j=0;j<nseqs;j++){
199 if (distance == -1) { distance = 1000000; }
201 if(distance < cutoff && j < i){
202 out << i << '\t' << j << '\t' << distance << endl;
// Same full-row traversal, with the name-map membership check.
208 if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
210 for(int j=0;j<nseqs;j++){
213 if (distance == -1) { distance = 1000000; }
215 if(distance < cutoff && j < i){
216 out << i << '\t' << j << '\t' << distance << endl;
// Registers every collected name in the name map.
// NOTE(review): `int i` is compared against the unsigned matrixNames.size().
229 for(int i=0;i<matrixNames.size();i++){
230 nameMap->push_back(matrixNames[i]);
// Second pass: translate the index pairs back to names.
237 string outputFile = getRootName(distFile) + "column.dist";
238 openInputFile(tempFile, in2);
239 openOutputFile(outputFile, out2);
245 in2 >> first >> second >> dist;
// NOTE(review): map::operator[] inserts an empty string for an index that is
// not in rowToName, so an unmapped index would be written as an empty name.
246 out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
// Drop the intermediate file and make the column file the current distFile.
252 remove(tempFile.c_str());
253 distFile = outputFile;
255 catch(exception& e) {
256 errorOut(e, "ReadCluster", "convertPhylip2Column");
260 /***********************************************************************/
// Destructor with an empty body: nothing is released here.
// NOTE(review): `list` is allocated with `new` in read() and
// convertPhylip2Column() and is not deleted here; presumably ownership is
// handed to the caller -- confirm.
262 ReadCluster::~ReadCluster(){}
263 /***********************************************************************/