5 * Created by westcott on 10/28/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "readcluster.h"
12 /***********************************************************************/
// Constructor: takes the distance file name (distfile) and a float value (c).
// NOTE(review): this listing omits some of the original body (the embedded line numbers jump
// from 15 to 20), so how distfile and c are stored is not visible here. c is presumably the
// distance cutoff compared against in convertPhylip2Column -- confirm against the full file.
14 ReadCluster::ReadCluster(string distfile, float c){
// Keep the GlobalData instance returned by getInstance() in the globaldata member.
15 globaldata = GlobalData::getInstance();
20 /***********************************************************************/
// Builds the list member for the current distance file.
//   nameMap - name assignment used to seed the list; it is dereferenced in the non-phylip branch.
// NOTE(review): the try/catch framing and closing braces of this method are not visible in this
// listing; the comments below describe only the statements that are shown.
22 void ReadCluster::read(NameAssignment* nameMap){
// Phylip input is handed to convertPhylip2Column(), which receives nameMap unchanged.
25 if (format == "phylip") { convertPhylip2Column(nameMap); }
// Any other format: list is a new ListVector copy-constructed from nameMap->getListVector().
// NOTE(review): nameMap is dereferenced without a visible null check -- confirm callers always
// pass a non-null map for non-phylip input.
26 else { list = new ListVector(nameMap->getListVector()); }
// Reports exception e together with the class and method names (the enclosing catch clause is
// not visible in this listing).
32 errorOut(e, "ReadCluster", "read");
36 /***********************************************************************/
// Produces a sorted copy of distFile named getRootName(distFile) + "sorted.dist".
// On Apple/Mach/Linux builds the external unix sort is run on the third field; on other
// platforms the distance is moved to the front of each line, the lines are sorted with the
// windows sort command, and the distance is moved back to the end.
// NOTE(review): loops, stream declarations, the #endif and the try/catch framing are not
// visible in this listing; comments describe only the visible statements.
38 void ReadCluster::createHClusterFile(){
40 string outfile = getRootName(distFile) + "sorted.dist";
42 //if you can, use the unix sort since it has been optimized for years
// Only __APPLE__ is tested with defined(); the other three macros are evaluated by value
// (an identifier that is not defined evaluates to 0 inside #if).
43 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// Numeric sort (-n) keyed on field 3, written to outfile via -o.
// NOTE(review): the file names are concatenated into the shell command unquoted, so paths with
// spaces or shell metacharacters will break (or alter) the command; also confirm that the
// "-k +3" key form is accepted by every sort implementation this must run on.
44 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
// The return value of system() is not checked, so a failed sort goes unnoticed here.
45 system(command.c_str());
46 #else //you are stuck with my best attempt...
47 //windows sort does not have a way to specify a column, only a character in the line
48 //since we cannot assume that the distance will always be at the same character location on each line
49 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
51 //read in file line by line and put distance first
52 string tempDistFile = distFile + ".temp";
55 openInputFile(distFile, input);
56 openOutputFile(tempDistFile, output);
58 string firstName, secondName;
// Each record is read as: firstName secondName dist ...
61 input >> firstName >> secondName >> dist;
// ... and rewritten tab-separated with dist in the first position.
62 output << dist << '\t' << firstName << '\t' << secondName << endl;
69 //sort using windows sort
70 string tempOutfile = outfile + ".temp";
// Whole-line sort of the distance-first temp file; /O names the output file.
// NOTE(review): same unquoted-path and unchecked-system() caveats as the unix branch.
71 string command = "sort " + tempDistFile + " /O " + tempOutfile;
72 system(command.c_str());
74 //read in sorted file and put distance at end again
76 openInputFile(tempOutfile, input2);
77 openOutputFile(outfile, output);
// Records come back as: dist firstName secondName ...
80 input2 >> dist >> firstName >> secondName;
// ... and are written to outfile in the original column order.
81 output << firstName << '\t' << secondName << '\t' << dist << endl;
// Delete both intermediate files; the return values of remove() are ignored.
88 remove(tempDistFile.c_str());
89 remove(tempOutfile.c_str());
// Reports exception e together with the class and method names (the enclosing catch clause is
// not visible in this listing).
95 errorOut(e, "ReadCluster", "createHClusterFile");
101 /***********************************************************************/
// Converts the phylip-formatted distFile into a column-formatted file and points distFile at it.
// Visible steps: (1) write "row <tab> column <tab> distance" index triples for distances below
// cutoff to distFile + ".column.temp"; (2) rewrite that temp file to
// getRootName(distFile) + "column.dist" with the indices replaced through rowToName;
// (3) remove the temp file and set distFile to the new file.
//   nameMap - name assignment used to seed list and to validate sequence names.
// NOTE(review): many original lines (declarations, branch conditions, the reads from the input
// stream, closing braces) are not visible in this listing; comments describe only what is shown
// and mark everything else as an assumption.
103 void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
105 //convert phylip file to column file
// Maps a matrix row index to a sequence name; it is read back when the final file is written.
// NOTE(review): the statements that fill this map are not visible here.
106 map<int, string> rowToName;
107 map<int, string>::iterator it;
111 string tempFile = distFile + ".column.temp";
113 openInputFile(distFile, in);
114 openOutputFile(tempFile, out);
// Sequence names collected in the order they are pushed; used later to build a NameAssignment.
119 vector<string> matrixNames;
123 matrixNames.push_back(name);
// Two alternative initialisations of list are visible: sized by nseqs, or copied from nameMap.
// NOTE(review): the condition choosing between them is not visible -- presumably a null check
// on nameMap; confirm.
126 list = new ListVector(nseqs);
130 list = new ListVector(nameMap->getListVector());
// A name missing from nameMap is only reported; no abort or early return is visible on this line.
131 if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
// Reads the stream one character at a time until EOF.
// NOTE(review): the declaration of d is not visible; if d is a char, comparing it with EOF is
// unreliable because get() returns an int -- confirm and widen d if needed.
135 while((d=in.get()) != EOF){
// NOTE(review): the body of this loop is not visible in this listing.
140 for(int i=0;i<nseqs;i++){
// First of two visible row loops; its inner loops stop at j<i.
// NOTE(review): presumably the lower-triangle matrix layout -- confirm.
153 for(int i=1;i<nseqs;i++){
156 matrixNames.push_back(name);
158 //there's A LOT of repeated code throughout this method...
162 for(int j=0;j<i;j++){
// A distance of exactly -1 is replaced by 1000000.
// NOTE(review): presumably -1 marks a missing distance -- confirm.
165 if (distance == -1) { distance = 1000000; }
// Only distances strictly below cutoff are written, as row index, column index, distance.
167 if(distance < cutoff){
168 out << i << '\t' << j << '\t' << distance << endl;
// Same row processing again, this time with the name checked against nameMap.
174 if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
176 for(int j=0;j<i;j++){
179 if (distance == -1) { distance = 1000000; }
181 if(distance < cutoff){
182 out << i << '\t' << j << '\t' << distance << endl;
// Second visible row loop; its inner loops run over all nseqs columns and write only j < i.
// NOTE(review): presumably the square matrix layout -- confirm.
189 for(int i=1;i<nseqs;i++){
192 matrixNames.push_back(name);
196 for(int j=0;j<nseqs;j++){
199 if (distance == -1) { distance = 1000000; }
// j < i keeps only entries below the diagonal, so each pair is written once.
201 if(distance < cutoff && j < i){
202 out << i << '\t' << j << '\t' << distance << endl;
207 if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
209 for(int j=0;j<nseqs;j++){
212 if (distance == -1) { distance = 1000000; }
214 if(distance < cutoff && j < i){
215 out << i << '\t' << j << '\t' << distance << endl;
// nameMap is a by-value pointer parameter, so this assignment does not change the caller's
// pointer; the new object is made reachable through globaldata->nameMap below.
// NOTE(review): the condition guarding this block is not visible -- presumably it runs only when
// no nameMap was supplied; confirm, and confirm who deletes the new NameAssignment.
228 nameMap = new NameAssignment();
// NOTE(review): int i is compared with the unsigned result of size().
229 for(int i=0;i<matrixNames.size();i++){
230 nameMap->push_back(matrixNames[i]);
232 globaldata->nameMap = nameMap;
// Second pass: turn the index triples in the temp file into name triples.
239 string outputFile = getRootName(distFile) + "column.dist";
240 openInputFile(tempFile, in2);
241 openOutputFile(outputFile, out2);
247 in2 >> first >> second >> dist;
// operator[] on the map inserts an empty string for an index that has no entry, so an unmapped
// index would be written as an empty name rather than reported.
248 out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
// Drop the intermediate index file (return value ignored) and make the column file the current
// distance file.
254 remove(tempFile.c_str());
255 distFile = outputFile;
257 catch(exception& e) {
// Reports exception e together with the class and method names.
258 errorOut(e, "ReadCluster", "convertPhylip2Column");
262 /***********************************************************************/
// Destructor with an empty body: nothing is released here.
// NOTE(review): list is allocated with new in read() and convertPhylip2Column() and is not
// deleted here -- presumably ownership passes to a caller; confirm.
264 ReadCluster::~ReadCluster(){}
265 /***********************************************************************/