git.donarmstrong.com Git - mothur.git/blob - readcluster.cpp
created mothurOut class to handle logfiles
[mothur.git] / readcluster.cpp
1 /*
2  *  readcluster.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/28/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "readcluster.h"
11
12 /***********************************************************************/
13
14 ReadCluster::ReadCluster(string distfile, float c){
15                 globaldata = GlobalData::getInstance();
16                 m = MothurOut::getInstance();
17         distFile = distfile;
18                 cutoff = c;
19 }
20
21 /***********************************************************************/
22
23 void ReadCluster::read(NameAssignment* nameMap){
24         try {
25         
26                 if (format == "phylip") { convertPhylip2Column(nameMap); }
27                 else { list = new ListVector(nameMap->getListVector());  }
28                 
29                 OutPutFile = sortFile(distFile);
30                         
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ReadCluster", "read");
34                 exit(1);
35         }
36 }
37 /***********************************************************************/
38
39 void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
40         try {   
41                 //convert phylip file to column file
42                 map<int, string> rowToName;
43                 map<int, string>::iterator it;
44                 
45                 ifstream in;
46                 ofstream out;
47                 string tempFile = distFile + ".column.temp";
48                 
49                 openInputFile(distFile, in);
50                 openOutputFile(tempFile, out);
51                 
52                 float distance;
53                 int square, nseqs;
54                 string name;
55                 vector<string> matrixNames;
56         
57                 in >> nseqs >> name;
58                 rowToName[0] = name;
59                 matrixNames.push_back(name);
60                 
61                 if(nameMap == NULL){
62                         list = new ListVector(nseqs);
63                         list->set(0, name);
64                 }
65                 else{
66                         list = new ListVector(nameMap->getListVector());
67                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
68                 }
69         
70                 char d;
71                 while((d=in.get()) != EOF){
72                         
73                         if(isalnum(d)){
74                                 square = 1;
75                                 in.putback(d);
76                                 for(int i=0;i<nseqs;i++){
77                                         in >> distance;
78                                 }
79                                 break;
80                         }
81                         if(d == '\n'){
82                                 square = 0;
83                                 break;
84                         }
85                 }
86         
87                 if(square == 0){
88                                         
89                         for(int i=1;i<nseqs;i++){
90                                 in >> name;
91                                 rowToName[i] = name;
92                                 matrixNames.push_back(name);
93                                 
94                                 //there's A LOT of repeated code throughout this method...
95                                 if(nameMap == NULL){
96                                         list->set(i, name);
97                                         
98                                         for(int j=0;j<i;j++){
99                                                 in >> distance;
100                                                 
101                                                 if (distance == -1) { distance = 1000000; }
102                                                 
103                                                 if(distance < cutoff){
104                                                         out << i << '\t' << j << '\t' << distance << endl;
105                                                 }
106                                         }
107                                         
108                                 }
109                                 else{
110                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
111                                         
112                                         for(int j=0;j<i;j++){
113                                                 in >> distance;
114                                                 
115                                                 if (distance == -1) { distance = 1000000; }
116                                                 
117                                                 if(distance < cutoff){
118                                                         out << i << '\t' << j << '\t' << distance << endl;
119                                                 }
120                                         }
121                                 }
122                         }
123                 }
124                 else{
125                         for(int i=1;i<nseqs;i++){
126                                 in >> name;                
127                                 rowToName[i] = name;
128                                 matrixNames.push_back(name);
129                 
130                                 if(nameMap == NULL){
131                                         list->set(i, name);
132                                         for(int j=0;j<nseqs;j++){
133                                                 in >> distance;
134                                         
135                                                 if (distance == -1) { distance = 1000000; }
136                                                 
137                                                 if(distance < cutoff && j < i){
138                                                         out << i << '\t' << j << '\t' << distance << endl;
139                                                 }
140                                         }
141                                 }
142                                 else{
143                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
144                                         
145                                         for(int j=0;j<nseqs;j++){
146                                                 in >> distance;
147                         
148                                                 if (distance == -1) { distance = 1000000; }
149                                                 
150                                                 if(distance < cutoff && j < i){
151                                                         out << i << '\t' << j << '\t' << distance << endl;
152                                                 }
153                                                 
154                                         }
155                                 }
156                         }
157                 }
158                 
159                 list->setLabel("0");
160                 in.close();
161                 out.close();
162                 
163                 if(nameMap == NULL){
164                         nameMap = new NameAssignment();
165                         for(int i=0;i<matrixNames.size();i++){
166                                 nameMap->push_back(matrixNames[i]);
167                         }
168                         globaldata->nameMap = nameMap;
169                 }
170                 
171         
172                 ifstream in2;
173                 ofstream out2;
174                 
175                 string outputFile = getRootName(distFile) + "column.dist";
176                 openInputFile(tempFile, in2);
177                 openOutputFile(outputFile, out2);
178                 
179                 int first, second;
180                 float dist;
181                 
182                 while (in2) {
183                         in2 >> first >> second >> dist;
184                         out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
185                         gobble(in2);
186                 }
187                 in2.close();
188                 out2.close();
189                 
190                 remove(tempFile.c_str());
191                 distFile = outputFile;
192         }
193         catch(exception& e) {
194                 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
195                 exit(1);
196         }
197 }
198 /***********************************************************************/
199
200 ReadCluster::~ReadCluster(){}
201 /***********************************************************************/
202