5 * Created by Pat Schloss on 8/13/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
12 #include "utilities.hpp"
13 #include "sparsematrix.hpp"
14 #include "progress.hpp"
15 #include "listvector.hpp"
16 #include "rabundvector.hpp"
17 #include "readmatrix.hpp"
20 /***********************************************************************/
//Constructor: opens the distance file named by distFile on the member stream fileHandle
//using openInputFile() and records that call's return value in successOpen.
22 ReadPhylipMatrix::ReadPhylipMatrix(string distFile){
24 successOpen = openInputFile(distFile, fileHandle);
28 /***********************************************************************/
//Reads a distance matrix from fileHandle.
//  nameMap - name assignment used to look up sequence names (count/get) and to build the
//            ListVector; every name found in the matrix is erased from it at the end.
//The stream starts with the number of sequences followed by the first sequence name; each
//later row starts with a name followed by its distances. Distances below cutoff are turned
//into PCell values. Exceptions are caught and reported on cout.
//NOTE(review): several branch conditions and local declarations of this method are not
//visible in this excerpt, so the comments below describe only the statements that are shown.
30 void ReadPhylipMatrix::read(NameAssignment* nameMap){
//names in the order they appear in the matrix; index in this vector == matrix row
36 vector<string> matrixNames;
//header: number of sequences, then the name of the first row
38 fileHandle >> nseqs >> name;
40 matrixNames.push_back(name);
//two alternative ways of building the list are visible: sized by nseqs, or copied from nameMap
//NOTE(review): the condition selecting between them is not shown - presumably whether nameMap was supplied
43 list = new ListVector(nseqs);
47 list = new ListVector(nameMap->getListVector());
48 if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
//look at the rest of the first row one character at a time; a character may be pushed back and
//nseqs distances consumed. NOTE(review): presumably this is how a square matrix is told apart
//from a lower-triangle one - the tests on d are not visible here, confirm.
52 while((d=fileHandle.get()) != EOF){
56 fileHandle.putback(d);
57 for(int i=0;i<nseqs;i++){
58 fileHandle >> distance;
//progress bar sized for one triangle of the matrix without the diagonal: nseqs*(nseqs-1)/2 cells
72 reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2);
//rows start at 1 because the first row's name was already read with the header
76 for(int i=1;i<nseqs;i++){
78 matrixNames.push_back(name);
80 //there's A LOT of repeated code throughout this method...
85 fileHandle >> distance;
//only distances under the cutoff are kept; here the cell is keyed by matrix position (i, j)
87 if(distance < cutoff){
88 PCell value(i, j, distance);
92 reading->update(index);
//NOTE(review): only an error message is visible when a name is missing from nameMap - confirm what happens next
97 if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
100 fileHandle >> distance;
//same cutoff test, but the cell is keyed by the indices nameMap returns for the two names
102 if(distance < cutoff){
103 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
107 reading->update(index);
//progress bar sized for every cell of a full nseqs x nseqs matrix
114 reading = new Progress("Reading matrix: ", nseqs * nseqs);
118 for(int i=1;i<nseqs;i++){
120 matrixNames.push_back(name);
//every column of the row is read, but j < i below keeps only the entries under the diagonal
124 for(int j=0;j<nseqs;j++){
125 fileHandle >> distance;
127 if(distance < cutoff && j < i){
128 PCell value(i, j, distance);
132 reading->update(index);
137 if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
139 for(int j=0;j<nseqs;j++){
140 fileHandle >> distance;
//as above, but keyed by the nameMap indices of the two names
142 if(distance < cutoff && j < i){
143 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
147 reading->update(index);
//erase every name that was seen in the matrix from nameMap; whatever is left afterwards was
//in nameMap but never appeared in the matrix
//NOTE(review): i is an int compared against the unsigned matrixNames.size()
159 for(int i=0;i<matrixNames.size();i++){
160 nameMap->erase(matrixNames[i]);
162 if(nameMap->size() > 0){
163 //should probably tell them what is missing if we missed something
164 cout << "missed something" << '\t' << nameMap->size() << endl;
//std::exception: print what() together with the class and function name
169 catch(exception& e) {
170 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
//message printed for errors that are not std::exception (the enclosing handler is not visible here)
174 cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179 /***********************************************************************/
//Destructor for ReadPhylipMatrix.
//NOTE(review): the body is not visible in this excerpt - confirm what it releases.
181 ReadPhylipMatrix::~ReadPhylipMatrix(){
186 /***********************************************************************/
//Constructor: stores the file name df in the member distFile (read() reopens the file by this
//name), opens it on fileHandle with openInputFile() and records the result in successOpen.
188 ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
190 successOpen = openInputFile(distFile, fileHandle);
194 /***********************************************************************/
//Reads a distance file made of "firstName secondName distance" records from fileHandle.
//  nameMap - supplies the number of sequences (size), the ListVector, and the index of each
//            sequence name (count/get).
//First pass: the file is assumed to hold one triangle of the matrix (lt == 1). Pairs under the
//cutoff whose two names map to different indices are stored as PCell with the larger index
//first. One stored cell is remembered in refRow/refCol so a later record can be compared with it.
//Second pass (only when lt == 0): the file is reopened, D is cleared, and only records where
//the first name's index is greater than the second's are kept.
//Exceptions are caught and reported on cout.
//NOTE(review): some statements of this method (including where lt is assigned) are not visible
//in this excerpt.
196 void ReadColumnMatrix::read(NameAssignment* nameMap){
199 string firstName, secondName;
201 int nseqs = nameMap->size();
203 list = new ListVector(nameMap->getListVector());
//progress is reported against nseqs*nseqs and updated below with (row index * nseqs)
205 Progress* reading = new Progress("Reading matrix: ", nseqs * nseqs);
208 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if its transpose
209 int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then it's a square matrix
211 //need to see if this is a square or a triangular matrix...
212 while(fileHandle && lt == 1){ //let's assume it's a triangular matrix...
214 fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance
//NOTE(review): only an error message is visible for unknown names; nameMap->get() is still called on them below - confirm
216 if(nameMap->count(firstName)==0){
217 cerr << "AError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
219 if(nameMap->count(secondName)==0){
220 cerr << "AError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
//skip distances at or above the cutoff and pairs whose two names map to the same index
223 if(distance < cutoff && nameMap->get(firstName) != nameMap->get(secondName)){
//first index larger: store the cell as (first, second)
224 if(nameMap->get(firstName) > nameMap->get(secondName)){
225 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
//refRow == refCol (both start at 0) is used as the "no reference cell yet" marker
227 if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
228 refRow = nameMap->get(firstName);
229 refCol = nameMap->get(secondName);
//this record has the same (first, second) indices as the remembered reference cell
232 else if(refRow == nameMap->get(firstName) && refCol == nameMap->get(secondName)){
//second index larger: store the cell with the indices swapped so the larger one comes first
239 else if(nameMap->get(firstName) < nameMap->get(secondName)){
240 PCell value(nameMap->get(secondName), nameMap->get(firstName), distance);
//NOTE(review): the reference is remembered as (first, second) here too, i.e. not in the swapped
//order used for the stored cell, while the test below compares against the swapped order -
//verify the square-matrix detection still triggers when the first stored pair comes from this branch
242 if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
243 refRow = nameMap->get(firstName);
244 refCol = nameMap->get(secondName);
//this record is the transpose of the remembered reference cell
247 else if(refRow == nameMap->get(secondName) && refCol == nameMap->get(firstName)){
254 reading->update(nameMap->get(firstName) * nseqs);
259 if(lt == 0){ // oops, it was square
260 fileHandle.close(); //let's start over
261 D->clear(); //let's start over
263 openInputFile(distFile, fileHandle); //let's start over
266 fileHandle >> firstName >> secondName >> distance;
268 if(nameMap->count(firstName)==0){
269 cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
271 if(nameMap->count(secondName)==0){
272 cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
//second pass keeps only the records whose first index is greater than the second
275 if(distance < cutoff && nameMap->get(firstName) > nameMap->get(secondName)){
276 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
278 reading->update(nameMap->get(firstName) * nseqs);
//the commented-out loop below is an older reading loop that used (*nameMap)[name].second and D->addCell directly
285 // while(fileHandle){
286 // fileHandle >> firstName >> secondName >> distance;
288 // if(nameMap->count(firstName)==0){
289 // cerr << "CError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
291 // if(nameMap->count(secondName)==0){
292 // cerr << "CError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
295 // if(distance < cutoff && (*nameMap)[firstName].second < (*nameMap)[secondName].second){
296 //// cout << (*nameMap)[secondName] << ' ' << (*nameMap)[firstName] << ' ' << distance << endl;
297 // D->addCell(Cell((*nameMap)[secondName].second, (*nameMap)[firstName].second, distance));
298 // reading->update((*nameMap)[secondName].second * nseqs);
301 // gobble(fileHandle);
//std::exception: print what() together with the class and function name
310 catch(exception& e) {
311 cout << "Standard Error: " << e.what() << " has occurred in the ReadColumnMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
//message printed for errors that are not std::exception (the enclosing handler is not visible here)
315 cout << "An unknown error has occurred in the ReadColumnMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
321 /***********************************************************************/
//Destructor for ReadColumnMatrix.
//NOTE(review): the body is not visible in this excerpt - confirm what it releases.
323 ReadColumnMatrix::~ReadColumnMatrix(){
329 /***********************************************************************/
//Constructor: stores the file name pf in the member philFile, opens it on fileHandle with
//openInputFile() and records the result in successOpen.
331 ReadPhilFile::ReadPhilFile(string pf): philFile(pf){
333 successOpen = openInputFile(philFile, fileHandle);
337 /***********************************************************************/
338 //This function reads the list, rabund or sabund files to be used by collect and rarefact command.
//  globaldata - supplies the order file name and the format string, and receives the results:
//               ginput always; gorder and sabund for the "list", "rabund" and "sabund" formats;
//               gSharedList for the "shared" format.
//Exceptions are caught and reported on cout.
339 void ReadPhilFile::read(GlobalData* globaldata){
341 if (globaldata->getOrderFile() == "") {
342 //you have two inputs because in the next if statement if you only have one then it moves ahead in the same file.
343 //So when you run the collect or summary commands you miss a line.
344 input = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
345 inputSabund = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund or shared.
346 }else {//there is an orderfile
//NOTE(review): only input is visibly created in this branch, yet inputSabund is dereferenced
//below for the list/rabund/sabund formats - confirm it is assigned when an order file is given
347 input = new InputData(philFile, globaldata->getOrderFile(), globaldata->getFormat());
349 globaldata->ginput = input; //saving to be used by collector and rarefact commands.
351 if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary.
//the order vector comes from input, the sabund vector from the second reader inputSabund
352 order = input->getOrderVector();
353 globaldata->gorder = order; //saving to be used by collect and rarefact commands.
354 sabund = inputSabund->getSAbundVector();
355 globaldata->sabund = sabund; //saving to be used by summary command.
356 }else if (globaldata->getFormat() == "shared") {
357 SharedList = input->getSharedListVector(); //you are reading for collect.shared, rarefaction.shared, summary.shared, parselist command, or shared commands.
358 globaldata->gSharedList = SharedList;
//std::exception: print what() together with the class and function name
361 catch(exception& e) {
362 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhilFile class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
//message printed for errors that are not std::exception (the enclosing handler is not visible here)
366 cout << "An unknown error has occurred in the ReadPhilFile class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
371 /***********************************************************************/
//Destructor for ReadPhilFile.
//NOTE(review): the body is not visible in this excerpt - confirm what it releases.
373 ReadPhilFile::~ReadPhilFile(){
378 /***********************************************************************/