5 * Created by Pat Schloss on 8/13/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
12 #include "sparsematrix.hpp"
13 #include "progress.hpp"
14 #include "listvector.hpp"
15 #include "rabundvector.hpp"
16 #include "readmatrix.hpp"
19 /***********************************************************************/
// Constructs a reader for the distance file distFile: the file is opened on
// fileHandle through openInputFile and the result of that call is kept in
// successOpen.
21 ReadPhylipMatrix::ReadPhylipMatrix(string distFile){
23 successOpen = openInputFile(distFile, fileHandle);
27 /***********************************************************************/
// Reads a distance matrix from fileHandle. The stream starts with the number
// of sequences followed by the first sequence name; every later row starts
// with a name followed by distances. Distances below cutoff are turned into
// PCell values, indexed either by row/column position or by the indices that
// nameMap->get() returns for the two names.
// NOTE(review): several lines of this method (local declarations, branch
// conditions, inner loop headers, closing braces) are not visible in this
// excerpt, so the comments below describe only the statements that are shown.
29 void ReadPhylipMatrix::read(NameAssignment* nameMap){
35 vector<string> matrixNames;
// Header: sequence count, then the name that begins the first row.
37 fileHandle >> nseqs >> name;
39 matrixNames.push_back(name);
// Two ways of building list: sized from nseqs, or copied from the name map.
// NOTE(review): the condition that selects between them is not visible here;
// presumably a null nameMap picks the first form - TODO confirm.
42 list = new ListVector(nseqs);
46 list = new ListVector(nameMap->getListVector());
// A name missing from nameMap is only reported on cout; no abort is visible.
47 if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
// Scan the rest of the first row one character at a time. On the visible path
// the character is pushed back and nseqs distances are consumed.
// NOTE(review): presumably this is how a square matrix is told apart from a
// triangular one - the deciding tests are on lines not shown.
51 while((d=fileHandle.get()) != EOF){
55 fileHandle.putback(d);
56 for(int i=0;i<nseqs;i++){
57 fileHandle >> distance;
// First layout: the progress total is one entry per unordered pair of sequences.
71 reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2);
// Rows 1..nseqs-1; the name of row 0 was already read from the header.
75 for(int i=1;i<nseqs;i++){
77 matrixNames.push_back(name);
79 //there's A LOT of repeated code throughout this method...
// Variant that indexes the cell by position (i, j).
84 fileHandle >> distance;
86 if(distance < cutoff){
87 PCell value(i, j, distance);
91 reading->update(index);
// Variant that indexes the cell through nameMap.
96 if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
99 fileHandle >> distance;
101 if(distance < cutoff){
102 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
106 reading->update(index);
// Second layout: the progress total is nseqs * nseqs, one entry per cell.
113 reading = new Progress("Reading matrix: ", nseqs * nseqs);
117 for(int i=1;i<nseqs;i++){
119 matrixNames.push_back(name);
// Every column of the row is read, but only entries with j < i are kept.
123 for(int j=0;j<nseqs;j++){
124 fileHandle >> distance;
126 if(distance < cutoff && j < i){
127 PCell value(i, j, distance);
131 reading->update(index);
136 if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
138 for(int j=0;j<nseqs;j++){
139 fileHandle >> distance;
141 if(distance < cutoff && j < i){
142 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
146 reading->update(index);
// Remove every name seen in the matrix from nameMap; anything left over was
// listed in the name map but never appeared in the matrix.
// NOTE(review): int i is compared against the unsigned matrixNames.size().
158 for(int i=0;i<matrixNames.size();i++){
159 nameMap->erase(matrixNames[i]);
161 if(nameMap->size() > 0){
162 //should probably tell them what is missing if we missed something
163 cout << "missed something" << '\t' << nameMap->size() << endl;
// Failures are reported on cout; what happens after the message is on lines
// not shown in this excerpt.
168 catch(exception& e) {
169 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
173 cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178 /***********************************************************************/
// Destructor of the PHYLIP distance matrix reader.
180 ReadPhylipMatrix::~ReadPhylipMatrix(){
185 /***********************************************************************/
// Constructs a reader for the column-formatted distance file df. The path is
// kept in distFile (read() reopens it when a second pass is needed), the file
// is opened on fileHandle and the result of openInputFile is kept in
// successOpen.
187 ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
189 successOpen = openInputFile(distFile, fileHandle);
193 /***********************************************************************/
// Reads (firstName, secondName, distance) records from fileHandle and builds
// PCell values for the pairs whose distance is below cutoff. Cell indices come
// from nameMap->get(). The first pass assumes each pair is listed once
// (triangular input); if the transpose of a remembered reference cell shows
// up, the input is treated as square, D is cleared and the file is read again
// keeping only the entries whose first index is greater than the second.
// NOTE(review): nameMap is dereferenced without a visible null check, and
// several lines (declarations, closing braces, the statements that change lt)
// are not visible in this excerpt.
195 void ReadColumnMatrix::read(NameAssignment* nameMap){
198 string firstName, secondName;
200 int nseqs = nameMap->size();
202 list = new ListVector(nameMap->getListVector());
// NOTE(review): nseqs is an int, so nseqs * nseqs overflows a 32-bit int once
// there are more than 46340 names; no delete of this Progress is visible here.
204 Progress* reading = new Progress("Reading matrix: ", nseqs * nseqs);
207 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if its transpose
208 int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then it's a square matrix
210 //need to see if this is a square or a triangular matrix...
211 while(fileHandle && lt == 1){ //let's assume it's a triangular matrix...
// NOTE(review): the result of this extraction is not checked on the visible
// lines; the stream state is only tested at the top of the loop.
213 fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance
// Unknown names are reported on cerr; no abort is visible after the message.
215 if(nameMap->count(firstName)==0){
216 cerr << "AError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
218 if(nameMap->count(secondName)==0){
219 cerr << "AError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
// Keep only close pairs whose two names map to different indices. Because
// equal indices never pass this test, refRow == refCol can only mean that no
// reference cell has been recorded yet.
222 if(distance < cutoff && nameMap->get(firstName) != nameMap->get(secondName)){
// First index is the larger one: the cell is built as (first, second).
223 if(nameMap->get(firstName) > nameMap->get(secondName)){
224 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
226 if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
227 refRow = nameMap->get(firstName);
228 refCol = nameMap->get(secondName);
// The reference pair has appeared again in the same order.
// NOTE(review): the statements of this branch are not visible here.
231 else if(refRow == nameMap->get(firstName) && refCol == nameMap->get(secondName)){
// First index is the smaller one: the cell is built with the indices swapped.
238 else if(nameMap->get(firstName) < nameMap->get(secondName)){
239 PCell value(nameMap->get(secondName), nameMap->get(firstName), distance);
// The reference cell is recorded in file order, not in swapped order.
241 if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
242 refRow = nameMap->get(firstName);
243 refCol = nameMap->get(secondName);
// The transpose of the reference cell has appeared.
// NOTE(review): presumably lt is cleared here - the statement is not visible.
246 else if(refRow == nameMap->get(secondName) && refCol == nameMap->get(firstName)){
253 reading->update(nameMap->get(firstName) * nseqs);
// Square input: discard what was collected and read the file a second time.
258 if(lt == 0){ // oops, it was square
259 fileHandle.close(); //let's start over
260 D->clear(); //let's start over
// The result of reopening the file is not stored or checked here.
262 openInputFile(distFile, fileHandle); //let's start over
265 fileHandle >> firstName >> secondName >> distance;
267 if(nameMap->count(firstName)==0){
268 cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
270 if(nameMap->count(secondName)==0){
271 cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
// Second pass keeps only the entries whose first index is the larger one.
274 if(distance < cutoff && nameMap->get(firstName) > nameMap->get(secondName)){
275 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
277 reading->update(nameMap->get(firstName) * nseqs);
// The block below is disabled code kept from an earlier version of the loop.
284 // while(fileHandle){
285 // fileHandle >> firstName >> secondName >> distance;
287 // if(nameMap->count(firstName)==0){
288 // cerr << "CError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
290 // if(nameMap->count(secondName)==0){
291 // cerr << "CError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
294 // if(distance < cutoff && (*nameMap)[firstName].second < (*nameMap)[secondName].second){
295 //// cout << (*nameMap)[secondName] << ' ' << (*nameMap)[firstName] << ' ' << distance << endl;
296 // D->addCell(Cell((*nameMap)[secondName].second, (*nameMap)[firstName].second, distance));
297 // reading->update((*nameMap)[secondName].second * nseqs);
300 // gobble(fileHandle);
// Failures are reported on cout; what happens after the message is on lines
// not shown in this excerpt.
309 catch(exception& e) {
310 cout << "Standard Error: " << e.what() << " has occurred in the ReadColumnMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
314 cout << "An unknown error has occurred in the ReadColumnMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
320 /***********************************************************************/
// Destructor of the column-formatted distance matrix reader.
322 ReadColumnMatrix::~ReadColumnMatrix(){
328 /***********************************************************************/
// Constructs a reader for the file pf: the path is kept in philFile, the file
// is opened on fileHandle and the result of openInputFile is kept in
// successOpen.
330 ReadPhilFile::ReadPhilFile(string pf): philFile(pf){
332 successOpen = openInputFile(philFile, fileHandle);
336 /***********************************************************************/
337 //This function reads the list, rabund or sabund files to be used by collect and rarefact command.
// It creates InputData readers over philFile using the format reported by
// globaldata, pulls the first vectors out of them and publishes the readers
// and vectors on globaldata (ginput, gorder, sabund, gListVector, gSharedList).
// NOTE(review): some lines of this method (try, closing braces) are not
// visible in this excerpt.
338 void ReadPhilFile::read(GlobalData* globaldata){
// No order file: three separate readers are opened over the same file.
340 if (globaldata->getOrderFile() == "") {
341 //you have two inputs because in the next if statement if you only have one then it moves ahead in the same file.
342 //So when you run the collect or summary commands you miss a line.
343 input = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
344 inputList = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
345 inputSabund = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund or shared.
// NOTE(review): on this path only input is assigned on the visible lines,
// yet inputSabund and inputList are used further down for the list, rabund
// and sabund formats - confirm they are set elsewhere.
346 }else {//there is an orderfile
347 input = new InputData(philFile, globaldata->getOrderFile(), globaldata->getFormat());
// The previous globaldata->ginput is overwritten without being deleted; the
// delete below is commented out.
350 //memory leak prevention
351 //if (globaldata->ginput != NULL) { delete globaldata->ginput; }
352 globaldata->ginput = input; //saving to be used by collector and rarefact commands.
// Each vector below is taken from its own reader.
354 if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary.
355 order = input->getOrderVector();
356 //memory leak prevention
357 //if (globaldata->gorder != NULL) { delete globaldata->gorder; }
358 globaldata->gorder = order; //saving to be used by collect and rarefact commands.
359 sabund = inputSabund->getSAbundVector();
360 globaldata->sabund = sabund; //saving to be used by summary command.
361 list = inputList->getListVector();
362 globaldata->gListVector = list;
// Shared format: only the shared list vector is read and published.
363 }else if (globaldata->getFormat() == "shared") {
364 SharedList = input->getSharedListVector(); //you are reading for collect.shared, rarefaction.shared, summary.shared, parselist command, or shared commands.
365 //memory leak prevention
366 //if (globaldata->gSharedList != NULL) { delete globaldata->gSharedList; }
367 globaldata->gSharedList = SharedList;
// Failures are reported on cout; what happens after the message is on lines
// not shown in this excerpt.
370 catch(exception& e) {
371 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhilFile class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
375 cout << "An unknown error has occurred in the ReadPhilFile class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
380 /***********************************************************************/
// Destructor of the list/rabund/sabund/shared file reader.
382 ReadPhilFile::~ReadPhilFile(){
387 /***********************************************************************/