5 * Created by Pat Schloss on 8/13/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
14 #include "utilities.hpp"
15 #include "sparsematrix.hpp"
16 #include "progress.hpp"
17 #include "listvector.hpp"
18 #include "rabundvector.hpp"
21 #include "readmatrix.hpp"
24 /***********************************************************************/
// Constructor: opens distFile on the member stream fileHandle through
// openInputFile() and stores that call's return value in successOpen.
// distFile: path of the distance file that read() will consume.
26 ReadPhylipMatrix::ReadPhylipMatrix(string distFile){
28 successOpen = openInputFile(distFile, fileHandle);
32 /***********************************************************************/
// Reads the distance matrix from fileHandle. The header supplies the sequence
// count (nseqs) followed by the first sequence name; every name read from the
// file is collected in matrixNames. Distances below cutoff become PCell values.
// nameMap: name assignment used to translate sequence names into cell indices
//          (count/get) and to build the ListVector.
//          NOTE(review): the two ListVector constructions below suggest nameMap
//          may be absent, in which case loop counters are used as indices; the
//          guarding condition is not among the visible lines - TODO confirm.
// Errors:  exceptions derived from exception are caught at the end of the
//          function and reported on cout.
34 void ReadPhylipMatrix::read(NameAssignment* nameMap){
// Names in the order they are read from the file.
40 vector<string> matrixNames;
// Header: number of sequences, then the name that begins the first row.
42 fileHandle >> nseqs >> name;
44 matrixNames.push_back(name);
// List sized from the sequence count alone (presumably the branch without a name map - TODO confirm).
47 list = new ListVector(nseqs);
// List copied from the name assignment.
51 list = new ListVector(nameMap->getListVector());
// NOTE(review): an unknown name is only reported on cout here; nothing visible stops the read - TODO confirm.
52 if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
// Character-by-character scan after the first name. One character is pushed back and
// nseqs distances are then read; presumably this distinguishes a square matrix from a
// lower-triangular one - TODO confirm against the lines not shown here.
56 while((d=fileHandle.get()) != EOF){
60 fileHandle.putback(d);
61 for(int i=0;i<nseqs;i++){
62 fileHandle >> distance;
// Progress total for this pass: nseqs*(nseqs-1)/2, the number of cells below the diagonal.
76 reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2);
// The first name was already read together with the header, so rows start at 1.
80 for(int i=1;i<nseqs;i++){
82 matrixNames.push_back(name);
84 //there's A LOT of repeated code throughout this method...
89 fileHandle >> distance;
// Only distances below cutoff produce a cell; here the loop counters serve as indices.
91 if(distance < cutoff){
92 PCell value(i, j, distance);
96 reading->update(index);
// NOTE(review): as above, a missing name is reported but the visible code continues.
101 if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
// Row i has i entries (columns 0..i-1).
103 for(int j=0;j<i;j++){
104 fileHandle >> distance;
106 if(distance < cutoff){
// Indices come from the name map lookup of the stored names rather than from the loop counters.
107 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
111 reading->update(index);
// Progress total for this pass: nseqs*nseqs.
// NOTE(review): the product is computed in int and could overflow for very large nseqs.
118 reading = new Progress("Reading matrix: ", nseqs * nseqs);
122 for(int i=1;i<nseqs;i++){
124 matrixNames.push_back(name);
// Every row carries nseqs entries in this pass.
128 for(int j=0;j<nseqs;j++){
129 fileHandle >> distance;
// All entries are read, but only those with j < i (below the diagonal) and below cutoff are kept.
131 if(distance < cutoff && j < i){
132 PCell value(i, j, distance);
136 reading->update(index);
141 if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
143 for(int j=0;j<nseqs;j++){
144 fileHandle >> distance;
146 if(distance < cutoff && j < i){
// Same filter as above, with indices translated through the name map.
147 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
151 reading->update(index);
// Erase every name seen in the matrix from nameMap; entries still left afterwards were not seen in the matrix.
// NOTE(review): this dereferences nameMap - confirm a guard exists for the case where no name map was supplied.
// NOTE(review): int counter compared against the unsigned result of size().
163 for(int i=0;i<matrixNames.size();i++){
164 nameMap->erase(matrixNames[i]);
166 if(nameMap->size() > 0){
167 //should probably tell them what is missing if we missed something
168 cout << "missed something" << '\t' << nameMap->size() << endl;
// Failures are reported on cout; what happens after the message is not visible here.
173 catch(exception& e) {
174 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178 cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
183 /***********************************************************************/
// Destructor for ReadPhylipMatrix.
// NOTE(review): read() allocates list with new - confirm where that object is released.
185 ReadPhylipMatrix::~ReadPhylipMatrix(){
190 /***********************************************************************/
// Constructor: remembers the path in distFile (read() reopens the file from it when
// a second pass is needed), opens it on fileHandle through openInputFile() and
// stores that call's return value in successOpen.
// df: path of the column-formatted distance file.
192 ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
194 successOpen = openInputFile(distFile, fileHandle);
198 /***********************************************************************/
// Reads a column-formatted distance file in which each record is
// firstName secondName distance. The first pass runs while lt == 1 (triangular
// assumption) and tracks one reference cell to detect its transpose; when lt is 0
// afterwards, the file is reopened and re-read keeping only records whose first
// index is greater than the second.
// nameMap: maps sequence names to indices (get), answers membership (count) and
//          supplies size() and the ListVector. It is dereferenced unconditionally
//          in the visible code.
// Errors:  exceptions derived from exception are caught at the end and reported on cout.
200 void ReadColumnMatrix::read(NameAssignment* nameMap){
203 string firstName, secondName;
// Sequence count comes from the name assignment, not from the distance file.
205 int nseqs = nameMap->size();
207 list = new ListVector(nameMap->getListVector());
// NOTE(review): nseqs * nseqs is computed in int and could overflow for very large nseqs.
209 Progress* reading = new Progress("Reading matrix: ", nseqs * nseqs);
212 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if its transpose
213 int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then it is a square matrix
215 //need to see if this is a square or a triangular matrix...
// NOTE(review): the stream state is tested before the extraction on the next line, so a
// failed read at end of file is not caught by this condition - confirm the hidden lines
// handle trailing whitespace or a failed extraction.
216 while(fileHandle && lt == 1){ //let's assume it's a triangular matrix...
218 fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance
// Unknown names are reported on cerr; what follows the message is not visible here.
220 if(nameMap->count(firstName)==0){
221 cerr << "AError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
223 if(nameMap->count(secondName)==0){
224 cerr << "AError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
// Skip records at or above cutoff and records whose two names map to the same index.
227 if(distance < cutoff && nameMap->get(firstName) != nameMap->get(secondName)){
// The larger index is always used as the row, so cells land below the diagonal.
228 if(nameMap->get(firstName) > nameMap->get(secondName)){
229 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
// refRow == refCol (both start at 0) doubles as the marker for no reference cell stored yet.
231 if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
232 refRow = nameMap->get(firstName);
233 refCol = nameMap->get(secondName);
// NOTE(review): this compares in the same orientation the reference was stored in, whereas
// the matching test in the branch below compares the swapped orientation. A reference stored
// from a record with first < second would then never match its transpose - TODO confirm intent.
236 else if(refRow == nameMap->get(firstName) && refCol == nameMap->get(secondName)){
243 else if(nameMap->get(firstName) < nameMap->get(secondName)){
244 PCell value(nameMap->get(secondName), nameMap->get(firstName), distance);
246 if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
247 refRow = nameMap->get(firstName);
248 refCol = nameMap->get(secondName);
// Swapped orientation: this record is the transpose of the stored reference cell.
251 else if(refRow == nameMap->get(secondName) && refCol == nameMap->get(firstName)){
// Progress position is the index of firstName multiplied by nseqs.
258 reading->update(nameMap->get(firstName) * nseqs);
263 if(lt == 0){ // oops, it was square
264 fileHandle.close(); //let's start over
265 D->clear(); //let's start over
// The return value of openInputFile is not checked on this reopen.
267 openInputFile(distFile, fileHandle); //let's start over
270 fileHandle >> firstName >> secondName >> distance;
272 if(nameMap->count(firstName)==0){
273 cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
275 if(nameMap->count(secondName)==0){
276 cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
// Second pass: keep only records whose first index is greater than the second, i.e. one triangle.
279 if(distance < cutoff && nameMap->get(firstName) > nameMap->get(secondName)){
280 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
282 reading->update(nameMap->get(firstName) * nseqs);
// The lines below are a commented-out read loop and are not compiled.
289 // while(fileHandle){
290 // fileHandle >> firstName >> secondName >> distance;
292 // if(nameMap->count(firstName)==0){
293 // cerr << "CError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
295 // if(nameMap->count(secondName)==0){
296 // cerr << "CError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
299 // if(distance < cutoff && (*nameMap)[firstName].second < (*nameMap)[secondName].second){
300 //// cout << (*nameMap)[secondName] << ' ' << (*nameMap)[firstName] << ' ' << distance << endl;
301 // D->addCell(Cell((*nameMap)[secondName].second, (*nameMap)[firstName].second, distance));
302 // reading->update((*nameMap)[secondName].second * nseqs);
305 // gobble(fileHandle);
// Failures are reported on cout; what happens after the message is not visible here.
314 catch(exception& e) {
315 cout << "Standard Error: " << e.what() << " has occurred in the ReadColumnMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
319 cout << "An unknown error has occurred in the ReadColumnMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
325 /***********************************************************************/
// Destructor for ReadColumnMatrix.
// NOTE(review): read() allocates list and a Progress object with new - confirm where they are released.
327 ReadColumnMatrix::~ReadColumnMatrix(){
333 /***********************************************************************/
// Constructor: stores the path in philFile, opens it on fileHandle through
// openInputFile() and stores that call's return value in successOpen.
// pf: path of the file that read() will hand to InputData.
335 ReadPhilFile::ReadPhilFile(string pf): philFile(pf){
337 successOpen = openInputFile(philFile, fileHandle);
341 /***********************************************************************/
342 //Reads philFile according to globaldata->getFormat() (list, rabund, sabund or shared) and stores the
//resulting objects on globaldata (ginput, gorder, sabund, gSharedList) for later commands such as collect and rarefact.
//globaldata: supplies the format and the optional order file name, and receives the parsed objects.
//Errors: exceptions derived from exception are caught at the end and reported on cout.
343 void ReadPhilFile::read(GlobalData* globaldata){
345 if (globaldata->getOrderFile() == "") {
346 //Two InputData objects are opened on the same file: reading twice through a single one moves ahead in the file,
347 //so the collect or summary commands would miss a line.
348 input = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
349 inputSabund = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund or shared.
350 }else {//there is an orderfile
351 input = new InputData(philFile, globaldata->getOrderFile(), globaldata->getFormat());
353 globaldata->ginput = input; //saving to be used by collector and rarefact commands.
355 if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary.
356 order = input->getOrderVector();
357 globaldata->gorder = order; //saving to be used by collect and rarefact commands.
//NOTE(review): inputSabund is only assigned in the branch without an order file in the visible code;
//confirm it is valid here when an order file was supplied.
358 sabund = inputSabund->getSAbundVector();
359 globaldata->sabund = sabund; //saving to be used by summary command.
360 }else if (globaldata->getFormat() == "shared") {
361 SharedList = input->getSharedListVector(); //you are reading for collect.shared, rarefaction.shared, summary.shared, parselist command, or shared commands.
362 globaldata->gSharedList = SharedList;
//Failures are reported on cout; what happens after the message is not visible here.
365 catch(exception& e) {
366 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhilFile class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
370 cout << "An unknown error has occurred in the ReadPhilFile class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
375 /***********************************************************************/
// Destructor for ReadPhilFile.
// NOTE(review): read() allocates InputData objects with new and hands one to globaldata - confirm who releases them.
377 ReadPhilFile::~ReadPhilFile(){
382 /***********************************************************************/