From: westcott Date: Tue, 12 May 2009 16:31:52 +0000 (+0000) Subject: added distance command and filterseqs X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=d5d2761f88b41f1006d0b700e0ab51e2ce48b875 added distance command and filterseqs --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index d29d52e..7beb6cb 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -41,7 +41,6 @@ 378C1B050FB0644E004D63F5 /* readclustal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF20FB0644D004D63F5 /* readclustal.cpp */; }; 378C1B060FB0644E004D63F5 /* readfasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF40FB0644D004D63F5 /* readfasta.cpp */; }; 378C1B070FB0644E004D63F5 /* readnexus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF60FB0644D004D63F5 /* readnexus.cpp */; }; - 378C1B080FB0644E004D63F5 /* readseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */; }; 378C1B090FB0644E004D63F5 /* readseqsphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */; }; 378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */; }; 378C1B0B0FB0644E004D63F5 /* sharedjackknife.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 378C1AFF0FB0644D004D63F5 /* sharedjackknife.cpp */; }; @@ -49,6 +48,7 @@ 379293C30F2DE73400B9034A /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379293C20F2DE73400B9034A /* treemap.cpp */; }; 379294700F2E191800B9034A /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3792946F0F2E191800B9034A /* parsimonycommand.cpp */; }; 3792948A0F2E258500B9034A /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379294890F2E258500B9034A /* parsimony.cpp */; }; + 379643ED0FB9B5A80081FDB6 /* readseqs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379643EB0FB9B5A80081FDB6 /* readseqs.cpp */; }; 37AD4CE40F28AEA300AA2D49 /* sharedlistvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */; }; 37AD4DBB0F28E2FE00AA2D49 /* tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DBA0F28E2FE00AA2D49 /* tree.cpp */; }; 37AD4DCA0F28F3DD00AA2D49 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */; }; @@ -225,8 +225,6 @@ 378C1AF60FB0644D004D63F5 /* readnexus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readnexus.cpp; sourceTree = ""; }; 378C1AF70FB0644D004D63F5 /* readnexus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readnexus.h; sourceTree = ""; }; 378C1AF80FB0644D004D63F5 /* readnexusal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readnexusal.h; sourceTree = ""; }; - 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readseqscommand.cpp; sourceTree = ""; }; - 378C1AFA0FB0644D004D63F5 /* readseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readseqscommand.h; sourceTree = ""; }; 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readseqsphylip.cpp; sourceTree = ""; }; 378C1AFC0FB0644D004D63F5 /* readseqsphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readseqsphylip.h; sourceTree = ""; }; 378C1AFD0FB0644D004D63F5 /* sequencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequencedb.cpp; sourceTree = ""; }; @@ -242,6 +240,8 @@ 379294880F2E258500B9034A /* parsimony.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsimony.h; sourceTree = ""; }; 379294890F2E258500B9034A /* parsimony.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parsimony.cpp; sourceTree = ""; }; 3792948D0F2E271100B9034A /* treecalculator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treecalculator.h; sourceTree = ""; }; + 379643EB0FB9B5A80081FDB6 /* readseqs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readseqs.cpp; sourceTree = ""; }; + 379643EC0FB9B5A80081FDB6 /* readseqs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readseqs.h; sourceTree = ""; }; 37AD4CE20F28AEA300AA2D49 /* sharedlistvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedlistvector.h; sourceTree = ""; }; 37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedlistvector.cpp; sourceTree = ""; }; 37AD4DB90F28E2FE00AA2D49 /* tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tree.h; sourceTree = ""; }; @@ -484,7 +484,6 @@ 7E4130F70F8E58FA00381DD0 /* dlibshuff.h */, 7E4130F60F8E58FA00381DD0 /* dlibshuff.cpp */, 37D927D60F21331F001D4494 /* display.h */, - 37C753DF0FB3492400DBD02E /* dist.h */, 37D927D80F21331F001D4494 /* engine.hpp */, 37D927D70F21331F001D4494 /* engine.cpp */, 37D928B10F213472001D4494 /* errorcheckor */, @@ -514,24 +513,7 @@ 37D928060F21331F001D4494 /* rarefact.h */, 37D928050F21331F001D4494 /* rarefact.cpp */, 37D928090F21331F001D4494 /* rarefactioncurvedata.h */, - 378C1AF30FB0644D004D63F5 /* readclustal.h */, - 378C1AF20FB0644D004D63F5 /* readclustal.cpp */, - 375AA1340F9E433D008EF9B8 /* readcolumn.h */, - 375AA1330F9E433D008EF9B8 /* readcolumn.cpp */, - 378C1AF50FB0644D004D63F5 /* readfasta.h */, - 378C1AF40FB0644D004D63F5 /* readfasta.cpp */, - 37D928130F21331F001D4494 /* readmatrix.hpp */, - 378C1AF70FB0644D004D63F5 /* readnexus.h */, - 378C1AF60FB0644D004D63F5 /* readnexus.cpp */, - 378C1AF80FB0644D004D63F5 /* readnexusal.h */, - 375AA1360F9E433D008EF9B8 /* readotu.h */, - 375AA1350F9E433D008EF9B8 /* readotu.cpp */, - 375AA1380F9E433D008EF9B8 /* readphylip.h */, - 375AA1370F9E433D008EF9B8 /* readphylip.cpp */, - 378C1AFC0FB0644D004D63F5 /* readseqsphylip.h */, - 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */, - 37AD4DC80F28F3DD00AA2D49 /* readtree.h */, - 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */, + 3796441D0FB9B9650081FDB6 /* read */, 37D928210F21331F001D4494 /* shared.h */, 37D928200F21331F001D4494 /* shared.cpp */, 211C38310F961DD400FEE541 /* sharedutilities.h */, @@ -555,6 +537,33 @@ name = Products; sourceTree = ""; }; + 3796441D0FB9B9650081FDB6 /* read */ = { + isa = PBXGroup; + children = ( + 378C1AF30FB0644D004D63F5 /* readclustal.h */, + 378C1AF20FB0644D004D63F5 /* readclustal.cpp */, + 375AA1340F9E433D008EF9B8 /* readcolumn.h */, + 375AA1330F9E433D008EF9B8 /* readcolumn.cpp */, + 378C1AF50FB0644D004D63F5 /* readfasta.h */, + 378C1AF40FB0644D004D63F5 /* readfasta.cpp */, + 37D928130F21331F001D4494 /* readmatrix.hpp */, + 378C1AF70FB0644D004D63F5 /* readnexus.h */, + 378C1AF60FB0644D004D63F5 /* readnexus.cpp */, + 378C1AF80FB0644D004D63F5 /* readnexusal.h */, + 375AA1360F9E433D008EF9B8 /* readotu.h */, + 375AA1350F9E433D008EF9B8 /* readotu.cpp */, + 375AA1380F9E433D008EF9B8 /* readphylip.h */, + 375AA1370F9E433D008EF9B8 /* readphylip.cpp */, + 378C1AFC0FB0644D004D63F5 /* readseqsphylip.h */, + 378C1AFB0FB0644D004D63F5 /* readseqsphylip.cpp */, + 379643EB0FB9B5A80081FDB6 /* readseqs.cpp */, + 379643EC0FB9B5A80081FDB6 /* readseqs.h */, + 37AD4DC80F28F3DD00AA2D49 /* readtree.h */, + 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */, + ); + name = read; + sourceTree = ""; + }; 37D928A60F2133C0001D4494 /* calculators */ = { isa = PBXGroup; children = ( @@ -573,6 +582,7 @@ 37D927BF0F21331F001D4494 /* chao1.cpp */, 7EC3D4510FA0FFF900338DA5 /* coverage.h */, 7EC3D4500FA0FFF900338DA5 /* coverage.cpp */, + 37C753DF0FB3492400DBD02E /* dist.h */, 37C753F00FB34AE800DBD02E /* eachgapdist.h */, 37C753F40FB34C0300DBD02E /* eachgapignore.h */, EB9303F70F53517300E8EF26 /* geom.h */, @@ -710,8 +720,6 @@ 372E12950F263D5A0095CF7E /* readdistcommand.cpp */, 372E126E0F26365B0095CF7E /* readotucommand.h */, 372E126F0F26365B0095CF7E /* readotucommand.cpp */, - 378C1AFA0FB0644D004D63F5 /* readseqscommand.h */, - 378C1AF90FB0644D004D63F5 /* readseqscommand.cpp */, 37E5F4900F2A3DA800F8D827 /* readtreecommand.h */, 37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */, 37D928270F21331F001D4494 /* sharedcommand.h */, @@ -965,12 +973,12 @@ 378C1B050FB0644E004D63F5 /* readclustal.cpp in Sources */, 378C1B060FB0644E004D63F5 /* readfasta.cpp in Sources */, 378C1B070FB0644E004D63F5 /* readnexus.cpp in Sources */, - 378C1B080FB0644E004D63F5 /* readseqscommand.cpp in Sources */, 378C1B090FB0644E004D63F5 /* readseqsphylip.cpp in Sources */, 378C1B0A0FB0644E004D63F5 /* sequencedb.cpp in Sources */, 378C1B0B0FB0644E004D63F5 /* sharedjackknife.cpp in Sources */, 378C1B0C0FB0644E004D63F5 /* sharedmarczewski.cpp in Sources */, 37C753CE0FB3415200DBD02E /* distancecommand.cpp in Sources */, + 379643ED0FB9B5A80081FDB6 /* readseqs.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index e357e6c..ea43c48 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -11,7 +11,6 @@ #include "readdistcommand.h" #include "readtreecommand.h" #include "readotucommand.h" -#include "readseqscommand.h" #include "clustercommand.h" #include "parselistcommand.h" #include "collectcommand.h" @@ -41,6 +40,7 @@ #include "treegroupscommand.h" #include "bootstrapsharedcommand.h" #include "concensuscommand.h" +#include "distancecommand.h" /***********************************************************/ @@ -66,7 +66,6 @@ Command* CommandFactory::getCommand(string commandName){ if(commandName == "read.dist") { command = new ReadDistCommand(); } else if(commandName == "read.otu") { command = new ReadOtuCommand(); } - else if(commandName == "read.seqs") { command = new ReadSeqsCommand(); } else if(commandName == "read.tree") { command = new ReadTreeCommand(); } else if(commandName == "cluster") { command = new ClusterCommand(); } else if(commandName == "deconvolute") { command = new DeconvoluteCommand(); } @@ -93,6 +92,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "tree.shared") { command = new TreeGroupCommand(); } else if(commandName == "bootstrap.shared") { command = new BootSharedCommand(); } else if(commandName == "concensus") { command = new ConcensusCommand(); } + else if(commandName == "distance") { command = new DistanceCommand(); } else { command = new NoCommand(); } return command; diff --git a/distancecommand.cpp b/distancecommand.cpp index c20ddb9..0714ab3 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -21,10 +21,8 @@ DistanceCommand::DistanceCommand(){ globaldata = GlobalData::getInstance(); validCalculator = new ValidCalculators(); ends = globaldata->getEnds(); - seqDB = globaldata->gSequenceDB; convert(globaldata->getProcessors(), processors); convert(globaldata->getCutOff(), cutoff); - distFile = getRootName(globaldata->getFastaFile()) + "dist"; int i; if (ends != "T") { @@ -52,7 +50,6 @@ DistanceCommand::DistanceCommand(){ } } - //reset calc for next command globaldata->setCalc(""); @@ -71,9 +68,28 @@ DistanceCommand::DistanceCommand(){ int DistanceCommand::execute(){ try { + + //read file + string filename = globaldata->inputFileName; + + if(globaldata->getFastaFile() != "") { + readSeqs = new ReadFasta(filename); } + else if(globaldata->getNexusFile() != "") { + readSeqs = new ReadNexus(filename); } + else if(globaldata->getClustalFile() != "") { + readSeqs = new ReadClustal(filename); } + else if(globaldata->getPhylipFile() != "") { + readSeqs = new ReadPhylip(filename); } + + readSeqs->read(); + seqDB = readSeqs->getDB(); + int numSeqs = seqDB->getNumSeqs(); - system(("rm "+distFile).c_str() ); + string distFile = getRootName(globaldata->getFastaFile()) + "dist"; + + remove(distFile.c_str()); + if(processors == 1){ driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff); } @@ -82,13 +98,13 @@ int DistanceCommand::execute(){ int pid = fork(); if(pid > 0){ driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff); -// system(("cat " + distFile + "tempa" + " >> " + distFile).c_str()); -// system(("rm " + distFile + "tempa").c_str()); + appendFiles((distFile+"tempa"), distFile); + remove((distFile + "tempa").c_str()); } else{ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff); -// system(("cat " + distFile + "tempb" + " >> " + distFile).c_str()); -// system(("rm " + distFile + "tempb").c_str()); + appendFiles((distFile+"tempb"), distFile); + remove((distFile + "tempb").c_str()); } wait(NULL); @@ -99,37 +115,20 @@ int DistanceCommand::execute(){ int pid2 = fork(); if(pid2 > 0){ driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff); - #ifdef HAVE_CAT - system(("cat " + distFile + "tempa" + " >> " + distFile).c_str()); - #else - #ifdef HAVE_COPY -//get system call from pat system(("copy " + distFile + "tempa").c_str()); - #else - cout << "Sorry but I can't continue because this operating system doesn't appear to support the cat() or copy() system calls." << endl; - #endif - #endif - - #ifdef HAVE_RM - system(("rm " + distFile + "tempa").c_str()); - #else - #ifdef HAVE_ERASE - system(("erase " + distFile + "tempa").c_str()); - #else - cout << "Sorry but I can't remove the required files because this operating system doesn't appear to support the rm() or erase() system calls." << endl; - #endif - #endif + appendFiles(distFile+"tempa", distFile); + remove((distFile + "tempa").c_str()); } else{ driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff); - system(("cat " + distFile + "tempb" + " >> " + distFile).c_str()); - system(("rm " + distFile + "tempb").c_str()); + appendFiles(distFile+"tempb", distFile); + remove((distFile + "tempb").c_str()); } wait(NULL); } else{ driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff); - system(("cat " + distFile + "tempc" + " >> " + distFile).c_str()); - system(("rm " + distFile + "tempc").c_str()); + appendFiles(distFile+"tempc", distFile); + remove((distFile + "tempc").c_str()); } wait(NULL); } @@ -139,13 +138,17 @@ int DistanceCommand::execute(){ int pid2 = fork(); if(pid2 > 0){ driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff); - system(("cat " + distFile + "tempa" + " >> " + distFile).c_str()); - system(("rm " + distFile + "tempa").c_str()); + //system(("cat " + distFile + "tempa" + " >> " + distFile).c_str()); + appendFiles(distFile+"tempa", distFile); + //system(("rm " + distFile + "tempa").c_str()); + remove((distFile + "tempa").c_str()); } else{ driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff); - system(("cat " + distFile + "tempb" + " >> " + distFile).c_str()); - system(("rm " + distFile + "tempb").c_str()); + //system(("cat " + distFile + "tempb" + " >> " + distFile).c_str()); + appendFiles(distFile+"tempb", distFile); + //system(("rm " + distFile + "tempb").c_str()); + remove((distFile + "tempb").c_str()); } wait(NULL); } @@ -153,13 +156,17 @@ int DistanceCommand::execute(){ int pid3 = fork(); if(pid3 > 0){ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff); - system(("cat " + distFile + "tempc" + " >> " + distFile).c_str()); - system(("rm " + distFile + "tempc").c_str()); + //system(("cat " + distFile + "tempc" + " >> " + distFile).c_str()); + appendFiles(distFile+"tempc", distFile); + //system(("rm " + distFile + "tempc").c_str()); + remove((distFile + "tempc").c_str()); } else{ driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff); - system(("cat " + distFile + "tempd" + " >> " + distFile).c_str()); - system(("rm " + distFile + "tempd").c_str()); + //system(("cat " + distFile + "tempd" + " >> " + distFile).c_str()); + appendFiles(distFile+"tempd", distFile); + //system(("rm " + distFile + "tempd").c_str()); + remove((distFile + "tempd").c_str()); } wait(NULL); } @@ -194,12 +201,13 @@ int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLi for(int i=startLine;icalcDist(align->get(i), align->get(j)); double dist = distCalculator->getDist(); + if(dist <= cutoff){ - distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl; + distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl; +//cout << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl; } } @@ -224,4 +232,35 @@ int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLi } /**************************************************************************************************/ - +void DistanceCommand::appendFiles(string temp, string filename) { + try{ + ofstream output; + ifstream input; + + //open output file in append mode + openOutputFileAppend(filename, output); + + //open temp file for reading + openInputFile(temp, input); + + string line; + //read input file and write to output file + while(input.eof() != true) { + getline(input, line); //getline removes the newline char + if (line != "") { + output << line << endl; // Appending back newline char + } + } + + input.close(); + output.close(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the DistanceCommand class function appendFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} diff --git a/distancecommand.h b/distancecommand.h index a43e395..34dd65e 100644 --- a/distancecommand.h +++ b/distancecommand.h @@ -16,6 +16,10 @@ #include "validcalculator.h" #include "dist.h" #include "sequencedb.h" +#include "readfasta.h" +#include "readnexus.h" +#include "readclustal.h" +#include "readseqsphylip.h" using namespace std; @@ -32,12 +36,14 @@ private: ValidCalculators* validCalculator; Dist* distCalculator; SequenceDB* seqDB; + ReadSeqs* readSeqs; ofstream out; string outputFileName; - string ends, distFile; + string ends; int processors; float cutoff; + void appendFiles(string, string); int driver(Dist*, SequenceDB*, int, int, string, float); }; diff --git a/eachgapdist.h b/eachgapdist.h index 767c4bf..f4f00c1 100644 --- a/eachgapdist.h +++ b/eachgapdist.h @@ -22,24 +22,24 @@ public: int start = 0; for(int i=0; i=0;i--){ - if(A.getAligned()[i] == '.' || B.getAligned()[i] == '.' || A.getAligned()[i] == '-' || B.getAligned()[i] == '-'){ + if(A.getUnaligned()[i] == '.' || B.getUnaligned()[i] == '.' || A.getUnaligned()[i] == '-' || B.getUnaligned()[i] == '-'){ } else{ end = i; @@ -42,17 +42,17 @@ public: } for(int i=start;i<=end;i++){ - if(A.getAligned()[i] == '-' && B.getAligned()[i] == '-'){} - else if(A.getAligned()[i] == '.' || B.getAligned()[i] == '.'){ + if(A.getUnaligned()[i] == '-' && B.getUnaligned()[i] == '-'){} + else if(A.getUnaligned()[i] == '.' || B.getUnaligned()[i] == '.'){ break; } - else if(A.getAligned()[i] != '-' && B.getAligned()[i] != '-'){ - if(A.getAligned()[i] != B.getAligned()[i]){ + else if(A.getUnaligned()[i] != '-' && B.getUnaligned()[i] != '-'){ + if(A.getUnaligned()[i] != B.getUnaligned()[i]){ diff++; } length++; } - else if(A.getAligned()[i] != '-' || B.getAligned()[i] != '-'){ + else if(A.getUnaligned()[i] != '-' || B.getUnaligned()[i] != '-'){ diff++; length++; } diff --git a/errorchecking.cpp b/errorchecking.cpp index 803a836..f1279a4 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -188,8 +188,6 @@ bool ErrorCheck::checkInput(string input) { } }else if (commandName == "read.tree") { validateTreeFiles(); //checks the treefile and groupfile parameters - }else if (commandName == "read.seqs") { - if ((fastafile == "") && (nexusfile == "") && (clustalfile == "") && (phylipfile == "")) { cout << "You must enter a fastafile, nexusfile, or clustalfile with the read.seqs() command." << endl; return false; } }else if (commandName == "deconvolute") { if (fastafile == "") { cout << "You must enter a fastafile with the deconvolute() command." << endl; return false; } validateReadFiles(); @@ -248,10 +246,11 @@ bool ErrorCheck::checkInput(string input) { } } - if (commandName == "filter.seqs"){ - if ((globaldata->getFastaFile() == "") && (globaldata->getNexusFile() == "") && (globaldata->getClustalFile() == "") && (globaldata->getPhylipFile() == "")) { + if ((commandName == "filter.seqs") || (commandName == "distance")) { + if ((fastafile == "") && (nexusfile == "") && (clustalfile == "") && (phylipfile == "")) { cout << "You must read either a fasta, nexus, clustal, or phylip file before you can use the filter.seqs command." << endl; return false; } + validateSeqsFiles(); } if ((commandName == "bin.seqs")) { @@ -510,6 +509,71 @@ void ErrorCheck::validateReadPhil() { } /*******************************************************/ +/******************************************************/ +//This function checks to make sure the user entered appropriate +// format parameters on a distfile read +void ErrorCheck::validateSeqsFiles() { + try { + ifstream filehandle; + int ableToOpen; + + //checks to make sure only one file type is given + if (phylipfile != "") { + if ((nexusfile != "") || (fastafile != "") || (clustalfile != "")) { + cout << "You may enter ONLY ONE of the following: phylip, fasta, nexus or clustal." << endl; errorFree = false; } + else { + ableToOpen = openInputFile(phylipfile, filehandle); + filehandle.close(); + if (ableToOpen == 1) { //unable to open + errorFree = false; + } + } + }else if (nexusfile != "") { + if ((phylipfile != "") || (fastafile != "") || (clustalfile != "")) { + cout << "You may enter ONLY ONE of the following: phylip, fasta, nexus or clustal." << endl; errorFree = false; } + else { + ableToOpen = openInputFile(nexusfile, filehandle); + filehandle.close(); + if (ableToOpen == 1) { //unable to open + errorFree = false; + } + } + }else if (fastafile != "") { + if ((phylipfile != "") || (nexusfile != "") || (clustalfile != "")) { + cout << "You may enter ONLY ONE of the following: phylip, fasta, nexus or clustal." << endl; errorFree = false; } + else { + ableToOpen = openInputFile(fastafile, filehandle); + filehandle.close(); + if (ableToOpen == 1) { //unable to open + errorFree = false; + } + } + }else if (clustalfile != "") { + if ((phylipfile != "") || (nexusfile != "") || (fastafile != "")) { + cout << "You may enter ONLY ONE of the following: phylip, fasta, nexus or clustal." << endl; errorFree = false; } + else { + ableToOpen = openInputFile(clustalfile, filehandle); + filehandle.close(); + if (ableToOpen == 1) { //unable to open + errorFree = false; + } + } + + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateSeqsFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ErrorCheck class function validateSeqsFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/*******************************************************/ + /******************************************************/ //This function checks to make sure the user entered appropriate // format parameters on a bin.seq command diff --git a/errorchecking.h b/errorchecking.h index 0f6dbb5..8de9234 100644 --- a/errorchecking.h +++ b/errorchecking.h @@ -31,6 +31,7 @@ class ErrorCheck { void validateParseFiles(); void validateTreeFiles(); void validateBinFiles(); + void validateSeqsFiles(); void clear(); void refresh(); string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, nexusfile, clustalfile, treefile, sharedfile, cutoff, format; diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index dec53a5..6170d00 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -8,15 +8,13 @@ */ #include "filterseqscommand.h" -#include -#include /**************************************************************************************/ void FilterSeqsCommand::doTrump() { trump = globaldata->getTrump(); for(int i = 0; i < db->size(); i++) { Sequence cur = db->get(i); - string curAligned = cur.getAligned(); + string curAligned = cur.getUnaligned(); for(int j = 0; j < curAligned.length(); j++) { string curChar = curAligned.substr(j, 1); if(curChar.compare(trump) == 0) @@ -39,7 +37,7 @@ void FilterSeqsCommand::doSoft() { for(int i = 0; i < db->size(); i++) { Sequence cur = db->get(i); - string curAligned = cur.getAligned(); + string curAligned = cur.getUnaligned(); for(int j = 0; j < curAligned.length(); j++) { string curChar = curAligned.substr(j, 1); @@ -99,8 +97,24 @@ void FilterSeqsCommand::doFilter() { int FilterSeqsCommand::execute() { try { globaldata = GlobalData::getInstance(); - db = globaldata->gSequenceDB; + filename = globaldata->inputFileName; + if(globaldata->getFastaFile() != "") { + readSeqs = new ReadFasta(filename); } + else if(globaldata->getNexusFile() != "") { + readSeqs = new ReadNexus(filename); } + else if(globaldata->getClustalFile() != "") { + readSeqs = new ReadClustal(filename); } + else if(globaldata->getPhylipFile() != "") { + readSeqs = new ReadPhylip(filename); } + + readSeqs->read(); + db = readSeqs->getDB(); + + //for(int i = 0; i < db->size(); i++) { +// cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getUnaligned() << "\n\n"; +// } + for(int i = 0; i < db->get(0).getLength(); i++) columnsToRemove.push_back(false); @@ -109,7 +123,6 @@ int FilterSeqsCommand::execute() { doTrump(); else if(globaldata->getSoft().compare("") != 0) doSoft(); - else if(globaldata->getFilter().compare("") != 0) doFilter(); @@ -121,11 +134,13 @@ int FilterSeqsCommand::execute() { // else // cout << "false\n"; // } + + //Creating the new SequenceDB SequenceDB newDB; for(int i = 0; i < db->size(); i++) { Sequence curSeq = db->get(i); - string curAligned = curSeq.getAligned(); + string curAligned = curSeq.getUnaligned(); string curName = curSeq.getName(); string newAligned = ""; for(int j = 0; j < curAligned.length(); j++) @@ -136,11 +151,16 @@ int FilterSeqsCommand::execute() { newDB.add(newSeq); } + string newFileName = getRootName(filename) + "filter.fa"; ofstream outfile; - outfile.open("filtertest.txt"); + outfile.open(newFileName.c_str()); newDB.print(outfile); outfile.close(); + globaldata->clear(); + //delete db; + //delete newDB; + return 0; } catch(exception& e) { @@ -151,5 +171,4 @@ int FilterSeqsCommand::execute() { cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } -} -/**************************************************************************************/ +} \ No newline at end of file diff --git a/filterseqscommand.h b/filterseqscommand.h index 9e703ce..06eaa6d 100644 --- a/filterseqscommand.h +++ b/filterseqscommand.h @@ -35,10 +35,7 @@ private: GlobalData* globaldata; string filename, trump, filter; - ReadFasta* readFasta; - ReadNexus* readNexus; - ReadClustal* readClustal; - ReadPhylip* readPhylip; + ReadSeqs* readSeqs; vector columnsToRemove; SequenceDB* db; diff --git a/ignoregaps.h b/ignoregaps.h index 8127b62..aa106c9 100644 --- a/ignoregaps.h +++ b/ignoregaps.h @@ -27,7 +27,7 @@ public: int end = 0; for(int i=0;i=0;i--){ - if(A.getAligned()[i] == '.' || B.getAligned()[i] == '.' || A.getAligned()[i] == '-' || B.getAligned()[i] == '-'){ + if(A.getUnaligned()[i] == '.' || B.getUnaligned()[i] == '.' || A.getUnaligned()[i] == '-' || B.getUnaligned()[i] == '-'){ } else{ end = i; @@ -44,11 +44,11 @@ public: } for(int i=start; i<=end; i++){ - if(A.getAligned()[i] == '.' || B.getAligned()[i] == '.'){ + if(A.getUnaligned()[i] == '.' || B.getUnaligned()[i] == '.'){ break; } - else if((A.getAligned()[i] != '-' && B.getAligned()[i] != '-')){ - if(A.getAligned()[i] != B.getAligned()[i]){ + else if((A.getUnaligned()[i] != '-' && B.getUnaligned()[i] != '-')){ + if(A.getUnaligned()[i] != B.getUnaligned()[i]){ diff++; } length++; diff --git a/mothur.h b/mothur.h index e4a9bfc..2c9595e 100644 --- a/mothur.h +++ b/mothur.h @@ -273,6 +273,21 @@ inline int openOutputFile(string fileName, ofstream& fileHandle){ } } +/***********************************************************************/ + +inline int openOutputFileAppend(string fileName, ofstream& fileHandle){ + + fileHandle.open(fileName.c_str(), ios::app); + if(!fileHandle) { + cerr << "Error: Could not open " << fileName << endl; + return 1; + } + else { + return 0; + } + +} + /***********************************************************************/ diff --git a/onegapdist.h b/onegapdist.h index b6b257c..ef290fe 100644 --- a/onegapdist.h +++ b/onegapdist.h @@ -26,7 +26,7 @@ public: int start = 0; for(int i=0;i=0;i--){ - if(A.getAligned()[i] == '.' || B.getAligned()[i] == '.' || A.getAligned()[i] == '-' || B.getAligned()[i] == '-'){ + if(A.getUnaligned()[i] == '.' || B.getUnaligned()[i] == '.' || A.getUnaligned()[i] == '-' || B.getUnaligned()[i] == '-'){ } else{ end = i; @@ -44,8 +44,8 @@ public: for(int i=start;i<=end;i++){ - if(A.getAligned()[i] == '-' && B.getAligned()[i] == '-'){} - else if(A.getAligned()[i] == '-' && B.getAligned()[i] != '-'){ + if(A.getUnaligned()[i] == '-' && B.getUnaligned()[i] == '-'){} + else if(A.getUnaligned()[i] == '-' && B.getUnaligned()[i] != '-'){ if(openGapA == 0){ difference++; minLength++; @@ -53,7 +53,7 @@ public: openGapB = 0; } } - else if(A.getAligned()[i] != '-' && B.getAligned()[i] == '-'){ + else if(A.getUnaligned()[i] != '-' && B.getUnaligned()[i] == '-'){ if(openGapB == 0){ difference++; minLength++; @@ -61,8 +61,8 @@ public: openGapB = 1; } } - else if(A.getAligned()[i] != '-' && B.getAligned()[i] != '-'){ - if(A.getAligned()[i] != B.getAligned()[i]){ + else if(A.getUnaligned()[i] != '-' && B.getUnaligned()[i] != '-'){ + if(A.getUnaligned()[i] != B.getUnaligned()[i]){ difference++; minLength++; openGapA = 0; diff --git a/readclustal.cpp b/readclustal.cpp index 2f31539..f42b359 100644 --- a/readclustal.cpp +++ b/readclustal.cpp @@ -12,11 +12,8 @@ #include /*******************************************************************************/ -ReadClustal::ReadClustal(string file) { +ReadClustal::ReadClustal(string file) : ReadSeqs(file){ try { - openInputFile(file, filehandle); - clustalFile = file; - globaldata = GlobalData::getInstance(); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -43,6 +40,7 @@ void ReadClustal::read() { int count = 0; int numSeqs = 0; + int lastSeqLength = 0; bool firstDone = false; while(!filehandle.eof()) { @@ -61,16 +59,20 @@ void ReadClustal::read() { if(name.find_first_of("*") == -1) { filehandle >> sequence; + lastSeqLength = sequence.length(); if(!firstDone) { Sequence newSeq(name, sequence); sequencedb.add(newSeq); } else - sequencedb.set(count, sequencedb.get(count).getAligned() + sequence); + sequencedb.set(count, sequencedb.get(count).getUnaligned() + sequence); count++; } } + if(count == 1) + sequencedb.set(0, sequencedb.get(0).getUnaligned().substr(0, sequencedb.get(0).getUnaligned().length() - lastSeqLength)); + filehandle.close(); } diff --git a/readclustal.h b/readclustal.h index e09e900..547112c 100644 --- a/readclustal.h +++ b/readclustal.h @@ -11,28 +11,20 @@ */ using namespace std; +#include "readseqs.h" #include "globaldata.hpp" #include "sequencedb.h" #include "mothur.h" /**********************************************************************************/ -class ReadClustal { +class ReadClustal : public ReadSeqs { public: ReadClustal(string); ~ReadClustal(); void read(); - SequenceDB* getDB(); - - private: - GlobalData* globaldata; - string clustalFile; - ifstream filehandle; - SequenceDB sequencedb; - int readOk; // readOk = 0 means success, readOk = 1 means error(s). - - + SequenceDB* getDB(); }; #endif \ No newline at end of file diff --git a/readfasta.cpp b/readfasta.cpp index 347432f..e1f22ce 100644 --- a/readfasta.cpp +++ b/readfasta.cpp @@ -12,11 +12,8 @@ #include /*******************************************************************************/ -ReadFasta::ReadFasta(string file) { +ReadFasta::ReadFasta(string file) : ReadSeqs(file) { try { - openInputFile(file, filehandle); - fastaFile = file; - globaldata = GlobalData::getInstance(); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -38,6 +35,7 @@ void ReadFasta::read() { string sequence = ""; string temp; int count = 0; + while(!filehandle.eof()){ if(count == 0) filehandle >> temp; @@ -51,10 +49,16 @@ void ReadFasta::read() { count++; name = temp.substr(1,temp.length()-1); } - else + else { sequence += temp; + } filehandle >> temp; + gobble(filehandle); + + if(filehandle.eof()) + sequence += temp; + } Sequence newSequence(name, sequence); sequencedb.add(newSequence); diff --git a/readfasta.h b/readfasta.h index 8f5d70d..11cdab0 100644 --- a/readfasta.h +++ b/readfasta.h @@ -12,28 +12,20 @@ using namespace std; +#include "readseqs.h" #include "globaldata.hpp" #include "sequencedb.h" #include "mothur.h" /**********************************************************************************/ -class ReadFasta { +class ReadFasta : public ReadSeqs { public: ReadFasta(string); ~ReadFasta(); void read(); SequenceDB* getDB(); - - private: - GlobalData* globaldata; - string fastaFile; - ifstream filehandle; - SequenceDB sequencedb; - int readOk; // readOk = 0 means success, readOk = 1 means error(s). - - }; #endif \ No newline at end of file diff --git a/readnexus.cpp b/readnexus.cpp index 25444d5..5dffa42 100644 --- a/readnexus.cpp +++ b/readnexus.cpp @@ -12,11 +12,8 @@ #include /*******************************************************************************/ -ReadNexus::ReadNexus(string file) { +ReadNexus::ReadNexus(string file) : ReadSeqs(file) { try { - openInputFile(file, filehandle); - nexusFile = file; - globaldata = GlobalData::getInstance(); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -56,7 +53,7 @@ void ReadNexus::read() { sequencedb.add(newSeq); } else - sequencedb.set(count, sequencedb.get(count).getAligned() + sequence); + sequencedb.set(count, sequencedb.get(count).getUnaligned() + sequence); count++; if(count == numSeqs) { diff --git a/readnexus.h b/readnexus.h index f6bfcbb..374b820 100644 --- a/readnexus.h +++ b/readnexus.h @@ -11,28 +11,20 @@ */ using namespace std; +#include "readseqs.h" #include "globaldata.hpp" #include "sequencedb.h" #include "mothur.h" /**********************************************************************************/ -class ReadNexus { +class ReadNexus : public ReadSeqs { public: ReadNexus(string); ~ReadNexus(); void read(); - SequenceDB* getDB(); - - private: - GlobalData* globaldata; - string nexusFile; - ifstream filehandle; - SequenceDB sequencedb; - int readOk; // readOk = 0 means success, readOk = 1 means error(s). - - + SequenceDB* getDB(); }; #endif \ No newline at end of file diff --git a/readseqs.cpp b/readseqs.cpp new file mode 100644 index 0000000..00a301d --- /dev/null +++ b/readseqs.cpp @@ -0,0 +1,43 @@ +/* + * readseqs.cpp + * Mothur + * + * Created by Thomas Ryabin on 5/11/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readseqs.h" +#include +#include + + +/*******************************************************************************/ +ReadSeqs::ReadSeqs(string file) { + try { + openInputFile(file, filehandle); + seqFile = file; + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ +ReadSeqs::~ReadSeqs(){ + //for(int i = 0; i < sequencedb.getNumSeqs(); i++) + //delete sequencedb.get(i); +} +/*******************************************************************************/ +void ReadSeqs::read() { +} + +/*********************************************************************************/ +SequenceDB* ReadSeqs::getDB() { + return &sequencedb; +} diff --git a/readseqs.h b/readseqs.h new file mode 100644 index 0000000..0bbc297 --- /dev/null +++ b/readseqs.h @@ -0,0 +1,37 @@ +#ifndef READSEQS_H +#define READSEQS_H + +/* + * readseqs.h + * Mothur + * + * Created by Thomas Ryabin on 5/11/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "mothur.h" + +/**********************************************************************************/ + +class ReadSeqs { + + public: + ReadSeqs(string); + ~ReadSeqs(); + virtual void read(); + virtual SequenceDB* getDB(); + + protected: + GlobalData* globaldata; + string seqFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). +}; + +#endif diff --git a/readseqscommand.cpp b/readseqscommand.cpp deleted file mode 100644 index dee09f5..0000000 --- a/readseqscommand.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * readseqscommand.cpp - * Mothur - * - * Created by Thomas Ryabin on 4/13/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include "readseqscommand.h" - -//********************************************************************************************************************** -ReadSeqsCommand::ReadSeqsCommand(){ - try { - globaldata = GlobalData::getInstance(); - filename = globaldata->inputFileName; - if(globaldata->getFastaFile().compare("") != 0) { - readFasta = new ReadFasta(filename); - readFasta->read(); - globaldata->gSequenceDB = readFasta->getDB(); - } - else if(globaldata->getNexusFile().compare("") != 0) { - readNexus = new ReadNexus(filename); - readNexus->read(); - globaldata->gSequenceDB = readNexus->getDB(); - } - else if(globaldata->getClustalFile().compare("") != 0) { - readClustal = new ReadClustal(filename); - readClustal->read(); - globaldata->gSequenceDB = readClustal->getDB(); - } - else if(globaldata->getPhylipFile().compare("") != 0) { - readPhylip = new ReadPhylip(filename); - readPhylip->read(); - globaldata->gSequenceDB = readPhylip->getDB(); - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ReadSeqsCommand class Function ReadSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ReadSeqsCommand class function ReadSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} - -//********************************************************************************************************************** - -ReadSeqsCommand::~ReadSeqsCommand(){ - //delete readFasta->getDB(); -// delete readNexus->getDB(); -// delete readClustal->getDB(); -// delete readPhylip->getDB(); -} - -//********************************************************************************************************************** - -int ReadSeqsCommand::execute(){ - try { - filebuf fb; - - //fb.open ("fasta.txt",ios::out); - //readFasta->read(); -// SequenceDB* db = readFasta->getDB(); - - //fb.open("nexus.txt",ios::out); -// readNexus->read(); -// SequenceDB* db = readNexus->getDB(); - - //fb.open("clustal.txt",ios::out); -// readClustal->read(); -// SequenceDB* db = readClustal->getDB(); - - //fb.open("phylip.txt",ios::out); -// readPhylip->read(); -// SequenceDB* db = readPhylip->getDB(); - - - - //for(int i = 0; i < db->size(); i++) { -// cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getAligned() << "\n\n"; -// } - - //ostream os(&fb); -// db->print(os); -// fb.close(); - - return 0; - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ReadOtuCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} -//********************************************************************************************************************** diff --git a/readseqscommand.h b/readseqscommand.h deleted file mode 100644 index 52c8a7c..0000000 --- a/readseqscommand.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef READSEQSCOMMAND_H -#define READSEQSCOMMAND_H -/* - * readseqscommand.h - * Mothur - * - * Created by Thomas Ryabin on 4/13/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include "command.hpp" -#include "readfasta.h" -#include "readnexus.h" -#include "readclustal.h" -#include "readseqsphylip.h" - -class GlobalData; - -class ReadSeqsCommand : public Command { -public: - ReadSeqsCommand(); - ~ReadSeqsCommand(); - int execute(); - -private: - GlobalData* globaldata; - ReadFasta* readFasta; - ReadNexus* readNexus; - ReadClustal* readClustal; - ReadPhylip* readPhylip; - string filename; -}; - -#endif diff --git a/readseqsphylip.cpp b/readseqsphylip.cpp index 40f56a0..fbb99c9 100644 --- a/readseqsphylip.cpp +++ b/readseqsphylip.cpp @@ -31,11 +31,8 @@ bool ReadPhylip::isSeq(string seq) { } /*******************************************************************************/ -ReadPhylip::ReadPhylip(string file) { +ReadPhylip::ReadPhylip(string file) : ReadSeqs(file) { try { - openInputFile(file, filehandle); - phylipFile = file; - globaldata = GlobalData::getInstance(); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -107,7 +104,7 @@ void ReadPhylip::read() { } } if(!(last && count == 0)) - sequencedb.set(count, sequencedb.get(count).getAligned() + sequence); + sequencedb.set(count, sequencedb.get(count).getUnaligned() + sequence); if(letterCount == numLetters && count == 0) last = true; } diff --git a/readseqsphylip.h b/readseqsphylip.h index 13c0202..be4168d 100644 --- a/readseqsphylip.h +++ b/readseqsphylip.h @@ -11,13 +11,14 @@ */ using namespace std; +#include "readseqs.h" #include "globaldata.hpp" #include "sequencedb.h" #include "mothur.h" /**********************************************************************************/ -class ReadPhylip { +class ReadPhylip : public ReadSeqs { public: ReadPhylip(string); @@ -26,12 +27,6 @@ class ReadPhylip { SequenceDB* getDB(); private: - GlobalData* globaldata; - string phylipFile; - ifstream filehandle; - SequenceDB sequencedb; - int readOk; // readOk = 0 means success, readOk = 1 means error(s). - bool isSeq(string); }; diff --git a/sequence.cpp b/sequence.cpp index 5b3b01d..b59363e 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -114,7 +114,7 @@ int Sequence::getLength(){ //******************************************************************************************************************** -void Sequence::printSequence(ofstream& out){ +void Sequence::printSequence(ostream& out){ string toPrint = unaligned; if(aligned.length() > unaligned.length()) toPrint = aligned; diff --git a/sequence.hpp b/sequence.hpp index dea06bd..03cbab7 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -33,7 +33,7 @@ public: string getPairwise(); string getUnaligned(); int getLength(); - void printSequence(ofstream&); + void printSequence(ostream&); private: string name; diff --git a/sequencedb.cpp b/sequencedb.cpp index 657573e..f948bf3 100644 --- a/sequencedb.cpp +++ b/sequencedb.cpp @@ -72,7 +72,7 @@ int SequenceDB::size() { /***********************************************************************/ -void SequenceDB::print(ofstream& out) { +void SequenceDB::print(ostream& out) { for(int i = 0; i < data.size(); i++) data[i].printSequence(out); } diff --git a/sequencedb.h b/sequencedb.h index 5dcf9bd..eb89454 100644 --- a/sequencedb.h +++ b/sequencedb.h @@ -36,7 +36,7 @@ public: void changeSize(int); //resizes data void clear(); //clears data - remeber to loop through and delete the sequences inside or you will have a memory leak int size(); //returns datas size - void print(ofstream&); //loops through data using sequence class print + void print(ostream&); //loops through data using sequence class print private: vector data; diff --git a/validparameter.cpp b/validparameter.cpp index 3e6a979..e97df8b 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -220,9 +220,6 @@ void ValidParameters::initCommandParameters() { string readtreeArray[] = {"tree","group"}; commandParameters["read.tree"] = addParameters(readtreeArray, sizeof(readtreeArray)/sizeof(string)); - string readseqsArray[] = {"fasta","phylip","clustal","nexus","line"}; - commandParameters["read.seqs"] = addParameters(readseqsArray, sizeof(readseqsArray)/sizeof(string)); - string clusterArray[] = {"cutoff","precision","method"}; commandParameters["cluster"] = addParameters(clusterArray, sizeof(clusterArray)/sizeof(string)); @@ -271,7 +268,7 @@ void ValidParameters::initCommandParameters() { string heatmapArray[] = {"groups","line","label","sorted","scale"}; commandParameters["heatmap"] = addParameters(heatmapArray, sizeof(heatmapArray)/sizeof(string)); - string filterseqsArray[] = {"trump", "soft", "filter"}; + string filterseqsArray[] = {"fasta","phylip","clustal","nexus", "trump", "soft", "filter"}; commandParameters["filter.seqs"] = addParameters(filterseqsArray, sizeof(filterseqsArray)/sizeof(string)); string vennArray[] = {"groups","line","label","calc"}; @@ -292,7 +289,7 @@ void ValidParameters::initCommandParameters() { string concensusArray[] = {}; commandParameters["concensus"] = addParameters(concensusArray, sizeof(concensusArray)/sizeof(string)); - string distanceArray[] = {"calc", "ends", "cutoff", "processors"}; + string distanceArray[] = {"fasta","phylip","clustal","nexus", "calc", "ends", "cutoff", "processors"}; commandParameters["distance"] = addParameters(distanceArray, sizeof(distanceArray)/sizeof(string)); string quitArray[] = {};