7E412F490F8D21B600381DD0 /* slibshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E412F480F8D21B600381DD0 /* slibshuff.cpp */; };
7E412FEA0F8D3E2C00381DD0 /* libshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E412FE90F8D3E2C00381DD0 /* libshuff.cpp */; };
7E4130F80F8E58FA00381DD0 /* dlibshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E4130F60F8E58FA00381DD0 /* dlibshuff.cpp */; };
+ 7E9214820FD2081200699BF3 /* seqsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E9214810FD2081200699BF3 /* seqsummarycommand.cpp */; };
7EC3D4550FA0FFF900338DA5 /* coverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7EC3D4500FA0FFF900338DA5 /* coverage.cpp */; };
7EC3D4560FA0FFF900338DA5 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7EC3D4530FA0FFF900338DA5 /* whittaker.cpp */; };
8DD76F6A0486A84900D96B5E /* Mothur.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6859E8B029090EE04C91782 /* Mothur.1 */; };
7E412FE90F8D3E2C00381DD0 /* libshuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = libshuff.cpp; sourceTree = SOURCE_ROOT; };
7E4130F60F8E58FA00381DD0 /* dlibshuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = dlibshuff.cpp; sourceTree = SOURCE_ROOT; };
7E4130F70F8E58FA00381DD0 /* dlibshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dlibshuff.h; sourceTree = SOURCE_ROOT; };
+ 7E9214800FD2081200699BF3 /* seqsummarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqsummarycommand.h; sourceTree = "<group>"; };
+ 7E9214810FD2081200699BF3 /* seqsummarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqsummarycommand.cpp; sourceTree = "<group>"; };
7EC3D4500FA0FFF900338DA5 /* coverage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = coverage.cpp; sourceTree = SOURCE_ROOT; };
7EC3D4510FA0FFF900338DA5 /* coverage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = coverage.h; sourceTree = SOURCE_ROOT; };
7EC3D4520FA0FFF900338DA5 /* sharedanderberg.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedanderberg.h; sourceTree = SOURCE_ROOT; };
37D928A90F2133E5001D4494 /* commands */ = {
isa = PBXGroup;
children = (
+ 7E9214800FD2081200699BF3 /* seqsummarycommand.h */,
+ 7E9214810FD2081200699BF3 /* seqsummarycommand.cpp */,
37D927CD0F21331F001D4494 /* command.hpp */,
378DC5CD0FBDE1C8003B8607 /* aligncommand.h */,
378DC5CE0FBDE1C8003B8607 /* aligncommand.cpp */,
373C699C0FC1E63600137ACD /* solow.cpp in Sources */,
EB72FE260FC1F5CA0051AC11 /* shen.cpp in Sources */,
21E859D80FC4632E005E1A48 /* matrixoutputcommand.cpp in Sources */,
+ 7E9214820FD2081200699BF3 /* seqsummarycommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
#include "distancecommand.h"
#include "aligncommand.h"
#include "matrixoutputcommand.h"
-
+#include "seqsummarycommand.h"
/***********************************************************/
else if(commandName == "concensus") { command = new ConcensusCommand(); }
else if(commandName == "dist.seqs") { command = new DistanceCommand(); }
else if(commandName == "align.seqs") { command = new AlignCommand(); }
+ else if(commandName == "summary.seqs") { command = new SeqSummaryCommand(); }
else { command = new NoCommand(); }
return command;
string seqName, sequence;
for(int i=0;i<numSeqs;i++){
- templateSequences[i] = new Sequence(); // We're storing the aligned template sequences as a vector of
+// templateSequences[i] = new Sequence(); // We're storing the aligned template sequences as a vector of
// pointers to Sequence objects
fastaFile >> seqName;
- templateSequences[i]->setName(seqName);
+// templateSequences[i]->setName(seqName);
char letter;
string aligned;
aligned += letter;
}
}
- templateSequences[i]->setAligned(aligned); // Obviously, we need the fully aligned template sequence
- templateSequences[i]->setUnaligned(aligned);// We will also need the unaligned sequence for pairwise alignments
+ templateSequences[i] = new Sequence(seqName, aligned);
+// templateSequences[i]->setAligned(aligned); // Obviously, we need the fully aligned template sequence
+// templateSequences[i]->setUnaligned(aligned);// We will also need the unaligned sequence for pairwise alignments
fastaFile.putback(letter); // and database searches
}
}else if (globaldata->Estimators[i] == "eachgap") {
distCalculator = new eachGapDist();
}else if (globaldata->Estimators[i] == "onegap") {
- distCalculator = new oneGapDist(); }
+ distCalculator = new oneGapDist(); }
}
}
}else {
}
}
}
-
+
//reset calc for next command
globaldata->setCalc("");
}
string filename = globaldata->inputFileName;
if(globaldata->getFastaFile() != "") {
- readSeqs = new ReadFasta(filename); }
+ readSeqs = new ReadFasta(filename); }
else if(globaldata->getNexusFile() != "") {
- readSeqs = new ReadNexus(filename); }
+ readSeqs = new ReadNexus(filename); }
else if(globaldata->getClustalFile() != "") {
- readSeqs = new ReadClustal(filename); }
+ readSeqs = new ReadClustal(filename); }
else if(globaldata->getPhylipFile() != "") {
- readSeqs = new ReadPhylip(filename); }
-
+ readSeqs = new ReadPhylip(filename); }
+
readSeqs->read();
seqDB = readSeqs->getDB();
-
+
int numSeqs = seqDB->getNumSeqs();
cutoff += 0.005;
-
+
string distFile = getRootName(globaldata->getFastaFile()) + "dist";
remove(distFile.c_str());
//# if defined (_WIN32)
- //figure out how to implement the fork and wait commands in windows
+ //figure out how to implement the fork and wait commands in windows
// driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
//# endif
- # if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
- driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
- }
- else if(processors == 2){
-
- int pid = fork();
- if(pid > 0){
- driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);
- appendFiles((distFile+"tempa"), distFile);
+#if defined (__APPLE__) || (__MACH__)
+ if(processors == 1){
+ driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
+ }
+ else if(processors == 2){
+
+ int pid = fork();
+ if(pid > 0){
+ driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);
+ appendFiles((distFile+"tempa"), distFile);
+ remove((distFile + "tempa").c_str());
+ }
+ else{
+ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);
+ appendFiles((distFile+"tempb"), distFile);
+ remove((distFile + "tempb").c_str());
+ }
+ wait(NULL);
+
+ }
+ else if(processors == 3){
+ int pid1 = fork();
+ if(pid1 > 0){
+ int pid2 = fork();
+ if(pid2 > 0){
+ driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
+ appendFiles(distFile+"tempa", distFile);
remove((distFile + "tempa").c_str());
}
else{
- driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);
- appendFiles((distFile+"tempb"), distFile);
- remove((distFile + "tempb").c_str());
+ driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);
+ appendFiles(distFile+"tempb", distFile);
+ remove((distFile + "tempb").c_str());
}
wait(NULL);
-
}
- else if(processors == 3){
- int pid1 = fork();
- if(pid1 > 0){
- int pid2 = fork();
- if(pid2 > 0){
- driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
- appendFiles(distFile+"tempa", distFile);
- remove((distFile + "tempa").c_str());
- }
- else{
- driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);
- appendFiles(distFile+"tempb", distFile);
- remove((distFile + "tempb").c_str());
- }
- wait(NULL);
+ else{
+ driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);
+ appendFiles(distFile+"tempc", distFile);
+ remove((distFile + "tempc").c_str());
+ }
+ wait(NULL);
+ }
+ else if(processors == 4){
+ int pid1 = fork();
+ if(pid1 > 0){
+ int pid2 = fork();
+ if(pid2 > 0){
+ driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);
+ appendFiles(distFile+"tempa", distFile);
+ remove((distFile + "tempa").c_str());
}
else{
- driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);
- appendFiles(distFile+"tempc", distFile);
- remove((distFile + "tempc").c_str());
+ driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);
+ appendFiles(distFile+"tempb", distFile);
+ remove((distFile + "tempb").c_str());
}
wait(NULL);
}
- else if(processors == 4){
- int pid1 = fork();
- if(pid1 > 0){
- int pid2 = fork();
- if(pid2 > 0){
- driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);
- appendFiles(distFile+"tempa", distFile);
- remove((distFile + "tempa").c_str());
- }
- else{
- driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);
- appendFiles(distFile+"tempb", distFile);
- remove((distFile + "tempb").c_str());
- }
- wait(NULL);
+ else{
+ int pid3 = fork();
+ if(pid3 > 0){
+ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);
+ appendFiles(distFile+"tempc", distFile);
+ remove((distFile + "tempc").c_str());
}
else{
- int pid3 = fork();
- if(pid3 > 0){
- driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);
- appendFiles(distFile+"tempc", distFile);
- remove((distFile + "tempc").c_str());
- }
- else{
- driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);
- appendFiles(distFile+"tempd", distFile);
- remove((distFile + "tempd").c_str());
- }
- wait(NULL);
+ driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);
+ appendFiles(distFile+"tempd", distFile);
+ remove((distFile + "tempd").c_str());
}
wait(NULL);
}
wait(NULL);
- # else
- driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
- # endif
-
+ }
+ wait(NULL);
+#elif (linux) || (__linux)
+ if(processors == 1){
+ driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
+ }
+ else if(processors == 2){
+
+ int pid = fork();
+ if(pid > 0){
+ driver(distCalculator, seqDB, 0, (numSeqs/sqrt(2)), distFile + "tempa", cutoff);
+ appendFiles((distFile+"tempa"), distFile);
+ remove((distFile + "tempa").c_str());
+ }
+ else{
+ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), numSeqs, distFile + "tempb", cutoff);
+ appendFiles((distFile+"tempb"), distFile);
+ remove((distFile + "tempb").c_str());
+ }
+ wait();
+
+ }
+ else if(processors == 3){
+ int pid1 = fork();
+ if(pid1 > 0){
+ int pid2 = fork();
+ if(pid2 > 0){
+ driver(distCalculator, seqDB, 0, sqrt(3) * numSeqs / 3, distFile + "tempa", cutoff);
+ appendFiles(distFile+"tempa", distFile);
+ remove((distFile + "tempa").c_str());
+ }
+ else{
+ driver(distCalculator, seqDB, sqrt(3) * numSeqs / 3, sqrt(6) * numSeqs / 3, distFile + "tempb", cutoff);
+ appendFiles(distFile+"tempb", distFile);
+ remove((distFile + "tempb").c_str());
+ }
+ wait();
+ }
+ else{
+ driver(distCalculator, seqDB, sqrt(6) * numSeqs / 3, numSeqs, distFile + "tempc", cutoff);
+ appendFiles(distFile+"tempc", distFile);
+ remove((distFile + "tempc").c_str());
+ }
+ wait();
+ }
+ else if(processors == 4){
+ int pid1 = fork();
+ if(pid1 > 0){
+ int pid2 = fork();
+ if(pid2 > 0){
+ driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);
+ appendFiles(distFile+"tempa", distFile);
+ remove((distFile + "tempa").c_str());
+ }
+ else{
+ driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);
+ appendFiles(distFile+"tempb", distFile);
+ remove((distFile + "tempb").c_str());
+ }
+ wait();
+ }
+ else{
+ int pid3 = fork();
+ if(pid3 > 0){
+ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);
+ appendFiles(distFile+"tempc", distFile);
+ remove((distFile + "tempc").c_str());
+ }
+ else{
+ driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);
+ appendFiles(distFile+"tempd", distFile);
+ remove((distFile + "tempd").c_str());
+ }
+ wait();
+ }
+ wait();
+ }
+ wait();
+
+#else
+ driver(distCalculator, seqDB, 0, numSeqs, distFile, cutoff);
+#endif
+
delete distCalculator;
-
+
return 0;
-
+
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLine, int endLine, string dFileName, float cutoff){
try {
int startTime = time(NULL);
-
+
ofstream distFile(dFileName.c_str(), ios::trunc);
distFile.setf(ios::fixed, ios::showpoint);
distFile << setprecision(4);
-
- for(int i=startLine;i<endLine;i++){
+ for(int i=startLine;i<endLine;i++){
+
for(int j=0;j<i;j++){
distCalculator->calcDist(align->get(i), align->get(j));
double dist = distCalculator->getDist();
-
+
if(dist <= cutoff){
distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
}
-
+
}
if(i % 100 == 0){
cout << i << '\t' << time(NULL) - startTime << endl;
}
-
+
}
cout << endLine-1 << '\t' << time(NULL) - startTime << endl;
-
+
return 1;
}
catch(exception& e) {
cout << "An unknown error has occurred in the DistanceCommand class function driver. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
-
+
}
/**************************************************************************************************/
output << line << endl; // Appending back newline char
}
}
-
+
input.close();
output.close();
}
bool errorFree;
ErrorCheck* errorCheckor = new ErrorCheck();
- cout << "mothur v1.2.0" << endl;
- cout << "Last updated: 4/14/2009" << endl << endl;
+ cout << "mothur v.1.3.0" << endl;
+ cout << "Last updated: 4/25/2009" << endl << endl;
cout << "by" << endl;
cout << "Patrick D. Schloss" << endl << endl;
cout << "Department of Microbiology" << endl;
name = line.substr(1, line.length()); //rips off '>'
//read through file
- while (in.eof() != true) {
+ while (!in.eof()) {
in >> line;
if (line != "") {
if (isalnum(line.at(0))) { //if it's a sequence line
sequence = "";
}
}
+ gobble(in);
}
- //store last sequence and name info.
- seqmap[name] = sequence;
- it = data.find(sequence);
- if (it == data.end()) { //it's unique.
- data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
- data[sequence].groupnumber = 1;
- data[sequence].names = name;
- }else { // its a duplicate.
- data[sequence].names += "," + name;
- data[sequence].groupnumber++;
- }
}
catch(exception& e) {
db = readSeqs->getDB();
numSeqs = db->size();
- alignmentLength = db->get(0).getLength();
+ alignmentLength = db->get(0).getAlignLength();
filter = string(alignmentLength, '1');
}
};
-#endif
\ No newline at end of file
+#endif
void NastReport::setCandidate(Sequence* candSeq){
queryName = candSeq->getName();
- queryLength = candSeq->getUnalignLength();
+ queryLength = candSeq->getNumBases();
}
/******************************************************************************************************************/
void NastReport::setTemplate(Sequence* tempSeq){
templateName = tempSeq->getName();
- templateLength = tempSeq->getUnalignLength();
+ templateLength = tempSeq->getNumBases();
}
/******************************************************************************************************************/
--- /dev/null
+/*
+ * seqcoordcommand.cpp
+ * Mothur
+ *
+ * Created by Pat Schloss on 5/30/09.
+ * Copyright 2009 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "seqsummarycommand.h"
+
+//***************************************************************************************************************
+
+SeqSummaryCommand::SeqSummaryCommand(){
+ try {
+ globaldata = GlobalData::getInstance();
+
+ if(globaldata->getFastaFile() != "") { readSeqs = new ReadFasta(globaldata->inputFileName); }
+ else if(globaldata->getNexusFile() != "") { readSeqs = new ReadNexus(globaldata->inputFileName); }
+ else if(globaldata->getClustalFile() != "") { readSeqs = new ReadClustal(globaldata->inputFileName); }
+ else if(globaldata->getPhylipFile() != "") { readSeqs = new ReadPhylip(globaldata->inputFileName); }
+
+ readSeqs->read();
+ db = readSeqs->getDB();
+ numSeqs = db->size();
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+
+//***************************************************************************************************************
+
+SeqSummaryCommand::~SeqSummaryCommand(){
+}
+
+//***************************************************************************************************************
+
+int SeqSummaryCommand::execute(){
+ try{
+
+ ofstream outfile;
+ string summaryFile = getRootName(globaldata->inputFileName) + "summary";
+ openOutputFile(summaryFile, outfile);
+
+ vector<int> startPosition(numSeqs, 0);
+ vector<int> endPosition(numSeqs, 0);
+ vector<int> seqLength(numSeqs, 0);
+ vector<int> ambigBases(numSeqs, 0);
+ vector<int> longHomoPolymer(numSeqs, 0);
+
+ if(db->get(0).getIsAligned() == 1){
+ outfile << "seqname\tstart\tend\tlength\tambiguities\tlonghomopolymer" << endl;
+ for(int i = 0; i < numSeqs; i++) {
+ Sequence current = db->get(i);
+ startPosition[i] = current.getStartPos();
+ endPosition[i] = current.getEndPos();
+ seqLength[i] = current.getNumBases();
+ ambigBases[i] = current.getAmbigBases();
+ longHomoPolymer[i] = current.getLongHomoPolymer();
+ outfile << current.getName() << '\t' << startPosition[i] << '\t' << endPosition[i] << '\t' << seqLength[i] << '\t' << ambigBases[i] << '\t' << longHomoPolymer[i] << endl;
+ }
+ }
+ else{
+ outfile << "seqname\tlength\tambiguities\tlonghomopolymer" << endl;
+ for(int i=0;i<numSeqs;i++){
+ Sequence current = db->get(i);
+ seqLength[i] = current.getNumBases();
+ ambigBases[i] = current.getAmbigBases();
+ longHomoPolymer[i] = current.getLongHomoPolymer();
+ outfile << current.getName() << '\t' << seqLength[i] << '\t' << ambigBases[i] << '\t' << longHomoPolymer[i] << endl;
+ }
+ }
+
+ sort(seqLength.begin(), seqLength.end());
+ sort(ambigBases.begin(), ambigBases.end());
+ sort(longHomoPolymer.begin(), longHomoPolymer.end());
+
+ int median = int(numSeqs * 0.500);
+ int lowestPtile = int(numSeqs * 0.025);
+ int lowPtile = int(numSeqs * 0.250);
+ int highPtile = int(numSeqs * 0.750);
+ int highestPtile = int(numSeqs * 0.975);
+ int max = numSeqs - 1;
+
+ cout << endl;
+ if(db->get(0).getIsAligned() == 1){
+ sort(startPosition.begin(), startPosition.end());
+ sort(endPosition.begin(), endPosition.end());
+
+ cout << "\t\tStart\tEnd\tLength\tN's\tPolymer" << endl;
+ cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
+ cout << "2.5%-tile:\t" << startPosition[lowestPtile] << '\t' << endPosition[lowestPtile] << '\t' << seqLength[lowestPtile] << '\t' << ambigBases[lowestPtile] << '\t' << longHomoPolymer[lowestPtile] << endl;
+ cout << "25%-tile:\t" << startPosition[lowPtile] << '\t' << endPosition[lowPtile] << '\t' << seqLength[lowPtile] << '\t' << ambigBases[lowPtile] << '\t' << longHomoPolymer[lowPtile] << endl;
+ cout << "Median: \t" << startPosition[median] << '\t' << endPosition[median] << '\t' << seqLength[median] << '\t' << ambigBases[median] << '\t' << longHomoPolymer[median] << endl;
+ cout << "75%-tile:\t" << startPosition[highPtile] << '\t' << endPosition[highPtile] << '\t' << seqLength[highPtile] << '\t' << ambigBases[highPtile] << '\t' << longHomoPolymer[highPtile] << endl;
+ cout << "97.5%-tile:\t" << startPosition[highestPtile] << '\t' << endPosition[highestPtile] << '\t' << seqLength[highestPtile] << '\t' << ambigBases[highestPtile] << '\t' << longHomoPolymer[highestPtile] << endl;
+ cout << "Maximum:\t" << startPosition[max] << '\t' << endPosition[max] << '\t' << seqLength[max] << '\t' << ambigBases[max] << '\t' << longHomoPolymer[max] << endl;
+ }
+ else{
+ cout << "\t\tLength\tN's\tPolymer" << endl;
+ cout << "Minimum:\t" << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
+ cout << "2.5%-tile:\t" << seqLength[lowestPtile] << '\t' << ambigBases[lowestPtile] << '\t' << longHomoPolymer[lowestPtile] << endl;
+ cout << "25%-tile:\t" << seqLength[lowPtile] << '\t' << ambigBases[lowPtile] << '\t' << longHomoPolymer[lowPtile] << endl;
+ cout << "Median: \t" << seqLength[median] << '\t' << ambigBases[median] << '\t' << longHomoPolymer[median] << endl;
+ cout << "75%-tile:\t"<< seqLength[highPtile] << '\t' << ambigBases[highPtile] << '\t' << longHomoPolymer[highPtile] << endl;
+ cout << "97.5%-tile:\t"<< seqLength[highestPtile] << '\t' << ambigBases[highestPtile] << '\t' << longHomoPolymer[highestPtile] << endl;
+ cout << "Maximum:\t" << seqLength[max] << '\t' << ambigBases[max] << '\t' << longHomoPolymer[max] << endl;
+ }
+ cout << "# of Seqs:\t" << numSeqs << endl;
+
+ return 0;
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+
+}
+
+//***************************************************************************************************************
+
+
--- /dev/null
+#ifndef SEQSUMMARYCOMMAND_H
+#define SEQSUMMARYCOMMAND_H
+
+/*
+ * seqcoordcommand.h
+ * Mothur
+ *
+ * Created by Pat Schloss on 5/30/09.
+ * Copyright 2009 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+#include "globaldata.hpp"
+#include "readfasta.h"
+#include "readnexus.h"
+#include "readclustal.h"
+#include "readseqsphylip.h"
+
+using namespace std;
+
+class SeqSummaryCommand : public Command {
+public:
+ SeqSummaryCommand();
+ ~SeqSummaryCommand();
+ int execute();
+
+private:
+ int numSeqs;
+ GlobalData* globaldata;
+ ReadSeqs* readSeqs;
+ SequenceDB* db;
+};
+
+#endif
/***********************************************************************/
-Sequence::Sequence() {}
+Sequence::Sequence(){
+ initialize();
+}
/***********************************************************************/
Sequence::Sequence(string newName, string sequence) {
+
+ initialize();
name = newName;
if(sequence.find_first_of('-') != string::npos) {
setAligned(sequence);
+ isAligned = 1;
}
setUnaligned(sequence);
+
}
//********************************************************************************************************************
//********************************************************************************************************************
-string Sequence::convert2ints() {
+void Sequence::initialize(){
- if(unaligned == "") { /* need to throw an error */ }
+ name = "";
+ unaligned = "";
+ aligned = "";
+ pairwise = "";
- string processed;
+ numBases = 0;
+ alignmentLength = 0;
+ isAligned = 0;
+ startPos = -1;
+ endPos = -1;
+ longHomoPolymer = 0;
+ ambigBases = 0;
- for(int i=0;i<unaligned.length();i++) {
- if(toupper(unaligned[i]) == 'A') { processed += '0'; }
- else if(toupper(unaligned[i]) == 'C') { processed += '1'; }
- else if(toupper(unaligned[i]) == 'G') { processed += '2'; }
- else if(toupper(unaligned[i]) == 'T') { processed += '3'; }
- else if(toupper(unaligned[i]) == 'U') { processed += '3'; }
- else { processed += '4'; }
- }
- return processed;
-}
+}
//********************************************************************************************************************
else {
unaligned = sequence;
}
+ numBases = unaligned.length();
}
void Sequence::setAligned(string sequence){
//if the alignment starts or ends with a gap, replace it with a period to indicate missing data
-
- if(sequence[0] == '-'){
- for(int i=0;i<sequence.length();i++){
- if(sequence[i] == '-'){
- sequence[i] = '.';
+ aligned = sequence;
+ alignmentLength = aligned.length();
+
+ if(aligned[0] == '-'){
+ for(int i=0;i<alignmentLength;i++){
+ if(aligned[i] == '-'){
+ aligned[i] = '.';
}
else{
break;
}
}
- for(int i=sequence.length()-1;i>=0;i--){
- if(sequence[i] == '-'){
- sequence[i] = '.';
+ for(int i=alignmentLength-1;i>=0;i--){
+ if(aligned[i] == '-'){
+ aligned[i] = '.';
}
else{
break;
}
}
}
- aligned = sequence;
+
}
//********************************************************************************************************************
//********************************************************************************************************************
+string Sequence::convert2ints() {
+
+ if(unaligned == "") { /* need to throw an error */ }
+
+ string processed;
+
+ for(int i=0;i<unaligned.length();i++) {
+ if(toupper(unaligned[i]) == 'A') { processed += '0'; }
+ else if(toupper(unaligned[i]) == 'C') { processed += '1'; }
+ else if(toupper(unaligned[i]) == 'G') { processed += '2'; }
+ else if(toupper(unaligned[i]) == 'T') { processed += '3'; }
+ else if(toupper(unaligned[i]) == 'U') { processed += '3'; }
+ else { processed += '4'; }
+ }
+ return processed;
+}
+
+//********************************************************************************************************************
+
string Sequence::getName(){
return name;
}
//********************************************************************************************************************
-int Sequence::getLength(){
- if(unaligned.length() > aligned.length())
- return unaligned.length();
- return aligned.length();
+int Sequence::getNumBases(){
+ return numBases;
}
//********************************************************************************************************************
void Sequence::printSequence(ostream& out){
- string toPrint = unaligned;
- if(aligned.length() > unaligned.length())
- toPrint = aligned;
- out << ">" << name << "\n" << toPrint << "\n";
+
+ out << ">" << name << endl;
+ if(isAligned){
+ out << aligned << endl;
+ }
+ else{
+ out << unaligned << endl;
+ }
}
//********************************************************************************************************************
-int Sequence::getUnalignLength(){
- return unaligned.length();
+int Sequence::getAlignLength(){
+ return alignmentLength;
}
//********************************************************************************************************************
-int Sequence::getAlignLength(){
- return aligned.length();
+int Sequence::getAmbigBases(){
+ if(ambigBases == -1){
+
+ for(int j=0;j<numBases;j++){
+ if(unaligned[j] != 'A' && unaligned[j] != 'T' && unaligned[j] != 'G' && unaligned[j] != 'C'){
+ ambigBases++;
+ }
+ }
+ }
+
+ return ambigBases;
}
//********************************************************************************************************************
+int Sequence::getLongHomoPolymer(){
+ if(longHomoPolymer == 0){
+ int homoPolymer = 1;
+ for(int j=1;j<numBases;j++){
+ if(unaligned[j] == unaligned[j-1]){
+ homoPolymer++;
+ }
+ else{
+ if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
+ homoPolymer = 1;
+ }
+ }
+ if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
+ }
+ return longHomoPolymer;
+}
+
+//********************************************************************************************************************
+int Sequence::getStartPos(){
+ if(endPos == -1){
+ for(int j = 0; j < alignmentLength; j++) {
+ if(aligned[j] != '.'){
+ startPos = j;
+ break;
+ }
+ }
+ }
+ return startPos;
+}
+
+//********************************************************************************************************************
+
+int Sequence::getEndPos(){
+ if(endPos == -1){
+ for(int j=alignmentLength-1;j>=0;j--){
+ if(aligned[j] != '.'){
+ endPos = j;
+ break;
+ }
+ }
+ }
+ return endPos;
+}
+
+//********************************************************************************************************************
+
+bool Sequence::getIsAligned(){
+ return isAligned;
+}
+
+//********************************************************************************************************************
string getAligned();
string getPairwise();
string getUnaligned();
- int getLength(); //the greater of the lengths of unaligned and aligned
- int getUnalignLength();
+ int getNumBases();
+ int getStartPos();
+ int getEndPos();
int getAlignLength();
+ int getAmbigBases();
+ int getLongHomoPolymer();
+ bool getIsAligned();
void printSequence(ostream&);
private:
+ void initialize();
string name;
string unaligned;
string aligned;
string pairwise;
- int length;
- int lengthAligned;
+ int numBases;
+ int alignmentLength;
+ bool isAligned;
+ int longHomoPolymer;
+ int ambigBases;
+ int startPos, endPos;
};
/**************************************************************************************************/
commands["help"] = "help";
commands["filter.seqs"] = "filter.seqs";
commands["align.seqs"] = "align.seqs";
+ commands["summary.seqs"] = "summary.seqs";
commands["quit"] = "quit";
string filterseqsArray[] = {"fasta","phylip","clustal","nexus", "trump", "soft", "hard", "vertical"};
commandParameters["filter.seqs"] = addParameters(filterseqsArray, sizeof(filterseqsArray)/sizeof(string));
+ string summaryseqsArray[] = {"fasta","phylip","clustal","nexus"};
+ commandParameters["summary.seqs"] = addParameters(summaryseqsArray, sizeof(summaryseqsArray)/sizeof(string));
+
string vennArray[] = {"groups","line","label","calc"};
commandParameters["venn"] = addParameters(vennArray, sizeof(vennArray)/sizeof(string));