}
//***************************************************************************************************************
-void Bellerophon::print(ostream& out) {
+void Bellerophon::print(ostream& out, ostream& outAcc) {
try {
int above1 = 0;
out << "Name\tScore\tLeft\tRight\t" << endl;
//calc # of seqs with preference above 1.0
if (pref[i].score[0] > 1.0) {
above1++;
+ outAcc << pref[i].name << endl;
mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine();
mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine();
}
~Bellerophon() {};
int getChimeras();
- void print(ostream&);
+ void print(ostream&, ostream&);
void setCons(string){};
void setQuantiles(string) {};
out << "For full window mapping info refer to " << mapInfo << endl << endl;
}
//***************************************************************************************************************
-void Ccode::print(ostream& out) {
+void Ccode::print(ostream& out, ostream& outAcc) {
try {
mothurOutEndLine();
if (results) {
mothurOut(querySeq->getName() + " was found have at least one chimeric window."); mothurOutEndLine();
+ outAcc << querySeq->getName() << endl;
}
//free memory
~Ccode();
int getChimeras(Sequence* query);
- void print(ostream&);
+ void print(ostream&, ostream&);
void printHeader(ostream&);
private:
vector<int> t; t.resize(seqs[0]->getAligned().length(), 0);
vector<int> g; g.resize(seqs[0]->getAligned().length(), 0);
vector<int> c; c.resize(seqs[0]->getAligned().length(), 0);
-
+
filterString = (string(seqs[0]->getAligned().length(), '1'));
//for each sequence
for (int i = 0; i < seqs.size(); i++) {
string seqAligned = seqs[i]->getAligned();
-
+
for (int j = 0; j < seqAligned.length(); j++) {
//if this spot is a gap
if ((seqAligned[j] == '-') || (seqAligned[j] == '.')) { gaps[j]++; }
if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; }
else if (((a[i] < threshold) && (t[i] < threshold) && (g[i] < threshold) && (c[i] < threshold))) { filterString[i] = '0'; numColRemoved++; }
- //cout << "a = " << a[i] << " t = " << t[i] << " g = " << g[i] << " c = " << c[i] << endl;
+ cout << "a = " << a[i] << " t = " << t[i] << " g = " << g[i] << " c = " << c[i] << endl;
}
-
-//cout << "filter = " << filterString << endl;
mothurOut("Filter removed " + toString(numColRemoved) + " columns."); mothurOutEndLine();
return filterString;
openInputFile(file, in);
vector<Sequence*> container;
int count = 0;
- int length = 0;
+ length = 0;
unaligned = false;
//read in seqs and store in vector
virtual void setTemplateSeqs(vector<Sequence*> t) { templateSeqs = t; }
virtual bool getUnaligned() { return unaligned; }
virtual void setTemplateFile(string t) { templateFileName = t; }
+ virtual int getLength() { return length; }
virtual void setCons(string){};
virtual void setQuantiles(string){};
virtual void printHeader(ostream&){};
virtual int getChimeras(Sequence*){ return 0; }
virtual int getChimeras(){ return 0; }
- virtual void print(ostream&){};
+ virtual void print(ostream&, ostream&){};
protected:
vector<Sequence*> templateSeqs;
bool filter, correction, svg, unaligned;
- int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters;
+ int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, length;
float divR;
string seqMask, quanfile, filterString, name, outputDir, templateFileName;
Sequence* getSequence(string); //find sequence from name
}
}
//***************************************************************************************************************
-void ChimeraCheckRDP::print(ostream& out) {
+void ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
try {
mothurOut("Processing: " + querySeq->getName()); mothurOutEndLine();
~ChimeraCheckRDP();
int getChimeras(Sequence*);
- void print(ostream&);
+ void print(ostream&, ostream&);
void doPrep();
private:
chimera->setIters(iters);
chimera->setTemplateFile(templatefile);
-
+ string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
+ string accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".accnos";
+
vector<Sequence*> templateSeqs;
if ((method != "bellerophon") && (method != "chimeracheck")) {
templateSeqs = chimera->readSeqs(templatefile);
if (chimera->getUnaligned()) {
- mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine();
+ mothurOut("Your template sequences are different lengths, please correct."); mothurOutEndLine();
//free memory
for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
+ delete chimera;
return 0;
}
}else if (method == "bellerophon") {//run bellerophon separately since you need to read entire fastafile to run it
chimera->getChimeras();
- string outputFName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
ofstream out;
- openOutputFile(outputFName, out);
+ openOutputFile(outputFileName, out);
- chimera->print(out);
+ ofstream out2;
+ openOutputFile(accnosFileName, out2);
+
+ chimera->print(out, out2);
out.close();
+ out2.close();
+
return 0;
}
//some methods need to do prep work before processing the chimeras
chimera->doPrep();
+ templateSeqsLength = chimera->getLength();
+
ofstream outHeader;
string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras.tempHeader";
openOutputFile(tempHeader, outHeader);
chimera->printHeader(outHeader);
outHeader.close();
- string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
//break up file
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
lines.push_back(new linePair(0, numSeqs));
- driver(lines[0], outputFileName, fastafile);
+ driver(lines[0], outputFileName, fastafile, accnosFileName);
}else{
vector<int> positions;
}
- createProcesses(outputFileName, fastafile);
-
+ createProcesses(outputFileName, fastafile, accnosFileName);
+
rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
-
+ rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
+
//append alignment and report files
for(int i=1;i<processors;i++){
appendOutputFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+
+ appendOutputFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
+ remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
+
}
}
inFASTA.close();
lines.push_back(new linePair(0, numSeqs));
- driver(lines[0], outputFileName, fastafile);
+ driver(lines[0], outputFileName, fastafile, accnosFileName);
#endif
//mothurOut("Output File Names: ");
//if ((filter) && (method == "bellerophon")) { mothurOut(
//if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
// else { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
-
+
appendOutputFiles(tempHeader, outputFileName);
+
remove(outputFileName.c_str());
rename(tempHeader.c_str(), outputFileName.c_str());
-
- for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
+
+ for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
+ delete chimera;
- if (method == "chimeracheck") { mothurOutEndLine(); mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine(); }
+ if (method == "chimeracheck") { remove(accnosFileName.c_str()); mothurOutEndLine(); mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine(); }
mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); mothurOutEndLine();
}
}//**********************************************************************************************************************
-int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filename){
+int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filename, string accnos){
try {
ofstream out;
openOutputFile(outputFName, out);
+ ofstream out2;
+ openOutputFile(accnos, out2);
+
ifstream inFASTA;
openInputFile(filename, inFASTA);
Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA);
if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
-
- //find chimeras
- chimera->getChimeras(candidateSeq);
+
+ if ((candidateSeq->getAligned().length() != templateSeqsLength) && (method != "chimeracheck")) { //chimeracheck does not require seqs to be aligned
+ mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); mothurOutEndLine();
+ }else{
+ //find chimeras
+ chimera->getChimeras(candidateSeq);
- //print results
- chimera->print(out);
+ //print results
+ chimera->print(out, out2);
+ }
}
delete candidateSeq;
if((line->numSeqs) % 100 != 0){ mothurOut("Processing sequence: " + toString(line->numSeqs)); mothurOutEndLine(); }
out.close();
+ out2.close();
inFASTA.close();
return 1;
/**************************************************************************************************/
-void ChimeraSeqsCommand::createProcesses(string outputFileName, string filename) {
+void ChimeraSeqsCommand::createProcesses(string outputFileName, string filename, string accnos) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+ driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
exit(0);
}else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
}
ifstream input;
openOutputFileAppend(temp, output);
- openInputFile(filename, input);
+ openInputFile(filename, input, "noerror");
while(char c = input.get()){
if(input.eof()) { break; }
vector<int> processIDS; //processid
vector<linePair*> lines;
- int driver(linePair*, string, string);
- void createProcesses(string, string);
+ int driver(linePair*, string, string, string);
+ void createProcesses(string, string, string);
void appendOutputFiles(string, string);
bool abort;
string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile, outputDir, search;
bool filter, correction, svg, printAll, realign;
- int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs;
+ int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
float divR;
Chimera* chimera;
out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
}
//***************************************************************************************************************
-void ChimeraSlayer::print(ostream& out) {
+void ChimeraSlayer::print(ostream& out, ostream& outAcc) {
try {
if (chimeraFlags == "yes") {
string chimeraFlag = "no";
if (chimeraFlag == "yes") {
if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
+ outAcc << querySeq->getName() << endl;
}
}
~ChimeraSlayer();
int getChimeras(Sequence*);
- void print(ostream&);
+ void print(ostream&, ostream&);
void printHeader(ostream&);
void doPrep();
/***********************************************************************/
Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c) :
-rabund(rav), list(lv), dMatrix(dm), cutoff(c)
+rabund(rav), list(lv), dMatrix(dm)
{
/*
cout << "sizeof(MatData): " << sizeof(MatData) << endl;
seqVec[currentCell->column].push_back(currentCell);
}
mapWanted = false; //set to true by mgcluster to speed up overlap merge
+
+ //save so you can modify as it changes in average neighbor
+ cutoff = c;
}
/***********************************************************************/
//This function clusters based on the method of the derived class
//At the moment only average and complete linkage are covered, because
//single linkage uses a different approach.
-void Cluster::update(){
+void Cluster::update(double& cutOFF){
try {
getRowColCells();
- vector<int> found(nColCells, 0);
+ vector<int> foundCol(nColCells, 0);
+
int search;
bool changed;
} else {
search = rowCells[i]->row;
}
-
+
+ bool merged = false;
for (int j=0;j<nColCells;j++) {
- if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) {
+ if (!((colCells[j]->row == smallRow) && (colCells[j]->column == smallCol))) { //if you are not hte smallest distance
if (colCells[j]->row == search || colCells[j]->column == search) {
- found[j] = 1;
+ foundCol[j] = 1;
+ merged = true;
changed = updateDistance(colCells[j], rowCells[i]);
// If the cell's distance changed and it had the same distance as
// the smallest distance, invalidate the mins vector in SparseMatrix
}
break;
}
- }
+ }
}
- removeCell(rowCells[i], i , -1);
+ //if not merged it you need it for warning
+ if (!merged) {
+ mothurOut("Warning: trying to merge cell " + toString(rowCells[i]->row+1) + " " + toString(rowCells[i]->column+1) + " distance " + toString(rowCells[i]->dist) + " with value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); mothurOutEndLine();
+ if (cutOFF > rowCells[i]->dist) { cutOFF = rowCells[i]->dist; mothurOut("changing cutoff to " + toString(cutOFF)); mothurOutEndLine(); }
+
+ }
+ removeCell(rowCells[i], i , -1);
+
}
}
clusterBins();
// Special handling for singlelinkage case, not sure whether this
// could be avoided
for (int i=nColCells-1;i>=0;i--) {
- if (found[i] == 0) {
- removeCell(colCells[i], -1, i);
-cout << "smallRow = " << smallRow+1 << " smallCol = " << smallCol+1 << endl;
+ if (foundCol[i] == 0) {
if (!((colCells[i]->row == smallRow) && (colCells[i]->column == smallCol))) {
- mothurOut("Warning: merging " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results will differ from those if cutoff was used in the read.dist command."); mothurOutEndLine();
+ mothurOut("Warning: merging cell " + toString(colCells[i]->row+1) + " " + toString(colCells[i]->column+1) + " distance " + toString(colCells[i]->dist) + " value above cutoff. Results may vary from using cutoff at cluster command instead of read.dist."); mothurOutEndLine();
+ if (cutOFF > colCells[i]->dist) { cutOFF = colCells[i]->dist; mothurOut("changing cutoff to " + toString(cutOFF)); mothurOutEndLine(); }
}
+ removeCell(colCells[i], -1, i);
}
}
}
public:
Cluster(RAbundVector*, ListVector*, SparseMatrix*, float);
- virtual void update();
+ virtual void update(double&);
virtual string getTag() = 0;
virtual void setMapWanted(bool m);
virtual map<string, int> getSeqtoBin() { return seq2Bin; }
loops++;
- cluster->update();
+ cluster->update(cutoff);
float dist = matrix->getSmallDist();
float rndDist = roundDist(dist, precision);
else {
//valid paramters for this command
- string Array[] = {"fasta", "phylip", "calc", "countends", "cutoff", "processors", "outputdir","inputdir"};
+ string Array[] = {"fasta", "output", "calc", "countends", "cutoff", "processors", "outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
temp = validParameter.validFile(parameters, "processors", false); if(temp == "not found"){ temp = "1"; }
convert(temp, processors);
- phylip = validParameter.validFile(parameters, "phylip", false); if(phylip == "not found"){ phylip = "F"; }
-
+ output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; }
+
+ if ((output != "column") && (output != "lt") && (output != "square")) { mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); mothurOutEndLine(); output = "column"; }
ValidCalculators validCalculator;
void DistanceCommand::help(){
try {
mothurOut("The dist.seqs command reads a file containing sequences and creates a distance file.\n");
- mothurOut("The dist.seqs command parameters are fasta, calc, countends, cutoff and processors. \n");
+ mothurOut("The dist.seqs command parameters are fasta, calc, countends, output, cutoff and processors. \n");
mothurOut("The fasta parameter is required.\n");
mothurOut("The calc parameter allows you to specify the method of calculating the distances. Your options are: nogaps, onegap or eachgap. The default is onegap.\n");
mothurOut("The countends parameter allows you to specify whether to include terminal gaps in distance. Your options are: T or F. The default is T.\n");
mothurOut("The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n");
+ mothurOut("The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n");
mothurOut("The processors parameter allows you to specify number of processors to use. The default is 1.\n");
mothurOut("The dist.seqs command should be in the following format: \n");
mothurOut("dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n");
string outputFile;
- //doses the user want the phylip formatted file as well
- if (isTrue(phylip) == true) {
+ if (output == "lt") { //does the user want lower triangle phylip formatted file
outputFile = outputDir + getRootName(getSimpleName(fastafile)) + "phylip.dist";
remove(outputFile.c_str());
//output numSeqs to phylip formatted dist file
- }else { //user wants column format
+ }else if (output == "column") { //user wants column format
outputFile = outputDir + getRootName(getSimpleName(fastafile)) + "dist";
remove(outputFile.c_str());
+ }else { //assume square
+ outputFile = outputDir + getRootName(getSimpleName(fastafile)) + "square.dist";
+ remove(outputFile.c_str());
}
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
driver(0, numSeqs, outputFile, cutoff);
#endif
+ if (output == "square") { convertMatrix(outputFile); }
+
delete distCalculator;
return 0;
outFile.setf(ios::fixed, ios::showpoint);
outFile << setprecision(4);
- if(isTrue(phylip) && startLine == 0){ outFile << alignDB.getNumSeqs() << endl; }
+ if((output == "lt") && startLine == 0){ outFile << alignDB.getNumSeqs() << endl; }
for(int i=startLine;i<endLine;i++){
- if(isTrue(phylip)) {
+ if(output == "lt") {
string name = alignDB.get(i).getName();
if (name.length() < 10) { //pad with spaces to make compatible
while (name.length() < 10) { name += " "; }
double dist = distCalculator->getDist();
if(dist <= cutoff){
- if (!isTrue(phylip)) { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
+ if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; }
}
- if (isTrue(phylip)) { outFile << dist << '\t'; }
+ if (output == "lt") { outFile << dist << '\t'; }
+ if (output == "square") { //make a square column you can convert to square phylip
+ outFile << alignDB.get(i).getName() << '\t' << alignDB.get(j).getName() << '\t' << dist << endl;
+ outFile << alignDB.get(j).getName() << '\t' << alignDB.get(i).getName() << '\t' << dist << endl;
+ }
+
}
- if (isTrue(phylip) == true) { outFile << endl; }
+ if (output == "lt") { outFile << endl; }
if(i % 100 == 0){
mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); mothurOutEndLine();
exit(1);
}
}
+/**************************************************************************************************/
+void DistanceCommand::convertMatrix(string outputFile) {
+ try{
+ //sort file by first column so the distances for each row are together
+ string outfile = getRootName(outputFile) + "sorted.dist.temp";
+
+ //use the unix sort
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ string command = "sort -n " + outputFile + " -o " + outfile;
+ system(command.c_str());
+ #else //sort using windows sort
+ string command = "sort " + outputFile + " /O " + outfile;
+ system(command.c_str());
+ #endif
+
+
+ //output to new file distance for each row and save positions in file where new row begins
+ ifstream in;
+ openInputFile(outfile, in);
+
+ ofstream out;
+ openOutputFile(outputFile, out);
+
+ out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+
+ out << alignDB.getNumSeqs() << endl;
+
+ //get first currentRow
+ string first, currentRow, second;
+ float dist;
+ map<string, float> rowDists; //take advantage of the fact that maps are already sorted by key
+ map<string, float>::iterator it;
+
+ in >> first;
+ currentRow = first;
+
+ rowDists[first] = 0.00; //distance to yourself is 0.0
+
+ in.seekg(0);
+ //openInputFile(outfile, in);
+
+ while(!in.eof()) {
+ in >> first >> second >> dist; gobble(in);
+
+ if (first != currentRow) {
+ //print out last row
+ out << currentRow << '\t'; //print name
+
+ //print dists
+ for (it = rowDists.begin(); it != rowDists.end(); it++) {
+ out << it->second << '\t';
+ }
+ out << endl;
+
+ //start new row
+ currentRow = first;
+ rowDists.clear();
+ rowDists[first] = 0.00;
+ rowDists[second] = dist;
+ }else{
+ rowDists[second] = dist;
+ }
+ }
+ //print out last row
+ out << currentRow << '\t'; //print name
+
+ //print dists
+ for (it = rowDists.begin(); it != rowDists.end(); it++) {
+ out << it->second << '\t';
+ }
+ out << endl;
+
+ in.close();
+ out.close();
+
+ remove(outfile.c_str());
+
+ }
+ catch(exception& e) {
+ errorOut(e, "DistanceCommand", "convertMatrix");
+ exit(1);
+ }
+}
/**************************************************************************************************
void DistanceCommand::appendFiles(string temp, string filename) {
try{
}
}
/**************************************************************************************************/
+
+
Dist* distCalculator;
SequenceDB alignDB;
- string countends, phylip, fastafile, calc, outputDir;
+ string countends, output, fastafile, calc, outputDir;
int processors;
float cutoff;
map<int, int> processIDS; //end line, processid
//void appendFiles(string, string);
void createProcesses(string);
int driver(/*Dist*, SequenceDB, */int, int, string, float);
+ void convertMatrix(string);
};
else {
//valid paramters for this command
- string Array[] = {"label","calc","groups","outputdir","inputdir"};
+ string Array[] = {"label","calc","groups","outputdir","inputdir", "output"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
else { allLines = 1; }
}
+ output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "lt"; }
+ if ((output != "lt") && (output != "square")) { mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); mothurOutEndLine(); output = "lt"; }
+
//if the user has not specified any labels use the ones from read.otu
if (label == "") {
allLines = globaldata->allLines;
void MatrixOutputCommand::help(){
try {
mothurOut("The dist.shared command can only be executed after a successful read.otu command.\n");
- mothurOut("The dist.shared command parameters are groups, calc and label. No parameters are required.\n");
+ mothurOut("The dist.shared command parameters are groups, calc, output and label. No parameters are required.\n");
mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like distance matrices created for, and is also separated by dashes.\n");
mothurOut("The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n");
+ mothurOut("The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n");
mothurOut("Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n");
mothurOut("The default value for groups is all the groups in your groupfile.\n");
mothurOut("The default value for calc is jclass and thetayc.\n");
void MatrixOutputCommand::printSims(ostream& out) {
try {
- //output column headers
+ out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+
+ //output num seqs
out << simMatrix.size() << endl;
- for (int m = 0; m < simMatrix.size(); m++) {
- out << lookup[m]->getGroup() << '\t';
- for (int n = 0; n < m; n++) {
- out << simMatrix[m][n] << '\t';
+ if (output == "lt") {
+ for (int m = 0; m < simMatrix.size(); m++) {
+ out << lookup[m]->getGroup() << '\t';
+ for (int n = 0; n < m; n++) {
+ out << simMatrix[m][n] << '\t';
+ }
+ out << endl;
+ }
+ }else{
+ for (int m = 0; m < simMatrix.size(); m++) {
+ out << lookup[m]->getGroup() << '\t';
+ for (int n = 0; n < simMatrix[m].size(); n++) {
+ out << simMatrix[m][n] << '\t';
+ }
+ out << endl;
}
- out << endl;
}
-
}
catch(exception& e) {
errorOut(e, "MatrixOutputCommand", "printSims");
}
}
- exportFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
+ exportFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist";
openOutputFile(exportFileName, out);
printSims(out);
out.close();
InputData* input;
ValidCalculators* validCalculator;
vector<SharedRAbundVector*> lookup;
- string exportFileName;
+ string exportFileName, output;
int numGroups;
ofstream out;
//cluster using cluster classes
while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
- cluster->update();
+ cluster->update(cutoff);
float dist = distMatrix->getSmallDist();
float rndDist = roundDist(dist, precision);
string blastfile, method, namefile, overlapFile, distFile, outputDir;
ofstream sabundFile, rabundFile, listFile;
- float cutoff, penalty;
+ double cutoff;
+ float penalty;
int precision, length, precisionLength;
bool abort, minWanted, hclusterWanted, merge;
bool reRead = false;
//create filter if needed for later
if (filter) {
- reRead = true;
-
- if (seqMask != "") {
- //mask templates
- for (int i = 0; i < templateSeqs.size(); i++) {
- decalc->runMask(templateSeqs[i]);
- }
- }
+cout << "filter" << endl;
+
//read in all query seqs
ifstream in;
openInputFile(fastafile, in);
//merge query seqs and template seqs
temp = templateSeqs;
for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
-
+cout << temp.size() << endl;
+ if (seqMask != "") {
+ reRead = true;
+ cout << "masked " << seqMask << endl;
+ //mask templates
+ for (int i = 0; i < temp.size(); i++) {
+ decalc->runMask(temp[i]);
+ }
+ }
+
mergedFilterString = createFilter(temp, 0.5);
-
+ cout << "here" << endl;
//reread template seqs
- for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
+ //for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
}
}
}
+ if (filter) {
+ reRead = true;
+ for (int i = 0; i < templateSeqs.size(); i++) {
+ runFilter(templateSeqs[i]);
+ }
+ }
+
mothurOut("Calculating quantiles for your template. This can take a while... I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command. Providing the .quan file will dramatically improve speed. "); cout.flush();
if (processors == 1) {
quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
if ((!filter) && (seqMask == "")) {
noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.quan";
- }else if ((filter) && (seqMask == "")) {
- noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.filtered.quan";
}else if ((!filter) && (seqMask != "")) {
noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.masked.quan";
}else if ((filter) && (seqMask != "")) {
- noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.filtered.masked.quan";
+ noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.filtered." + getSimpleName(getRootName(fastafile)) + "masked.quan";
+ }else if ((filter) && (seqMask == "")) {
+ noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.filtered." + getSimpleName(getRootName(fastafile)) + "quan";
}
+
+
decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
- openOutputFile(noOutliers, out5);
-
+ openOutputFile(noOutliers, out5);
//adjust quantiles
for (int i = 0; i < quantilesMembers.size(); i++) {
vector<float> temp;
}
}
//***************************************************************************************************************
-void Pintail::print(ostream& out) {
+void Pintail::print(ostream& out, ostream& outAcc) {
try {
int index = ceil(deviation);
out << querySeq->getName() << '\t' << "div: " << deviation << "\tstDev: " << DE << "\tchimera flag: " << chimera << endl;
if (chimera == "Yes") {
mothurOut(querySeq->getName() + "\tdiv: " + toString(deviation) + "\tstDev: " + toString(DE) + "\tchimera flag: " + chimera); mothurOutEndLine();
+ outAcc << querySeq->getName() << endl;
}
out << "Observed\t";
~Pintail();
int getChimeras(Sequence*);
- void print(ostream&);
+ void print(ostream&, ostream&);
void setCons(string c) { consfile = c; }
void setQuantiles(string q) { quanfile = q; }
string fastafile, consfile;
vector<linePair*> templateLines;
- Sequence*querySeq;
+ Sequence* querySeq;
Sequence* bestfit; //closest match to query in template