/***********************************************************************/
-AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm) :
- Cluster(rav, lv, dm)
+AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c) :
+ Cluster(rav, lv, dm, c)
{
saveRow = -1;
saveCol = -1;
saveRow = smallRow;
saveCol = smallCol;
}
-
+
+ float oldColCell = colCell->dist;
+
colCell->dist = (colBin * colCell->dist + rowBin * rowCell->dist) / totalBin;
+
+ //warn user if merge with value above cutoff produces a value below cutoff
+ if ((colCell->dist < cutoff) && ((oldColCell > cutoff) || (rowCell->dist > cutoff)) ) {
+ mothurOut("Warning: merging " + toString(oldColCell) + " with " + toString(rowCell->dist) + ", new value = " + toString(colCell->dist) + ". Results will differ from those if cutoff was used in the read.dist command."); mothurOutEndLine();
+ }
+
return(true);
}
catch(exception& e) {
namefile = validParameter.validFile(parameters, "name", false);
- if (fastaFileName == "not found") { namefile = ""; }
+ if (namefile == "not found") { namefile = ""; }
else {
splitAtDash(namefile, namefileNames);
#endif
//make taxonomy tree from new taxonomy file
PhyloTree taxaBrowser;
-
+
ifstream in;
openInputFile(tempTaxonomyFile, in);
/***********************************************************************/
-Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm) :
-rabund(rav), list(lv), dMatrix(dm)
+Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c) :
+rabund(rav), list(lv), dMatrix(dm), cutoff(c)
{
/*
cout << "sizeof(MatData): " << sizeof(MatData) << endl;
class Cluster {
public:
- Cluster(RAbundVector*, ListVector*, SparseMatrix*);
+ Cluster(RAbundVector*, ListVector*, SparseMatrix*, float);
virtual void update();
virtual string getTag() = 0;
virtual void setMapWanted(bool m);
int smallCol;
float smallDist;
bool mapWanted;
+ float cutoff;
map<string, int> seq2Bin;
vector<MatVec> seqVec; // contains vectors of cells related to a certain sequence
class CompleteLinkage : public Cluster {
public:
- CompleteLinkage(RAbundVector*, ListVector*, SparseMatrix*);
+ CompleteLinkage(RAbundVector*, ListVector*, SparseMatrix*, float);
bool updateDistance(MatData& colCell, MatData& rowCell);
string getTag();
class SingleLinkage : public Cluster {
public:
- SingleLinkage(RAbundVector*, ListVector*, SparseMatrix*);
+ SingleLinkage(RAbundVector*, ListVector*, SparseMatrix*, float);
void update();
bool updateDistance(MatData& colCell, MatData& rowCell);
string getTag();
class AverageLinkage : public Cluster {
public:
- AverageLinkage(RAbundVector*, ListVector*, SparseMatrix*);
+ AverageLinkage(RAbundVector*, ListVector*, SparseMatrix*, float);
bool updateDistance(MatData& colCell, MatData& rowCell);
string getTag();
}
//create cluster
- if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix); }
- else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix); }
- else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix); }
+ if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff); }
+ else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff); }
+ else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff); }
tag = cluster->getTag();
fileroot = getRootName(globaldata->inputFileName);
/***********************************************************************/
-CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm) :
- Cluster(rav, lv, dm)
+CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c) :
+ Cluster(rav, lv, dm, c)
{}
/***********************************************************************/
if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
if (distance == -1) { distance = 1000000; }
-
- if(distance < cutoff && itA != itB){
-
+
+ if((distance < cutoff) && (itA != itB)){
if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
refRow = itA->second;
refCol = itB->second;
}
out.close();
fileHandle.close();
-
+
string squareFile;
if(lt == 0){ // oops, it was square
squareFile = filename;
rowMap.clear();
//save row you just read
- rowMap[second] = dist;
-
+ if (dist < cutoff) {
+ rowMap[second] = dist;
+ }
}else{
- rowMap[second] = dist;
+ if (dist < cutoff) {
+ rowMap[second] = dist;
+ }
}
}
#include "nameassignment.hpp"
+//**********************************************************************************************************************
+// This class takes a distance matrix file and converts it to a file where each row contains all distances below the cutoff
+// for a given sequence.
+
+// Example:
+ /* 5
+ A
+ B 0.01
+ C 0.015 0.03
+ D 0.03 0.02 0.02
+ E 0.04 0.05 0.03 0.02
+
+ becomes
+
+ 0 4 1 0.01 2 0.015 3 0.03 4 0.04
+ 1 4 0 0.01 2 0.03 3 0.02 4 0.05
+ 2 4 0 0.015 1 0.03 3 0.02 4 0.03
+ 3 4 0 0.03 1 0.02 2 0.02 4 0.02
+ 4 4 0 0.04 1 0.05 2 0.03 3 0.02
+
+ column 1 - sequence name converted to row number
+ column 2 - numDists under cutoff
+ rest of line - sequence row -> distance, sequence row -> distance
+
+ if you had a cutoff of 0.03 then the file would look like,
+
+ 0 3 1 0.01 2 0.015 3 0.03
+ 1 3 0 0.01 2 0.03 3 0.02
+ 2 4 0 0.015 1 0.03 3 0.02 4 0.03
+ 3 4 0 0.03 1 0.02 2 0.02 4 0.02
+ 4 2 2 0.03 3 0.02
+
+ This class also creates a vector of ints, rowPos.
+
+ rowPos[0] = position in the file of distances related to sequence 0.
+ If a sequence is excluded by the cutoff, it's rowPos = -1.
+*/
//**********************************************************************************************************************
class FormatMatrix {
help(); abort = true;
} else {
//valid paramters for this command
- string Array[] = {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff"};
+ string Array[] = {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
void GetOTURepCommand::help(){
try {
- mothurOut("The get.oturep command can only be executed after a successful read.dist command.\n");
- mothurOut("The get.oturep command parameters are list, fasta, name, group, large, sorted and label. The fasta and list parameters are required.\n");
+ mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, sorted and label. The fasta and list parameters are required, as well as phylip or column and name.\n");
mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
- mothurOut("The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
- mothurOut("Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups, name=amazon.names).\n");
+ mothurOut("The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n");
+ mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
+ mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
+ mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
mothurOut("The default value for label is all labels in your inputfile.\n");
mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n");
}
//close and remove formatted matrix file
- inRow.close();
- remove(distFile.c_str());
+ if (large) {
+ inRow.close();
+ //remove(distFile.c_str());
+ }
globaldata->gListVector = NULL;
delete input; globaldata->ginput = NULL;
if (method != "average") {
openInputFile(distfile, filehandle);
- }else{ firstRead = true; }
+ }else{
+ processFile();
+ }
}
catch(exception& e) {
errorOut(e, "HCluster", "HCluster");
if(method != "average") {
sameSeqs = getSeqsFNNN();
}else{
- if (firstRead) { processFile(); }
sameSeqs = getSeqsAN();
}
remove(distfile.c_str());
rename(outTemp.c_str(), distfile.c_str());
-
- firstRead = false;
}
catch(exception& e) {
errorOut(e, "HCluster", "processFile");
int smallCol;
float smallDist, cutoff;
map<string, int> seq2Bin;
- bool mapWanted, exitedBreak, firstRead;
+ bool mapWanted, exitedBreak;
seqDist next;
string method, distfile;
ifstream filehandle;
print_start = true;
start = time(NULL);
-
- //ifstream in;
- //openInputFile(distfile, in);
cluster = new HCluster(rabund, list, method, distfile, globaldata->nameMap, cutoff);
vector<seqDist> seqs; seqs.resize(1); // to start loop
for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
if (seqs[i].seq1 != seqs[i].seq2) {
- bool clustered = cluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
+ cluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
float rndDist = roundDist(seqs[i].dist, precision);
- if (clustered) {
- if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
- printData("unique");
- }
- else if((rndDist != rndPreviousDist)){
- printData(toString(rndPreviousDist, length-1));
- }
+ if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
+ printData("unique");
}
-
+ else if((rndDist != rndPreviousDist)){
+ printData(toString(rndPreviousDist, length-1));
+ }
+
previousDist = seqs[i].dist;
rndPreviousDist = rndDist;
oldRAbund = *rabund;
}
}
}
-
- //in.close();
if(previousDist <= 0.0000){
printData("unique");
delete read;
//create cluster
- if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix); }
- else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix); }
- else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix); }
+ if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff); }
+ else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff); }
+ else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff); }
cluster->setMapWanted(true);
//cluster using cluster classes
try {
//remove old logfile
- string logFileName = "mothur.logFile";
- remove(logFileName.c_str());
+ string log = "mothur.logFile";
+ remove(log.c_str());
+
+ time_t ltime = time(NULL); /* calendar time */
+ string logFileName = "mothur." + toString(asctime( localtime(<ime) )) + ".logfile";
//version
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
while(bail == 0) { bail = mothur->getInput(); }
delete mothur;
-
+
+ rename(log.c_str(), logFileName.c_str()); //logfile with timestamp
+
return 0;
}
catch(exception& e) {
#define isnan(x) ((x) != (x))
#define isinf(x) (fabs(x) == std::numeric_limits<double>::infinity())
-
typedef unsigned long ull;
struct IntNode {
numDists = globaldata->gSparseMatrix->getNNodes();
//cout << "matrix contains " << numDists << " distances." << endl;
- int lines = cutoff / (1.0/precision);
+ /* int lines = cutoff / (1.0/precision);
vector<float> dist_cutoff(lines+1,0);
for (int i = 0; i <= lines;i++) {
dist_cutoff[i] = (i + 0.5) / precision;
}
}
}
-
+*/
// string dist_string = "Dist:";
// string count_string = "Count: ";
//for (int i = 0; i <= lines;i++) {
}
Progress* reading;
-
+
if(square == 0){
reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2);
int index = 0;
-
+
for(int i=1;i<nseqs;i++){
fileHandle >> name;
matrixNames.push_back(name);
list->setLabel("0");
fileHandle.close();
- if(nameMap != NULL){
+ /* if(nameMap != NULL){
for(int i=0;i<matrixNames.size();i++){
nameMap->erase(matrixNames[i]);
}
//should probably tell them what is missing if we missed something
mothurOut("missed something\t" + toString(nameMap->size())); mothurOutEndLine();
}
- }
+ } */
}
catch(exception& e) {
string name = names.substr(0,names.find_first_of(','));
names = names.substr(names.find_first_of(',')+1, names.length());
string group = groupMap->getGroup(name);
-
+ cout << name << endl;
if(group == "not found") { outMisMatch << name << endl; }
}
-
+ cout << names << endl;
//get last name
string group = groupMap->getGroup(names);
if(group == "not found") { outMisMatch << names << endl; }
/***********************************************************************/
-SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm) :
-Cluster(rav, lv, dm)
+SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseMatrix* dm, float c) :
+Cluster(rav, lv, dm, c)
{}