7EC3D4560FA0FFF900338DA5 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7EC3D4530FA0FFF900338DA5 /* whittaker.cpp */; };
8DD76F6A0486A84900D96B5E /* Mothur.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6859E8B029090EE04C91782 /* Mothur.1 */; };
A7027F2810DFF86D00BF45FE /* preclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7027F2710DFF86D00BF45FE /* preclustercommand.cpp */; };
+ A704E8A31106045D00870157 /* otuhierarchycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A704E8A21106045D00870157 /* otuhierarchycommand.cpp */; };
A70B53AA0F4CD7AD0064797E /* getgroupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A40F4CD7AD0064797E /* getgroupcommand.cpp */; };
A70B53AB0F4CD7AD0064797E /* getlabelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */; };
A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */; };
8DD76F6C0486A84900D96B5E /* mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mothur; sourceTree = BUILT_PRODUCTS_DIR; };
A7027F2610DFF86D00BF45FE /* preclustercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = preclustercommand.h; sourceTree = SOURCE_ROOT; };
A7027F2710DFF86D00BF45FE /* preclustercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = preclustercommand.cpp; sourceTree = SOURCE_ROOT; };
+ A704E8A11106045D00870157 /* otuhierarchycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = otuhierarchycommand.h; sourceTree = SOURCE_ROOT; };
+ A704E8A21106045D00870157 /* otuhierarchycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuhierarchycommand.cpp; sourceTree = SOURCE_ROOT; };
A70B53A40F4CD7AD0064797E /* getgroupcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getgroupcommand.cpp; sourceTree = SOURCE_ROOT; };
A70B53A50F4CD7AD0064797E /* getgroupcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getgroupcommand.h; sourceTree = SOURCE_ROOT; };
A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlabelcommand.cpp; sourceTree = SOURCE_ROOT; };
A7E0AAB510D2886D00CF407B /* mgclustercommand.cpp */,
375873F70F7D649C0040F377 /* nocommands.h */,
375873F60F7D649C0040F377 /* nocommands.cpp */,
+ A704E8A11106045D00870157 /* otuhierarchycommand.h */,
+ A704E8A21106045D00870157 /* otuhierarchycommand.cpp */,
3792946E0F2E191800B9034A /* parsimonycommand.h */,
3792946F0F2E191800B9034A /* parsimonycommand.cpp */,
A7D176CD10F2558500159497 /* pcacommand.h */,
A7D176CF10F2558500159497 /* pcacommand.cpp in Sources */,
A7D86C7D10FE09AB00865661 /* formatcolumn.cpp in Sources */,
A7D86C8C10FE09FE00865661 /* formatphylip.cpp in Sources */,
+ A704E8A31106045D00870157 /* otuhierarchycommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
convert(temp, freq);
- temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "true"; }
+ temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
all = isTrue(temp);
if (abort == false) {
mothurOut("The default value for groups is all the groups in your groupfile.\n");
validCalculator->printCalc("shared", cout);
mothurOut("The label parameter is used to analyze specific labels in your input.\n");
- mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is true.\n");
+ mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
#include "mgclustercommand.h"
#include "preclustercommand.h"
#include "pcacommand.h"
+#include "otuhierarchycommand.h"
/*******************************************************/
commands["mgcluster"] = "mgcluster";
commands["pre.cluster"] = "pre.cluster";
commands["pca"] = "pca";
+ commands["otu.hierarchy"] = "otu.hierarchy";
}
/***********************************************************/
else if(commandName == "mgcluster") { command = new MGClusterCommand(optionString); }
else if(commandName == "pre.cluster") { command = new PreClusterCommand(optionString); }
else if(commandName == "pca") { command = new PCACommand(optionString); }
+ else if(commandName == "otu.hierarchy") { command = new OtuHierarchyCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
/***********************************************************************/
void HCluster::combineFile() {
try {
- int bufferSize = 64000; //512k - this should be a variable that the user can set to optimize code to their hardware
- char* inputBuffer;
- inputBuffer = new char[bufferSize];
- size_t numRead;
+ //int bufferSize = 64000; //512k - this should be a variable that the user can set to optimize code to their hardware
+ //char* inputBuffer;
+ //inputBuffer = new char[bufferSize];
+ //size_t numRead;
string tempDistFile = distfile + ".temp";
ofstream out;
openOutputFile(tempDistFile, out);
- FILE* in;
- in = fopen(distfile.c_str(), "rb");
+ //FILE* in;
+ //in = fopen(distfile.c_str(), "rb");
+ ifstream in;
+ openInputFile(distfile, in);
+
int first, second;
float dist;
//go through file pulling out distances related to rows merging
//if mergedMin contains distances add those back into file
- bool done = false;
- partialDist = "";
- while ((numRead = fread(inputBuffer, 1, bufferSize, in)) != 0) {
+ //bool done = false;
+ //partialDist = "";
+ //while ((numRead = fread(inputBuffer, 1, bufferSize, in)) != 0) {
//cout << "number of char read = " << numRead << endl;
//cout << inputBuffer << endl;
- if (numRead < bufferSize) { done = true; }
+ //if (numRead < bufferSize) { done = true; }
//parse input into individual distances
- int spot = 0;
- string outputString = "";
- while(spot < numRead) {
+ //int spot = 0;
+ //string outputString = "";
+ //while(spot < numRead) {
//cout << "spot = " << spot << endl;
- seqDist nextDist = getNextDist(inputBuffer, spot, bufferSize);
+ // seqDist nextDist = getNextDist(inputBuffer, spot, bufferSize);
//you read a partial distance
- if (nextDist.seq1 == -1) { break; }
-
- first = nextDist.seq1; second = nextDist.seq2; dist = nextDist.dist;
+ // if (nextDist.seq1 == -1) { break; }
+ while (!in.eof()) {
+ //first = nextDist.seq1; second = nextDist.seq2; dist = nextDist.dist;
//cout << "next distance = " << first << '\t' << second << '\t' << dist << endl;
//since file is sorted and mergedMin is sorted
//you can put the smallest distance from each through the code below and keep the file sorted
+ in >> first >> second >> dist; gobble(in);
+
//while there are still values in mergedMin that are smaller than the distance read from file
while (count < mergedMin.size()) {
}else if (mergedMin[count].seq2 == smallRow) {
smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist;
}else { //if no, write to temp file
- outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n';
+ //outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n';
+ out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl;
}
count++;
}else{ break; }
smallRowColValues[0][first] = dist;
}else { //if no, write to temp file
- outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n';
+ //outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n';
+ out << first << '\t' << second << '\t' << dist << endl;
}
}
- out << outputString;
- if(done) { break; }
- }
- fclose(in);
+ //out << outputString;
+ //if(done) { break; }
+ //}
+ //fclose(in);
+ in.close();
//if values in mergedMin are larger than the the largest in file then
while (count < mergedMin.size()) {
exit(1);
}
}
-/***********************************************************************/
+/***********************************************************************
seqDist HCluster::getNextDist(char* buffer, int& index, int size){
try {
seqDist next;
vector<seqDist> getSeqsAN();
void combineFile();
void processFile();
- seqDist getNextDist(char*, int&, int);
+ //seqDist getNextDist(char*, int&, int);
RAbundVector* rabund;
ListVector* list;
--- /dev/null
+/*
+ * otuhierarchycommand.cpp
+ * Mothur
+ *
+ * Created by westcott on 1/19/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "otuhierarchycommand.h"
+
+//**********************************************************************************************************************
+OtuHierarchyCommand::OtuHierarchyCommand(string option){
+ try {
+ abort = false;
+ //allow user to run help
+ if(option == "help") { help(); abort = true; }
+
+ else {
+ //valid paramters for this command
+ string Array[] = {"list","label"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+
+ //check to make sure all parameters are valid for command
+ for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ listFile = validParameter.validFile(parameters, "list", true);
+ if (listFile == "not found") { mothurOut("list is a required parameter for the otu.hierarchy command."); mothurOutEndLine(); abort = true; }
+ else if (listFile == "not open") { abort = true; }
+
+
+ //check for optional parameter and set defaults
+ // ...at some point should added some additional type checking...
+ label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { mothurOut("label is a required parameter for the otu.hierarchy command."); mothurOutEndLine(); abort = true; }
+ else {
+ splitAtDash(label, labels);
+ if (labels.size() != 2) { mothurOut("You must provide 2 labels."); mothurOutEndLine(); abort = true; }
+ }
+ }
+
+ }
+ catch(exception& e) {
+ errorOut(e, "OtuHierarchyCommand", "OtuHierarchyCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+void OtuHierarchyCommand::help(){
+ try {
+ mothurOut("The otu.hierarchy command is used to see how otus relate at two distances. \n");
+ mothurOut("The otu.hierarchy command parameters are list and label. Both parameters are required. \n");
+ mothurOut("The otu.hierarchy command should be in the following format: \n");
+ mothurOut("otu.hierarchy(list=yourListFile, label=yourLabels).\n");
+ mothurOut("Example otu.hierarchy(list=amazon.fn.list, label=0.01-0.03).\n");
+ mothurOut("The otu.hierarchy command outputs a .otu.hierarchy file which is described on the wiki.\n");
+ mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
+ }
+ catch(exception& e) {
+ errorOut(e, "OtuHierarchyCommand", "help");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+OtuHierarchyCommand::~OtuHierarchyCommand(){}
+
+//**********************************************************************************************************************
+
+int OtuHierarchyCommand::execute(){
+ try {
+
+ if (abort == true) { return 0; }
+
+ //get listvectors that correspond to labels requested, (or use smart distancing to get closest listvector)
+ vector<ListVector> lists = getListVectors();
+
+ //determine which is little and which is big, putting little first
+ if (lists.size() == 2) {
+ //if big is first swap them
+ if (lists[0].getNumBins() < lists[1].getNumBins()) {
+ reverse(lists.begin(), lists.end());
+ }
+ }else{
+ mothurOut("error getting listvectors, unable to read 2 different vectors, check your label inputs."); mothurOutEndLine(); return 0;
+ }
+
+ //map sequences to bin number in the "little" otu
+ map<string, int> littleBins;
+ for (int i = 0; i < lists[0].getNumBins(); i++) {
+ string names = lists[0].get(i);
+
+ //parse bin
+ while (names.find_first_of(',') != -1) {
+ string name = names.substr(0,names.find_first_of(','));
+ names = names.substr(names.find_first_of(',')+1, names.length());
+ littleBins[name] = i;
+ }
+
+ //get last name
+ littleBins[names] = i;
+ }
+
+ ofstream out;
+ string outputFileName = getRootName(listFile) + lists[0].getLabel() + "-" + lists[1].getLabel() + ".otu.hierarchy";
+ openOutputFile(outputFileName, out);
+
+ //go through each bin in "big" otu and output the bins in "little" otu which created it
+ for (int i = 0; i < lists[1].getNumBins(); i++) {
+
+ string names = lists[1].get(i);
+
+ //output column 1
+ out << names << '\t';
+
+ map<int, int> bins; //bin numbers in little that are in this bin in big
+ map<int, int>::iterator it;
+
+ //parse bin
+ while (names.find_first_of(',') != -1) {
+ string name = names.substr(0,names.find_first_of(','));
+ names = names.substr(names.find_first_of(',')+1, names.length());
+ bins[littleBins[name]] = littleBins[name];
+ }
+
+ //get last name
+ bins[littleBins[names]] = littleBins[names];
+
+ string col2 = "";
+ for (it = bins.begin(); it != bins.end(); it++) {
+ col2 += lists[0].get(it->first) + "\t";
+ }
+
+ //output column 2
+ out << col2 << endl;
+ }
+
+ out.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "OtuHierarchyCommand", "execute");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+//returns a vector of listVectors where "little" vector is first
+vector<ListVector> OtuHierarchyCommand::getListVectors() {
+ try {
+
+ int pos; //to use in smart distancing, position of last read in file
+ int lastPos;
+ vector<ListVector> lists;
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //open file
+ ifstream in;
+ openInputFile(listFile, in);
+
+ //get first list vector in file
+ ListVector* list = NULL;
+ string lastLabel = "";
+ if (!in.eof()) {
+ pos = in.tellg();
+ lastPos = pos;
+ list = new ListVector(in);
+ gobble(in);
+ lastLabel = list->getLabel();
+ }
+
+ while ((list != NULL) && (userLabels.size() != 0)) {
+
+ //is this a listvector that we want?
+ if(labels.count(list->getLabel()) == 1){
+
+ //make copy of listvector
+ ListVector temp(*list);
+ lists.push_back(temp);
+
+ processedLabels.insert(list->getLabel());
+ userLabels.erase(list->getLabel());
+ }
+
+ //you have a label the user want that is smaller than this label and the last label has not already been processed
+ if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = list->getLabel();
+ int savePos = in.tellg();
+
+ //get smart distance line
+ delete list;
+ in.seekg(lastPos);
+ if (!in.eof()) {
+ list = new ListVector(in);
+ }else { list = NULL; }
+
+ //make copy of listvector
+ ListVector temp(*list);
+ lists.push_back(temp);
+
+ processedLabels.insert(list->getLabel());
+ userLabels.erase(list->getLabel());
+
+ //restore real lastlabel to save below
+ list->setLabel(saveLabel);
+ in.seekg(savePos);
+ }
+
+ lastLabel = list->getLabel();
+ lastPos = pos;
+
+ //get next line
+ delete list;
+ if (!in.eof()) {
+ pos = in.tellg();
+ list = new ListVector(in);
+ gobble(in);
+ }else { list = NULL; }
+ }
+
+
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+ needToRun = true;
+ }else {
+ mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ if (list != NULL) { delete list; }
+
+ in.seekg(lastPos);
+ if (!in.eof()) {
+ list = new ListVector(in);
+
+ //make copy of listvector
+ ListVector temp(*list);
+ lists.push_back(temp);
+
+ delete list;
+ }
+ }
+
+
+ return lists;
+ }
+ catch(exception& e) {
+ errorOut(e, "OtuHierarchyCommand", "getListVectors");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+
+
+
+
--- /dev/null
+#ifndef OTUHIERARCHYCOMMAND_H
+#define OTUHIERARCHYCOMMAND_H
+/*
+ * otuhierarchycommand.h
+ * Mothur
+ *
+ * Created by westcott on 1/19/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "listvector.hpp"
+
+//**********************************************************************************************************************
+
+class OtuHierarchyCommand : public Command {
+
+public:
+ OtuHierarchyCommand(string);
+ ~OtuHierarchyCommand();
+ int execute();
+ void help();
+
+private:
+ bool abort;
+ set<string> labels; //holds labels to be used
+ string label, listFile;
+
+ vector<ListVector> getListVectors();
+
+};
+
+//**********************************************************************************************************************
+
+#endif
+
+
globaldata->Groups = Groups;
}
- string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "true"; }
+ string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
all = isTrue(temp);
if (abort == false) {
mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
mothurOut("The default value for groups is all the groups in your groupfile.\n");
mothurOut("The label parameter is used to analyze specific labels in your input.\n");
- mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is true.\n");
+ mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");