//reset calc for next command
globaldata->setCalc("");
-
-
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
int pid2 = fork();
if(pid2 > 0){
driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);
- //system(("cat " + distFile + "tempa" + " >> " + distFile).c_str());
appendFiles(distFile+"tempa", distFile);
- //system(("rm " + distFile + "tempa").c_str());
remove((distFile + "tempa").c_str());
}
else{
driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);
- //system(("cat " + distFile + "tempb" + " >> " + distFile).c_str());
appendFiles(distFile+"tempb", distFile);
- //system(("rm " + distFile + "tempb").c_str());
remove((distFile + "tempb").c_str());
}
wait(NULL);
int pid3 = fork();
if(pid3 > 0){
driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);
- //system(("cat " + distFile + "tempc" + " >> " + distFile).c_str());
appendFiles(distFile+"tempc", distFile);
- //system(("rm " + distFile + "tempc").c_str());
remove((distFile + "tempc").c_str());
}
else{
driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);
- //system(("cat " + distFile + "tempd" + " >> " + distFile).c_str());
appendFiles(distFile+"tempd", distFile);
- //system(("rm " + distFile + "tempd").c_str());
remove((distFile + "tempd").c_str());
}
wait(NULL);
if(dist <= cutoff){
distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
-//cout << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
}
}
exit(1);
}
}
+/**************************************************************************************************/
\ No newline at end of file
cout << "The deconvolute command parameter is fasta and it is required." << "\n";
cout << "The deconvolute command should be in the following format: " << "\n";
cout << "deconvolute(fasta=yourFastaFile) " << "\n";
+ }else if (globaldata->helpRequest == "distance") {
+ cout << "The distance command reads a file containing sequences and creates a distance file." << "\n";
+ cout << "The distance command parameters are fasta, phylip, clustal, nexus, calc, ends, cutoff and processors. " << "\n";
+ cout << "You must use one of the following parameters for your filename: fasta, phylip, clustal or nexus. " << "\n";
+ cout << "The calc parameter allows you to specify the method of calculating the distances. Your options are: nogaps, onegap or eachgap. The default is onegap." << "\n";
+ cout << "The ends parameter allows you to specify whether to include terminal gaps in distance. Your options are: T or F. The default is T." << "\n";
+ cout << "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0." << "\n";
+ cout << "The processors parameter allows you to specify number of processors to use. The default is 1, but you can use up to 4 processors." << "\n";
+ cout << "The distance command should be in the following format: " << "\n";
+ cout << "distance(fasta=yourFastaFile, calc=yourCalc, ends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) " << "\n";
+ cout << "Example distance(fasta=amazon.fasta, calc=eachgap, ends=F, cutoff= 2.0, processors=3)." << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc)." << "\n" << "\n";
}else if (globaldata->helpRequest == "collect.single") {
cout << "The collect.single command can only be executed after a successful read.otu command. WITH ONE EXECEPTION. " << "\n";
cout << "The collect.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n";
}
}
/***********************************************************************/
-
+//this is used when you don't need the order vector
vector<SharedRAbundVector*> InputData::getSharedRAbundVectors(){
try {
if(fileHandle){
if (format == "sharedfile") {
- SharedOrder = new SharedOrderVector(fileHandle);
- if (SharedOrder != NULL) {
- return SharedOrder->getSharedRAbundVector();
+ SharedRAbundVector* SharedRAbund = new SharedRAbundVector(fileHandle);
+ if (SharedRAbund != NULL) {
+ return SharedRAbund->getSharedRAbundVectors();
}
}else if (format == "shared") {
SharedList = new SharedListVector(fileHandle);
}
}
+
/***********************************************************************/
SAbundVector* InputData::getSAbundVector(){
globaldata->gMatrix = matrix; //save matrix for coverage commands
}else {
read->read(nameMap);
+ //to prevent memory leak
+ if (globaldata->gListVector != NULL) { delete globaldata->gListVector; }
globaldata->gListVector = read->getListVector();
+ if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix; }
globaldata->gSparseMatrix = read->getMatrix();
}
return 0;
groupMap->readMap();
//if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
- globaldata->gGroupmap = groupMap;
-
+ globaldata->gGroupmap = groupMap;
shared = new SharedCommand();
shared->execute();
+
parselist = new ParseListCommand();
parselist->execute();
}
groupName = groupmap->getGroup(names);
order->push_back(i, binSize, groupName);
}
+
random_shuffle(order->begin(), order->end());
return order;
}
vector<SharedRAbundVector*> lookup;
util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
- util->getSharedVectors(globaldata->Groups, lookup, this->getSharedOrderVector());
-
+
+ for (int i = 0; i < globaldata->Groups.size(); i++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ *temp = getSharedRAbundVector(globaldata->Groups[i]);
+ lookup.push_back(temp);
+ }
+
return lookup;
}
catch(exception& e) {
maxRank = 0;
for(int i=0;i<data.size();i++){
+
if(data[i].bin != -1){
numSeqs++;
}
}
+ //vector<individual> hold(numSeqs, 0);
- vector<individual> hold(numSeqs);
-
- for(int i=0;i<numSeqs;i++){
- if(data[i].bin != -1){
- hold[data[i].bin].bin = hold[data[i].bin].bin+1;
- }
- }
+ //for(int i=0;i<numSeqs;i++){
+ //if(data[i].bin != -1){
+ //hold[data[i].bin].bin = hold[data[i].bin].bin+1;
+ //}
+ //}
for(int i=0;i<numSeqs;i++){
if(data[i].bin > numBins) { numBins = data[i].bin; }
#include "sharedrabundvector.h"
#include "sabundvector.hpp"
#include "ordervector.hpp"
+#include "sharedutilities.h"
/***********************************************************************/
}
-/***********************************************************************
-
-
+/***********************************************************************/
+//reads a shared file
SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
try {
- int i, num;
- string holdLabel, group
- individual newGuy;
+ globaldata = GlobalData::getInstance();
- f >> label >> group >> num;
+ if (globaldata->gGroupmap == NULL) { groupmap = new GroupMap(); }
- //initialize data
- for (i=0; i<hold; i++) {
- newGuy = new individual;
- newGuy.abundance = 0;
- newGuy.bin = i;
- data.push_back(newGuy);
+ int num, inputData, pos, count;
+ count = 0;
+ string holdLabel, nextLabel, groupN;
+ individual newguy;
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup.clear();
+
+ //read in first row since you know there is at least 1 group.
+ f >> label >> groupN >> num;
+ holdLabel = label;
+
+ //add new vector to lookup
+ lookup.push_back(new SharedRAbundVector(num));
+ lookup[0]->setLabel(label);
+ lookup[0]->setGroup(groupN);
+
+ if (globaldata->gGroupmap == NULL) {
+ //save group in groupmap
+ groupmap->namesOfGroups.push_back(groupN);
+ groupmap->groupIndex[groupN] = 0;
}
- int inputData;
-
- for(int i=0;i<hold;i++){
+
+ //fill vector. data = first sharedrabund in file
+ for(int i=0;i<num;i++){
f >> inputData;
- set(i, inputData);
+
+ lookup[0]->push_back(inputData, i, groupN); //abundance, bin, group
+ push_back(inputData, i, groupN);
+ numSeqs += inputData;
+ numBins++;
+ if (inputData > maxRank) { maxRank = inputData; }
+
}
+
+ //save position in file in case next line is a new label.
+ pos = f.tellg();
+
+ if (f.eof() != true) { f >> nextLabel; }
+
+ //read the rest of the groups info in
+ while ((nextLabel == holdLabel) && (f.eof() != true)) {
+ f >> groupN >> num;
+ count++;
+
+ if (globaldata->gGroupmap == NULL) {
+ //save group in groupmap
+ groupmap->namesOfGroups.push_back(groupN);
+ groupmap->groupIndex[groupN] = count;
+ }
+
+ //add new vector to lookup
+ lookup.push_back(new SharedRAbundVector(num));
+ lookup[count]->setLabel(label);
+ lookup[count]->setGroup(groupN);
+
+ //fill vector.
+ for(int i=0;i<num;i++){
+ f >> inputData;
+ lookup[count]->push_back(inputData, i, groupN); //abundance, bin, group
+ }
+
+ //save position in file in case next line is a new label.
+ pos = f.tellg();
+
+ if (f.eof() != true) { f >> nextLabel; }
+ }
+
+ //put file pointer back since you are now at a new distance label
+ f.seekg(pos, ios::beg);
+
+ if (globaldata->gGroupmap == NULL) { globaldata->gGroupmap = groupmap; }
+
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function SharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
return *this;
}
/***********************************************************************/
+vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
+ try {
+ SharedUtil* util;
+ util = new SharedUtil();
+
+ util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
+
+ for (int i = 0; i < lookup.size(); i++) {
+ //if this sharedrabund is not from a group the user wants then delete it.
+ if (util->isValidGroup(lookup[i]->getGroup(), globaldata->Groups) == false) {
+ delete lookup[i];
+ lookup.erase(lookup.begin()+i);
+ i--;
+ }
+ }
+
+ return lookup;
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the SharedRAbundVector class function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+/***********************************************************************/
RAbundVector SharedRAbundVector::getRAbundVector() {
try {
#include "sharedordervector.h"
#include "sharedsabundvector.h"
#include "rabundvector.hpp"
+#include "groupmap.h"
/* This class is a child to datavector. It represents OTU information at a certain distance.
It is similiar to an rabundvector except each member of data knows which group it belongs to.
An individual which knows the OTU from which it came,
the group it is in and its abundance. */
-
+class GlobalData;
class SharedRAbundVector : public DataVector {
SharedRAbundVector(int);
//SharedRAbundVector(string, vector<int>);
SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs){};
- //SharedRAbundVector(ifstream&);
+ SharedRAbundVector(ifstream&);
~SharedRAbundVector();
int getNumBins();
SharedOrderVector getSharedOrderVector();
SharedSAbundVector getSharedSAbundVector();
SharedRAbundVector getSharedRAbundVector();
+ vector<SharedRAbundVector*> getSharedRAbundVectors();
private:
vector<individual> data;
+ vector<SharedRAbundVector*> lookup;
+ GlobalData* globaldata;
+ GroupMap* groupmap;
int maxRank;
int numBins;
int numSeqs;
//if the user only entered invalid groups
if (userGroups.size() == 0) {
- cout << "When using the groups parameter you must have at least 1 valid groups. I will run the command using all the groups in your groupfile." << endl;
+ cout << "You provided no valid groups. I will run the command using all the groups in your groupfile." << endl;
for (int i = 0; i < allGroups.size(); i++) {
userGroups.push_back(allGroups[i]);
}
void setGroups(vector<string>&, vector<string>&, string&, int&, string); //globaldata->Groups, your tree or group map, allgroups, numGroups, mode
void getCombos(vector<string>&, vector<string>, int&); //groupcomb, globaldata->Groups, numcomb
void updateGroupIndex(vector<string>&, map<string, int>&); //globaldata->Groups, groupmap->groupIndex
+ bool isValidGroup(string, vector<string>);
private:
- bool isValidGroup(string, vector<string>);
+
};
/**************************************************************************************************/
if (treeCalculators.size() == 0) { return 0; }
if (format == "sharedfile") {
+ //you have groups
read = new ReadOTUFile(globaldata->inputFileName);
read->read(&*globaldata);
input = globaldata->ginput;
- order = input->getSharedOrderVector();
+ lookup = input->getSharedRAbundVectors();
}else {
//you are using a list and a groupfile
read = new ReadOTUFile(globaldata->inputFileName);
input = globaldata->ginput;
SharedList = globaldata->gSharedList;
- order = SharedList->getSharedOrderVector();
+ lookup = SharedList->getSharedRAbundVector();
}
- //set users groups
- util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
+ if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; }
+
numGroups = globaldata->Groups.size();
groupNames = "";
for (int i = 0; i < numGroups; i++) { groupNames += globaldata->Groups[i]; }
tmap->makeSim(globaldata->gGroupmap);
globaldata->gTreemap = tmap;
- while(order != NULL){
+ while(lookup[0] != NULL){
- if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){
+ if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){
- cout << order->getLabel() << '\t' << count << endl;
- util->getSharedVectors(globaldata->Groups, lookup, order); //fills group vectors from order vector.
+ cout << lookup[0]->getLabel() << '\t' << count << endl;
//for each calculator
for(int i = 0 ; i < treeCalculators.size(); i++) {
for (int g = 0; g < numGroups; g++) { index[g] = g; }
//create a new filename
- outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + order->getLabel() + ".tre";
+ outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre";
for (int k = 0; k < lookup.size(); k++) {
for (int l = k; l < lookup.size(); l++) {
}
//get next line to process
- if (format == "sharedfile") {
- order = input->getSharedOrderVector();
- }else {
- //you are using a list and a groupfile
- SharedList = input->getSharedListVector(); //get new list vector to process
- if (SharedList != NULL) {
- order = SharedList->getSharedOrderVector(); //gets new order vector with group info.
- }else {
- break;
- }
- }
+ lookup = input->getSharedRAbundVectors();
count++;
}