mothurOut("The get.sharedotu command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n");
mothurOut("The get.sharedotu command should be in the following format: get.sabund(label=yourLabels, groups=yourGroups, fasta=yourFastafile, output=yourOutput).\n");
mothurOut("Example get.sharedotu(list=amazon.fn.list, label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n");
+ mothurOut("The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n");
mothurOut("The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n");
mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
}
openOutputFile(outputFileNames, outNames);
bool wroteSomething = false;
+ int num = 0;
//go through each bin, find out if shared
for (int i = 0; i < shared->getNumBins(); i++) {
//find group
string seqGroup = groupMap->getGroup(name);
if (output != "accnos") {
- namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + toString(i)));
+ namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + toString(i+1)));
}else { namesOfSeqsInThisBin.push_back(name); }
if (seqGroup == "not found") { mothurOut(name + " is not in your groupfile. Please correct."); mothurOutEndLine(); exit(1); }
if (sharedByAll) {
string seqGroup = groupMap->getGroup(names);
if (output != "accnos") {
- namesOfSeqsInThisBin.push_back((names + "|" + seqGroup + "|" + toString(i)));
+ namesOfSeqsInThisBin.push_back((names + "|" + seqGroup + "|" + toString(i+1)));
}else { namesOfSeqsInThisBin.push_back(names); }
if (seqGroup == "not found") { mothurOut(names + " is not in your groupfile. Please correct."); mothurOutEndLine(); exit(1); }
if (sharedByAll) {
wroteSomething = true;
+ num++;
//output list of names
for (int j = 0; j < namesOfSeqsInThisBin.size(); j++) {
if (!wroteSomething) {
remove(outputFileNames.c_str());
- string outputString = " - No otus shared by groups";
+ string outputString = "\t" + toString(num) + " - No otus shared by groups";
string groupString = "";
for (int h = 0; h < Groups.size(); h++) {
outputString += groupString + ".";
mothurOut(outputString); mothurOutEndLine();
- }else { mothurOutEndLine(); }
+ }else { mothurOut("\t" + toString(num)); mothurOutEndLine(); }
//if fasta file provided output new fasta file
if ((fastafile != "") && wroteSomething) {
}
//cout << "here3" << endl;
linkTable[colSpot].erase(size);
- linkTable.erase(linkTable.begin()+rowSpot); //delete col
+ //linkTable.erase(linkTable.begin()+rowSpot); //delete row
//printInfo();
//update activerows
activeLinks.erase(smallRow);
activeLinks.erase(smallCol);
-
- if(rowSpot>colSpot) { activeLinks[size] = colSpot; }
- else{ activeLinks[size] = colSpot; }
+ activeLinks[size] = colSpot;
//adjust everybody elses spot since you deleted - time vs. space
- for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
- if (it->second > rowSpot) { activeLinks[it->first]--; }
- }
+ //for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
+ //if (it->second > rowSpot) { activeLinks[it->first]--; }
+ //}
//cout << "here4" << endl;
class RAbundVector;
class ListVector;
-/************************************************************/
-struct clusterNode {
- int numSeq;
- int parent;
- int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
- clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
-};
/***********************************************************************/
class HCluster {
public:
HCluster(RAbundVector*, ListVector*);
+ ~HCluster(){};
bool update(int, int, float);
//string getTag();
mothurOut("Error: no list vector!"); mothurOutEndLine(); return 0;
}
-
-
float previousDist = 0.00000;
float rndPreviousDist = 0.00000;
oldRAbund = *rabund;
rabundFile.close();
listFile.close();
+ delete cluster;
//if (isTrue(timing)) {
mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster. "); mothurOutEndLine();
//}
IntNode* right;
};
+/************************************************************/
+struct clusterNode {
+ int numSeq;
+ int parent;
+ int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
+ clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
+};
+
/***********************************************************************/
// snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
groupMap = globaldata->gGroupmap;
+ //if hte user has not specified any groups then use them all
+ if (globaldata->Groups.size() == 0) {
+ groups = groupMap->namesOfGroups;
+ }else{ //they have specified groups
+ groups = globaldata->Groups;
+ }
+
//fill filehandles with neccessary ofstreams
int i;
ofstream* temp;
- for (i=0; i<groupMap->getNumGroups(); i++) {
+ for (i=0; i<groups.size(); i++) {
temp = new ofstream;
- filehandles[groupMap->namesOfGroups[i]] = temp;
+ filehandles[groups[i]] = temp;
}
//set fileroot
fileroot = getRootName(globaldata->getListFile());
//clears file before we start to write to it below
- for (int i=0; i<groupMap->getNumGroups(); i++) {
- remove((fileroot + groupMap->namesOfGroups[i] + ".rabund").c_str());
+ for (int i=0; i<groups.size(); i++) {
+ remove((fileroot + groups[i] + ".rabund").c_str());
}
}
string lastLabel = SharedList->getLabel();
vector<SharedRAbundVector*> lookup;
- if (SharedList->getNumSeqs() != groupMap->getNumSeqs()) {
+ if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error
mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine();
out.close();
return 1;
}
+ //if user has specified groups make new groupfile for them
+ if (globaldata->Groups.size() != 0) { //make new group file
+ string groups = "";
+ for (int i = 0; i < globaldata->Groups.size(); i++) {
+ groups += globaldata->Groups[i] + ".";
+ }
+
+ string newGroupFile = getRootName(globaldata->inputFileName) + groups + "groups";
+ ofstream outGroups;
+ openOutputFile(newGroupFile, outGroups);
+
+ vector<string> names = groupMap->getNamesSeqs();
+ string groupName;
+ for (int i = 0; i < names.size(); i++) {
+ groupName = groupMap->getGroup(names[i]);
+ if (isValidGroup(groupName, globaldata->Groups)) {
+ outGroups << names[i] << '\t' << groupName << endl;
+ }
+ }
+ outGroups.close();
+ }
+
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
set<string> processedLabels;
set<string> userLabels = globaldata->labels;
while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
-
if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
-
+
lookup = SharedList->getSharedRAbundVector();
mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
}
//**********************************************************************************************************************
+
+bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
+ try {
+ for (int i = 0; i < groups.size(); i++) {
+ if (groupname == groups[i]) { return true; }
+ }
+
+ return false;
+ }
+ catch(exception& e) {
+ errorOut(e, "SharedCommand", "isValidGroup");
+ exit(1);
+ }
+}
+/************************************************************/
+
+
private:
void printSharedData(vector<SharedRAbundVector*>);
void createMisMatchFile();
+ bool isValidGroup(string, vector<string>);
+
GlobalData* globaldata;
ReadOTUFile* read;
SharedListVector* SharedList;
InputData* input;
GroupMap* groupMap;
+ vector<string> groups;
ofstream out;
string filename, fileroot;
bool firsttime;
SharedUtil* util;
util = new SharedUtil();
vector<SharedRAbundVector*> lookup;
+ vector<SharedRAbundVector*> lookup2;
map<string, SharedRAbundVector*> finder;
+ map<string, SharedRAbundVector*>::iterator it;
string group, names, name;
-
+
util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
-
delete util;
- for (int i = 0; i < globaldata->Groups.size(); i++) {
+ for (int i = 0; i < globaldata->gGroupmap->namesOfGroups.size(); i++) {
SharedRAbundVector* temp = new SharedRAbundVector(data.size());
- finder[globaldata->Groups[i]] = temp;
- finder[globaldata->Groups[i]]->setLabel(label);
- finder[globaldata->Groups[i]]->setGroup(globaldata->Groups[i]);
+ finder[globaldata->gGroupmap->namesOfGroups[i]] = temp;
+ finder[globaldata->gGroupmap->namesOfGroups[i]]->setLabel(label);
+ finder[globaldata->gGroupmap->namesOfGroups[i]]->setGroup(globaldata->gGroupmap->namesOfGroups[i]);
//*temp = getSharedRAbundVector(globaldata->Groups[i]);
- lookup.push_back(finder[globaldata->Groups[i]]);
+ lookup.push_back(finder[globaldata->gGroupmap->namesOfGroups[i]]);
}
-
+//cout << "after blanks" << endl;
//fill vectors
for(int i=0;i<numBins;i++){
names = get(i);
name = names.substr(0,names.find_first_of(','));
names = names.substr(names.find_first_of(',')+1, names.length());
group = groupmap->getGroup(name);
+//cout << i << '\t' << name << '\t' << group << endl;
if(group == "not found") { mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); mothurOutEndLine(); exit(1); }
finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
}
//get last name
group = groupmap->getGroup(names);
+//cout << i << '\t' << names << '\t' << group << endl;
if(group == "not found") { mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); mothurOutEndLine(); exit(1); }
finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
}
- return lookup;
+ if (globaldata->Groups.size() == globaldata->gGroupmap->namesOfGroups.size()) { //no groups specified
+ lookup2 = lookup;
+ }else{ //delete unwanted groups
+ for (int i = 0; i < globaldata->Groups.size(); i++) {
+ SharedRAbundVector* temp = new SharedRAbundVector(*finder[globaldata->Groups[i]]);
+ lookup2.push_back(temp);
+ delete finder[globaldata->Groups[i]]; //so we don't get dup memory
+ }
+ }
+
+ return lookup2;
}
catch(exception& e) {
errorOut(e, "SharedListVector", "getSharedRAbundVector");
SharedRAbundVector();
SharedRAbundVector(int);
//SharedRAbundVector(string, vector<int>);
- SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs){};
+ SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs), group(bv.group), index(bv.index){};
SharedRAbundVector(ifstream&);
~SharedRAbundVector();
else {
//valid paramters for this command
- string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles"};
+ string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles", "qtrim"};
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
convert(temp, qThreshold);
+
+ temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "F"; }
+ qtrim = isTrue(temp);
temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
convert(temp, qAverage);
void TrimSeqsCommand::help(){
try {
mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
- mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength and maxlength.\n");
+ mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, qtrim and allfiles.\n");
mothurOut("The fasta parameter is required.\n");
mothurOut("The flip parameter .... The default is 0.\n");
mothurOut("The oligos parameter .... The default is "".\n");
mothurOut("The maxhomop parameter .... The default is 0.\n");
mothurOut("The minlength parameter .... The default is 0.\n");
mothurOut("The maxlength parameter .... The default is 0.\n");
+ mothurOut("The qfile parameter .....\n");
+ mothurOut("The qthreshold parameter .... The default is 0.\n");
+ mothurOut("The qaverage parameter .... The default is 0.\n");
+ mothurOut("The allfiles parameter .... The default is F.\n");
+ mothurOut("The qtrim parameter .... The default is F.\n");
mothurOut("The trim.seqs command should be in the following format: \n");
mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n");
mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n");
mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
- mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+ mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
+ mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
}
catch(exception& e) {
if(qFileName != ""){
if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); }
else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); }
+ if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) {
+ success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+ }
if(!success) { trashCode += 'q'; }
}
if(barcodes.size() != 0){
bool abort;
string fastaFile, oligoFile, qFileName;
- bool flip, allFiles;
+ bool flip, allFiles, qtrim;
int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength, qThreshold, qAverage;
vector<string> forPrimer, revPrimer;
map<string, int> barcodes;