A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; };
A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
+ A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */; };
A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; };
A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721765613BB9F7D0014DAAE /* referencedb.cpp */; };
A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73901071588C40900ED2ED6 /* loadlogfilecommand.cpp */; };
A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */; };
A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
+ A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741744A175CD9B1007DF49B /* makelefsecommand.cpp */; };
A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; };
A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */; };
A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = "<group>"; };
A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nmdscommand.cpp; sourceTree = "<group>"; };
+ A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = lefsecommand.cpp; sourceTree = "<group>"; };
+ A7190B211768E0DF00A9AFA6 /* lefsecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lefsecommand.h; sourceTree = "<group>"; };
A71CB15E130B04A2001E7287 /* anosimcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = anosimcommand.cpp; sourceTree = "<group>"; };
A71CB15F130B04A2001E7287 /* anosimcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = anosimcommand.h; sourceTree = "<group>"; };
A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mergegroupscommand.h; sourceTree = "<group>"; };
A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clearmemorycommand.cpp; sourceTree = "<group>"; };
A73DDC3613C4BF64006AAE38 /* mothurmetastats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mothurmetastats.h; sourceTree = "<group>"; };
A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurmetastats.cpp; sourceTree = "<group>"; };
+ A741744A175CD9B1007DF49B /* makelefsecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makelefsecommand.cpp; sourceTree = "<group>"; };
+ A741744B175CD9B1007DF49B /* makelefsecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makelefsecommand.h; sourceTree = "<group>"; };
A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequencecountparser.cpp; sourceTree = "<group>"; };
A741FAD415D168A00067BCC5 /* sequencecountparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequencecountparser.h; sourceTree = "<group>"; };
A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kruskalwalliscommand.cpp; sourceTree = "<group>"; };
A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */,
A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */,
A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */,
- A70056E8156A93E300924A2D /* getotulabelscommand.h */,
- A70056E5156A93D000924A2D /* getotulabelscommand.cpp */,
A7E9B6F912D37EC400DA6239 /* getlineagecommand.h */,
A7E9B6F812D37EC400DA6239 /* getlineagecommand.cpp */,
A7E9B6FB12D37EC400DA6239 /* getlistcountcommand.h */,
A7E9B6FE12D37EC400DA6239 /* getoturepcommand.cpp */,
A7E9B70112D37EC400DA6239 /* getotuscommand.h */,
A7E9B70012D37EC400DA6239 /* getotuscommand.cpp */,
+ A70056E8156A93E300924A2D /* getotulabelscommand.h */,
+ A70056E5156A93D000924A2D /* getotulabelscommand.cpp */,
A7E9B70312D37EC400DA6239 /* getrabundcommand.h */,
A7E9B70212D37EC400DA6239 /* getrabundcommand.cpp */,
A7E9B70512D37EC400DA6239 /* getrelabundcommand.h */,
A7E9B72B12D37EC400DA6239 /* indicatorcommand.cpp */,
A7496D2D167B531B00CC7D7C /* kruskalwalliscommand.h */,
A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */,
+ A7190B211768E0DF00A9AFA6 /* lefsecommand.h */,
+ A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */,
A7E9B73C12D37EC400DA6239 /* libshuffcommand.h */,
A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */,
A7A0671C156294810095C8C5 /* listotulabelscommand.h */,
A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */,
A7E9B74412D37EC400DA6239 /* makegroupcommand.h */,
A7E9B74312D37EC400DA6239 /* makegroupcommand.cpp */,
+ A741744B175CD9B1007DF49B /* makelefsecommand.h */,
+ A741744A175CD9B1007DF49B /* makelefsecommand.cpp */,
A7E6F69C17427CF2006775E2 /* makelookupcommand.h */,
A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */,
A7E9B74A12D37EC400DA6239 /* matrixoutputcommand.h */,
A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */,
A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */,
A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */,
+ A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */,
+ A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
//for each word
for (int i = 0; i < numKmers; i++) {
+ //m->mothurOut("[DEBUG]: kmer = " + toString(i) + "\n");
+
if (m->control_pressed) { break; }
#ifdef USE_MPI
}
}
+ if (m->debug) { m->mothurOut("[DEBUG]: about to generateWordPairDiffArr\n"); }
generateWordPairDiffArr();
+ if (m->debug) { m->mothurOut("[DEBUG]: done generateWordPairDiffArr\n"); }
//save probabilities
if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
taxonomy.clear();
m->readTax(file, taxonomy);
- map<string, string> tempTaxonomy;
+
+ //commented out to save time with large templates. 6/12/13
+ //map<string, string> tempTaxonomy;
for (map<string, string>::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) {
- if (m->inUsersGroups(itTax->first, names)) {
- phyloTree->addSeqToTree(itTax->first, itTax->second);
- tempTaxonomy[itTax->first] = itTax->second;
- }else {
- m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n");
- }
+ //if (m->inUsersGroups(itTax->first, names)) {
+ phyloTree->addSeqToTree(itTax->first, itTax->second);
+ if (m->control_pressed) { break; }
+ //tempTaxonomy[itTax->first] = itTax->second;
+ // }else {
+ // m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n");
+ //}
}
- taxonomy = tempTaxonomy;
+ //taxonomy = tempTaxonomy;
#endif
phyloTree->assignHeirarchyIDs(0);
vector<string> Classify::parseTax(string tax) {
try {
vector<string> taxons;
-
- tax = tax.substr(0, tax.length()-1); //get rid of last ';'
-
- //parse taxonomy
- string individual;
- while (tax.find_first_of(';') != -1) {
- individual = tax.substr(0,tax.find_first_of(';'));
- tax = tax.substr(tax.find_first_of(';')+1, tax.length());
- taxons.push_back(individual);
-
- }
- //get last one
- taxons.push_back(tax);
-
- return taxons;
+ m->splitAtChar(tax, taxons, ';');
+ return taxons;
}
catch(exception& e) {
m->errorOut(e, "Classify", "parseTax");
#include "sparcccommand.h"
#include "makelookupcommand.h"
#include "renameseqscommand.h"
+#include "makelefsecommand.h"
/*******************************************************/
commands["sparcc"] = "sparcc";
commands["make.lookup"] = "make.lookup";
commands["rename.seqs"] = "rename.seqs";
+ commands["make.lefse"] = "make.lefse";
}
else if(commandName == "sparcc") { command = new SparccCommand(optionString); }
else if(commandName == "make.lookup") { command = new MakeLookupCommand(optionString); }
else if(commandName == "rename.seqs") { command = new RenameSeqsCommand(optionString); }
+ else if(commandName == "make.lefse") { command = new MakeLefseCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "sparcc") { pipecommand = new SparccCommand(optionString); }
else if(commandName == "make.lookup") { pipecommand = new MakeLookupCommand(optionString); }
else if(commandName == "rename.seqs") { pipecommand = new RenameSeqsCommand(optionString); }
+ else if(commandName == "make.lefse") { pipecommand = new MakeLefseCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "sparcc") { shellcommand = new SparccCommand(); }
else if(commandName == "make.lookup") { shellcommand = new MakeLookupCommand(); }
else if(commandName == "rename.seqs") { shellcommand = new RenameSeqsCommand(); }
+ else if(commandName == "make.lefse") { shellcommand = new MakeLefseCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
return lookup;
}
catch(exception& e) {
- m->errorOut(e, "CreateDatabaseCommand", "getList");
+ m->errorOut(e, "CreateDatabaseCommand", "getShared");
exit(1);
}
}
--- /dev/null
+//
+// lefsecommand.cpp
+// Mothur
+//
+// Created by SarahsWork on 6/12/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "lefsecommand.h"
--- /dev/null
+//
+// lefsecommand.h
+// Mothur
+//
+// Created by SarahsWork on 6/12/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__lefsecommand__
+#define __Mothur__lefsecommand__
+
+#include "command.hpp"
+
+/*
+ Columns = groups, rows are OTUs, class = design??
+
+ From http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload
+ Input data consist of a collection of m samples (columns) each made up of n numerical features (rows, typically normalized per-sample, red representing high values and green low). These samples are labeled with a class (taking two or more possible values) that represents the main biological hypothesis under investigation; they may also have one or more subclass labels reflecting within-class groupings.
+
+ Step 1: the Kruskall-Wallis test analyzes all features, testing whether the values in different classes are differentially distributed. Features violating the null hypothesis are further analyzed in Step 2.
+ Step 2: the pairwise Wilcoxon test checks whether all pairwise comparisons between subclasses within different classes significantly agree with the class level trend.
+ Step 3: the resulting subset of vectors is used to build a Linear Discriminant Analysis model from which the relative difference among classes is used to rank the features. The final output thus consists of a list of features that are discriminative with respect to the classes, consistent with the subclass grouping within classes, and ranked according to the effect size with which they differentiate classes.
+*/
+
+#endif /* defined(__Mothur__lefsecommand__) */
}
//roligo = reverseOligo(roligo);
+ if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
+
group = "";
// get rest of line in case there is a primer name
}
oligosPair newPrimer(foligo, roligo);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
//check for repeat barcodes
string tempPair = foligo+roligo;
--- /dev/null
+//
+// makelefse.cpp
+// Mothur
+//
+// Created by SarahsWork on 6/3/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "makelefsecommand.h"
+
+//**********************************************************************************************************************
+vector<string> MakeLefseCommand::setParameters(){
+ try {
+ CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(pshared);
+ CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(prelabund);
+ CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false,false); parameters.push_back(pconstaxonomy);
+ CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false, true); parameters.push_back(pdesign);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter pscale("scale", "Multiple", "totalgroup-totalotu-averagegroup-averageotu", "totalgroup", "", "", "","",false,false); parameters.push_back(pscale);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string MakeLefseCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The make.lefse command allows you to create a lefse formatted input file from mothur's output files.\n";
+ helpString += "The make.lefse command parameters are: shared, relabund, constaxonomy, design, scale, groups and label. The shared or relabund are required.\n";
+ helpString += "The shared parameter is used to input your shared file, http://www.wiki.mothur.org/wiki/Shared_file.\n";
+ helpString += "The relabund parameter is used to input your relabund file, http://www.wiki.mothur.org/wiki/Relabund_file.\n";
+ helpString += "The design parameter is used to input your design file, http://www.wiki.mothur.org/wiki/Design_File.\n";
+ helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance. ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n";
+ helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n";
+ helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
+ helpString += "The make.lefse command should be in the following format: make.lefse(list=yourListFile, taxonomy=outputFromClassify.seqsCommand, name=yourNameFile)\n";
+ helpString += "make.lefse(shared=final.an.list, taxonomy=final.an.taxonomy, name=final.names)\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string MakeLefseCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "lefse") { pattern = "[filename],[distance],lefse"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+MakeLefseCommand::MakeLefseCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["lefse"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+MakeLefseCommand::MakeLefseCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["lefse"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("design");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["design"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("constaxonomy");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("relabund");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["relabund"] = inputDir + it->second; }
+ }
+ }
+
+ //check for parameters
+ designfile = validParameter.validFile(parameters, "design", true);
+ if (designfile == "not open") { abort = true; }
+ else if (designfile == "not found") { designfile = ""; }
+ else { m->setDesignFile(designfile); }
+
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { abort = true; }
+ else if (sharedfile == "not found") { sharedfile = ""; }
+ else { m->setSharedFile(sharedfile); }
+
+ relabundfile = validParameter.validFile(parameters, "relabund", true);
+ if (relabundfile == "not open") { abort = true; }
+ else if (relabundfile == "not found") { relabundfile = ""; }
+ else { m->setRelAbundFile(relabundfile); }
+
+ constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
+ if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
+ else if (constaxonomyfile == "not found") { constaxonomyfile = ""; }
+
+ label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
+
+ string groups = validParameter.validFile(parameters, "groups", false);
+ if (groups == "not found") { groups = ""; }
+ else {
+ m->splitAtDash(groups, Groups);
+ m->setGroups(Groups);
+ }
+
+
+ if ((relabundfile == "") && (sharedfile == "")) {
+ //is there are current file available for either of these?
+ //give priority to shared, then relabund
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else {
+ relabundfile = m->getRelAbundFile();
+ if (relabundfile != "") { m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }
+ }
+
+ if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = ""; }
+
+ scale = validParameter.validFile(parameters, "scale", false); if (scale == "not found") { scale = "totalgroup"; }
+
+ if ((scale != "totalgroup") && (scale != "totalotu") && (scale != "averagegroup") && (scale != "averageotu")) {
+ m->mothurOut(scale + " is not a valid scaling option for the get.relabund command. Choices are totalgroup, totalotu, averagegroup, averageotu."); m->mothurOutEndLine(); abort = true;
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int MakeLefseCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ map<string, consTax2> consTax;
+ if (constaxonomyfile != "") { m->readConsTax(constaxonomyfile, consTax); }
+
+ if (m->control_pressed) { return 0; }
+
+ if (sharedfile != "") {
+ inputFile = sharedfile;
+ vector<SharedRAbundFloatVector*> lookup = getSharedRelabund();
+ runRelabund(consTax, lookup);
+ }else {
+ inputFile = relabundfile;
+ vector<SharedRAbundFloatVector*> lookup = getRelabund();
+ runRelabund(consTax, lookup);
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedRAbundFloatVector*>& lookup){
+ try {
+ if (outputDir == "") { outputDir = m->hasPath(inputFile); }
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
+ variables["[distance]"] = lookup[0]->getLabel();
+ string outputFile = getOutputFileName("lefse",variables);
+ outputNames.push_back(outputFile); outputTypes["lefse"].push_back(outputFile);
+
+ ofstream out;
+ m->openOutputFile(outputFile, out);
+
+ GroupMap* designMap = NULL;
+ if (designfile != "") {
+ designMap = new GroupMap(designfile);
+ designMap->readDesignMap();
+
+ out << "treatment\t";
+ for (int i = 0; i < lookup.size(); i++) {
+ string treatment = designMap->getGroup(lookup[i]->getGroup());
+ if (treatment == "not found") {
+ m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n");
+ }else { out << treatment << '\t'; }
+ }
+ out << endl;
+ }
+
+ out << "group\t";
+ for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
+ out << endl;
+
+ for (int i = 0; i < lookup[0]->getNumBins(); i++) { //process each otu
+ if (m->control_pressed) { break; }
+ string nameOfOtu = m->currentBinLabels[i];
+ if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy
+ map<string, consTax2>::iterator it = consTax.find(nameOfOtu);
+ if (it != consTax.end()) {
+ nameOfOtu = it->second.taxonomy;
+ //add sanity check abundances here??
+ string fixedName = "";
+ //remove confidences and change ; to |
+ m->removeConfidences(nameOfOtu);
+ for (int j = 0; j < nameOfOtu.length()-1; j++) {
+ if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentBinLabels[i] + '|'; }
+ else { fixedName += nameOfOtu[j]; }
+ }
+ nameOfOtu = fixedName;
+ }else {
+ m->mothurOut("[ERROR]: can't find " + nameOfOtu + " in constaxonomy file. Do the distances match, did you forget to use the label parameter?\n"); m->control_pressed = true;
+ }
+
+ }
+ //print name
+ out << nameOfOtu << '\t';
+
+ //print out relabunds for each otu
+ for (int j = 0; j < lookup.size(); j++) { out << lookup[j]->getAbundance(i) << '\t'; }
+ out << endl;
+ }
+ out.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<SharedRAbundFloatVector*> MakeLefseCommand::getSharedRelabund(){
+ try {
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> templookup = input.getSharedRAbundVectors();
+ string lastLabel = templookup[0]->getLabel();
+ vector<SharedRAbundFloatVector*> lookup;
+
+ if (label == "") { label = lastLabel; }
+ else {
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> labels; labels.insert(label);
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((templookup[0] != NULL) && (userLabels.size() != 0)) {
+ if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; } return lookup; }
+
+ if(labels.count(templookup[0]->getLabel()) == 1){
+ processedLabels.insert(templookup[0]->getLabel());
+ userLabels.erase(templookup[0]->getLabel());
+ break;
+ }
+
+ if ((m->anyLabelsToProcess(templookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = templookup[0]->getLabel();
+
+ for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; }
+ templookup = input.getSharedRAbundVectors(lastLabel);
+
+ processedLabels.insert(templookup[0]->getLabel());
+ userLabels.erase(templookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ templookup[0]->setLabel(saveLabel);
+ break;
+ }
+
+ lastLabel = templookup[0]->getLabel();
+
+ //get next line to process
+ //prevent memory leak
+ for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; }
+ templookup = input.getSharedRAbundVectors();
+ }
+
+
+ if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; } return lookup; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < templookup.size(); i++) { if (templookup[i] != NULL) { delete templookup[i]; } }
+ templookup = input.getSharedRAbundVectors(lastLabel);
+ }
+ }
+
+ for (int i = 0; i < templookup.size(); i++) {
+ SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+ temp->setLabel(templookup[i]->getLabel());
+ temp->setGroup(templookup[i]->getGroup());
+ lookup.push_back(temp);
+ }
+
+ //convert to relabund
+ for (int i = 0; i < templookup.size(); i++) {
+ for (int j = 0; j < templookup[i]->getNumBins(); j++) {
+
+ if (m->control_pressed) { for (int k = 0; k < templookup.size(); k++) { delete templookup[k]; } return lookup; }
+
+ int abund = templookup[i]->getAbundance(j);
+ float relabund = 0.0;
+
+ if (scale == "totalgroup") {
+ relabund = abund / (float) templookup[i]->getNumSeqs();
+ }else if (scale == "totalotu") {
+ //calc the total in this otu
+ int totalOtu = 0;
+ for (int l = 0; l < templookup.size(); l++) { totalOtu += templookup[l]->getAbundance(j); }
+ relabund = abund / (float) totalOtu;
+ }else if (scale == "averagegroup") {
+ relabund = abund / (float) (templookup[i]->getNumSeqs() / (float) templookup[i]->getNumBins());
+ }else if (scale == "averageotu") {
+ //calc the total in this otu
+ int totalOtu = 0;
+ for (int l = 0; l < templookup.size(); l++) { totalOtu += templookup[l]->getAbundance(j); }
+ float averageOtu = totalOtu / (float) templookup.size();
+
+ relabund = abund / (float) averageOtu;
+ }else{ m->mothurOut(scale + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ lookup[i]->push_back(relabund, lookup[i]->getGroup());
+ }
+ }
+
+ for (int k = 0; k < templookup.size(); k++) { delete templookup[k]; }
+
+ return lookup;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "getSharedRelabund");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<SharedRAbundFloatVector*> MakeLefseCommand::getRelabund(){
+ try {
+ InputData input(relabundfile, "relabund");
+ vector<SharedRAbundFloatVector*> lookupFloat = input.getSharedRAbundFloatVectors();
+ string lastLabel = lookupFloat[0]->getLabel();
+
+ if (label == "") { label = lastLabel; return lookupFloat; }
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> labels; labels.insert(label);
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
+
+ if (m->control_pressed) { return lookupFloat; }
+
+ if(labels.count(lookupFloat[0]->getLabel()) == 1){
+ processedLabels.insert(lookupFloat[0]->getLabel());
+ userLabels.erase(lookupFloat[0]->getLabel());
+ break;
+ }
+
+ if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookupFloat[0]->getLabel();
+
+ for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
+ lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
+
+ processedLabels.insert(lookupFloat[0]->getLabel());
+ userLabels.erase(lookupFloat[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookupFloat[0]->setLabel(saveLabel);
+ break;
+ }
+
+ lastLabel = lookupFloat[0]->getLabel();
+
+ //get next line to process
+ //prevent memory leak
+ for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
+ lookupFloat = input.getSharedRAbundFloatVectors();
+ }
+
+
+ if (m->control_pressed) { return lookupFloat; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }
+ lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
+ }
+
+ return lookupFloat;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLefseCommand", "getRelabund");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
--- /dev/null
+//
+// makelefse.h
+// Mothur
+//
+// Created by SarahsWork on 6/3/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__makelefse__
+#define __Mothur__makelefse__
+
+#include "mothurout.h"
+#include "command.hpp"
+#include "inputdata.h"
+#include "sharedutilities.h"
+#include "phylosummary.h"
+
+/**************************************************************************************************/
+
+class MakeLefseCommand : public Command {
+public:
+ MakeLefseCommand(string);
+ MakeLefseCommand();
+ ~MakeLefseCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "make.lefse"; }
+ string getCommandCategory() { return "General"; }
+
+ string getOutputPattern(string);
+ string getHelpString();
+ string getCitation() { return "http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload http://www.mothur.org/wiki/Make.lefse"; }
+ string getDescription() { return "creates LEfSe input file"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort, allLines, otulabel, hasGroupInfo;
+ string outputDir;
+ vector<string> outputNames, Groups;
+ string sharedfile, designfile, constaxonomyfile, relabundfile, scale, label, inputFile;
+
+ int runRelabund(map<string, consTax2>&, vector<SharedRAbundFloatVector*>&);
+
+ vector<SharedRAbundFloatVector*> getRelabund();
+ vector<SharedRAbundFloatVector*> getSharedRelabund();
+};
+
+/**************************************************************************************************/
+
+
+
+
+#endif /* defined(__Mothur__makelefse__) */
PDistCell() : index(0), dist(0) {};
PDistCell(ull c, float d) : index(c), dist(d) {}
};
+/***********************************************************************/
+struct consTax{
+ string name;
+ string taxonomy;
+ int abundance;
+ consTax() : name(""), taxonomy("unknown"), abundance(0) {};
+ consTax(string n, string t, int a) : name(n), taxonomy(t), abundance(a) {}
+};
+/***********************************************************************/
+struct consTax2{
+ string taxonomy;
+ int abundance;
+ consTax2() : taxonomy("unknown"), abundance(0) {};
+ consTax2(string t, int a) : taxonomy(t), abundance(a) {}
+};
/************************************************************/
struct clusterNode {
int numSeq;
exit(1);
}
}
+//**********************************************************************************************************************
+vector<consTax> MothurOut::readConsTax(string inputfile){
+ try {
+
+ vector<consTax> taxes;
+
+ ifstream in;
+ openInputFile(inputfile, in);
+
+ //read headers
+ getline(in);
+
+ while (!in.eof()) {
+
+ if (control_pressed) { break; }
+
+ string otu = ""; string tax = "unknown";
+ int size = 0;
+
+ in >> otu >> size >> tax; gobble(in);
+ consTax temp(otu, tax, size);
+ taxes.push_back(temp);
+ }
+ in.close();
+
+ return taxes;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "readConsTax");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int MothurOut::readConsTax(string inputfile, map<string, consTax2>& taxes){
+ try {
+ ifstream in;
+ openInputFile(inputfile, in);
+
+ //read headers
+ getline(in);
+
+ while (!in.eof()) {
+
+ if (control_pressed) { break; }
+
+ string otu = ""; string tax = "unknown";
+ int size = 0;
+
+ in >> otu >> size >> tax; gobble(in);
+ consTax2 temp(tax, size);
+ taxes[otu] = temp;
+ }
+ in.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "readConsTax");
+ exit(1);
+ }
+}
/**************************************************************************************************/
vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
try {
in.read(buffer, 4096);
vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
- for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]);
+ names.insert(pieces[i]);
+ }
}
in.close();
map<string, int> readNames(string);
map<string, int> readNames(string, unsigned long int&);
int readTax(string, map<string, string>&);
+ vector<consTax> readConsTax(string);
+ int readConsTax(string, map<string, consTax2>&);
int readNames(string, map<string, string>&, map<string, int>&);
int readNames(string, map<string, string>&);
int readNames(string, map<string, string>&, bool);
exit(1);
}
}
+/**************************************************************************************************/
+void PhyloSummary::print(ofstream& out, bool relabund){
+ try {
+
+ if (ignore) { assignRank(0); }
+
+ int totalChildrenInTree = 0;
+ map<string, int>::iterator itGroup;
+
+ map<string,int>::iterator it;
+ for(it=tree[0].children.begin();it!=tree[0].children.end();it++){
+ if (tree[it->second].total != 0) {
+ totalChildrenInTree++;
+ tree[0].total += tree[it->second].total;
+
+ if (groupmap != NULL) {
+ vector<string> mGroups = groupmap->getNamesOfGroups();
+ for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; }
+ }else if ( ct != NULL) {
+ vector<string> mGroups = ct->getNamesOfGroups();
+ if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } }
+ }
+ }
+ }
+
+ //print root
+ out << tree[0].name << "\t" << "1.0000" << "\t"; //root relative abundance is 1, everyone classifies to root
+
+ /*
+ if (groupmap != NULL) {
+ for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; }
+ }else if ( ct != NULL) {
+ if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } }
+ }*/
+
+ if (groupmap != NULL) {
+ vector<string> mGroups = groupmap->getNamesOfGroups();
+ for (int i = 0; i < mGroups.size(); i++) { out << "1.0000" << '\t'; }
+ }else if ( ct != NULL) {
+ vector<string> mGroups = ct->getNamesOfGroups();
+ if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { out << "1.0000" << '\t'; } }
+ }
+
+ out << endl;
+
+ //print rest
+ print(0, out, relabund);
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloSummary", "print");
+ exit(1);
+ }
+}
/**************************************************************************************************/
void PhyloSummary::print(int i, ofstream& out){
for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
if (tree[it->second].total != 0) {
-
+
int totalChildrenInTree = 0;
-
+
map<string,int>::iterator it2;
- for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){
+ for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){
if (tree[it2->second].total != 0) { totalChildrenInTree++; }
}
-
+
out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << tree[it->second].total << "\t";
map<string, int>::iterator itGroup;
// out << itGroup->second << '\t';
//}
vector<string> mGroups = groupmap->getNamesOfGroups();
- for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
+ for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
}else if (ct != NULL) {
if (ct->hasGroupInfo()) {
vector<string> mGroups = ct->getNamesOfGroups();
- for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
+ for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
}
}
out << endl;
exit(1);
}
}
+
+/**************************************************************************************************/
+
+void PhyloSummary::print(int i, ofstream& out, bool relabund){
+ try {
+ map<string,int>::iterator it;
+ for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
+
+ if (tree[it->second].total != 0) {
+
+ int totalChildrenInTree = 0;
+
+ map<string,int>::iterator it2;
+ for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){
+ if (tree[it2->second].total != 0) { totalChildrenInTree++; }
+ }
+
+ string nodeName = "";
+ int thisNode = it->second;
+ while (tree[thisNode].rank != "0") { //while you are not at top
+ if (m->control_pressed) { break; }
+ nodeName = tree[thisNode].name + "|" + nodeName;
+ thisNode = tree[thisNode].parent;
+ }
+ if (nodeName != "") { nodeName = nodeName.substr(0, nodeName.length()-1); }
+
+ out << nodeName << "\t" << (tree[it->second].total / (float)tree[i].total) << "\t";
+
+ map<string, int>::iterator itGroup;
+ if (groupmap != NULL) {
+ vector<string> mGroups = groupmap->getNamesOfGroups();
+ for (int j = 0; j < mGroups.size(); j++) {
+ if (tree[i].groupCount[mGroups[j]] == 0) {
+ out << 0 << '\t';
+ }else { out << (tree[it->second].groupCount[mGroups[j]] / (float)tree[i].groupCount[mGroups[j]]) << '\t'; }
+ }
+ }else if (ct != NULL) {
+ if (ct->hasGroupInfo()) {
+ vector<string> mGroups = ct->getNamesOfGroups();
+ for (int j = 0; j < mGroups.size(); j++) {
+ if (tree[i].groupCount[mGroups[j]] == 0) {
+ out << 0 << '\t';
+ }else { out << (tree[it->second].groupCount[mGroups[j]] / (float)tree[i].groupCount[mGroups[j]]) << '\t'; }
+ }
+ }
+ }
+ out << endl;
+
+ }
+
+ print(it->second, out, relabund);
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloSummary", "print");
+ exit(1);
+ }
+}
/**************************************************************************************************/
void PhyloSummary::readTreeStruct(ifstream& in){
try {
int addSeqToTree(string, string);
int addSeqToTree(string, map<string, bool>);
void print(ofstream&);
+ void print(ofstream&, bool);
int getMaxLevel() { return maxLevel; }
private:
string getNextTaxon(string&);
vector<rawTaxNode> tree;
void print(int, ofstream&);
+ void print(int, ofstream&, bool);
void assignRank(int);
void readTreeStruct(ifstream&);
GroupMap* groupmap;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- if (debugOnly) { return 0; }
-
- commandFactory = CommandFactory::getInstance();
-
- m->mothurOut("Mothur's directories:"); m->mothurOutEndLine();
-
- //redirect output
- if ((output == "clear") || (output == "")) { output = ""; commandFactory->setOutputDirectory(output); }
- else if (output == "default") {
- string exepath = m->argv;
- output = exepath.substr(0, (exepath.find_last_of('m')));
-
- m->mothurOut("outputDir=" + output); m->mothurOutEndLine();
- commandFactory->setOutputDirectory(output);
- }else {
- if (m->dirCheck(output)) {
- m->mothurOut("outputDir=" + output); m->mothurOutEndLine();
- commandFactory->setOutputDirectory(output);
+ if (debugOnly) { }
+ else {
+ commandFactory = CommandFactory::getInstance();
+
+ m->mothurOut("Mothur's directories:"); m->mothurOutEndLine();
+
+ //redirect output
+ if ((output == "clear") || (output == "")) { output = ""; commandFactory->setOutputDirectory(output); }
+ else if (output == "default") {
+ string exepath = m->argv;
+ output = exepath.substr(0, (exepath.find_last_of('m')));
+
+ m->mothurOut("outputDir=" + output); m->mothurOutEndLine();
+ commandFactory->setOutputDirectory(output);
+ }else {
+ if (m->dirCheck(output)) {
+ m->mothurOut("outputDir=" + output); m->mothurOutEndLine();
+ commandFactory->setOutputDirectory(output);
+ }
}
- }
-
- //redirect input
- if ((input == "clear") || (input == "")) { input = ""; commandFactory->setInputDirectory(input); }
- else if (input == "default") {
- string exepath = m->argv;
- input = exepath.substr(0, (exepath.find_last_of('m')));
-
- m->mothurOut("inputDir=" + input); m->mothurOutEndLine();
- commandFactory->setInputDirectory(input);
- }else {
- if (m->dirCheck(input)) {
- m->mothurOut("inputDir=" + input); m->mothurOutEndLine();
- commandFactory->setInputDirectory(input);
+
+ //redirect input
+ if ((input == "clear") || (input == "")) { input = ""; commandFactory->setInputDirectory(input); }
+ else if (input == "default") {
+ string exepath = m->argv;
+ input = exepath.substr(0, (exepath.find_last_of('m')));
+
+ m->mothurOut("inputDir=" + input); m->mothurOutEndLine();
+ commandFactory->setInputDirectory(input);
+ }else {
+ if (m->dirCheck(input)) {
+ m->mothurOut("inputDir=" + input); m->mothurOutEndLine();
+ commandFactory->setInputDirectory(input);
+ }
}
- }
-
- //set default
- if (tempdefault == "clear") {
- #ifdef MOTHUR_FILES
- string temp = MOTHUR_FILES;
- m->mothurOut("tempDefault=" + temp); m->mothurOutEndLine();
+
+ //set default
+ if (tempdefault == "clear") {
+#ifdef MOTHUR_FILES
+ string temp = MOTHUR_FILES;
+ m->mothurOut("tempDefault=" + temp); m->mothurOutEndLine();
m->setDefaultPath(temp);
- #else
- string temp = "";
- m->mothurOut("No default directory defined at compile time."); m->mothurOutEndLine();
+#else
+ string temp = "";
+ m->mothurOut("No default directory defined at compile time."); m->mothurOutEndLine();
m->setDefaultPath(temp);
- #endif
- }else if (tempdefault == "") { //do nothing
- }else if (tempdefault == "default") {
- string exepath = m->argv;
- tempdefault = exepath.substr(0, (exepath.find_last_of('m')));
-
- m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();
- m->setDefaultPath(tempdefault);
- }else {
- if (m->dirCheck(tempdefault)) {
+#endif
+ }else if (tempdefault == "") { //do nothing
+ }else if (tempdefault == "default") {
+ string exepath = m->argv;
+ tempdefault = exepath.substr(0, (exepath.find_last_of('m')));
+
m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();
- m->setDefaultPath(tempdefault);
+ m->setDefaultPath(tempdefault);
+ }else {
+ if (m->dirCheck(tempdefault)) {
+ m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();
+ m->setDefaultPath(tempdefault);
+ }
}
}
-
return 0;
}
catch(exception& e) {
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+ if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
success = 0;
break;
}
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > bdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+ if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
forwardQual.trimQScores(foligo.length(), -1);
- reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+ reverseQual.trimQScores(roligo.length(), -1);
success = 0;
break;
}
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > bdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > bdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
group = matches[0];
fStart = minFPos[0];
rStart = rawSeq.length() - minRPos[0];
+ if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
}
//we have an acceptable match for the forward and reverse, but do they match?
success = pdiffs + 10;
break;
}
- //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+ //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
oligo = alignment->getSeqAAln();
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > pdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
group = matches[0];
fStart = minFPos[0];
rStart = rawSeq.length() - minRPos[0];
+ if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
}
//we have an acceptable match for the forward and reverse, but do they match?
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+ if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
forwardQual.trimQScores(foligo.length(), -1);
- reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+ reverseQual.trimQScores(roligo.length(), -1);
success = 0;
break;
}
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > pdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
success = 0;
break;
}
}
- /*cout << minDiff << '\t' << minCount << '\t' << endl;
- for (int i = 0; i < minFGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;
- for (int i = 0; i < minRGroup.size(); i++) {
- cout << i << '\t';
- for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
- cout << endl;
- }
- cout << endl;*/
if(minDiff > pdiffs) { success = minDiff; } //no good matches
else {
bool foundMatch = false;
CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
- CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
+ CommandParameter pminlength("minlength", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pminlength);
CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
m->mothurConvert(temp, maxHomoP);
- temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
+ temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "1"; }
m->mothurConvert(temp, minLength);
temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
int currentSeqsDiffs = 0;
Sequence currSeq(inFASTA); m->gobble(inFASTA);
- //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
+ //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
QualityScores currQual; QualityScores savedQual;
}
else{ currentSeqsDiffs += success; }
}
-
+ //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
if(numSpacers != 0){
success = trimOligos->stripSpacer(currSeq, currQual);
if(success > sdiffs) { trashCode += 's'; }
int thisBarcodeIndex = 0;
int thisPrimerIndex = 0;
-
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
if(numBarcodes != 0){
thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
if(thisSuccess > bdiffs) { thisTrashCode += "b"; }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
-
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
if(numFPrimers != 0){
thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
if(thisSuccess > pdiffs) { thisTrashCode += "f"; }