objects = {
/* Begin PBXBuildFile section */
+ 211A24EB0FC306BC00769A33 /* getrepseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */; };
211C38320F961DD400FEE541 /* sharedutilities.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211C38300F961DD400FEE541 /* sharedutilities.cpp */; };
211C38380F961E1F00FEE541 /* treegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211C38360F961E1F00FEE541 /* treegroupscommand.cpp */; };
21DDC01B0F97A8FE0060691C /* bootstrapsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 21DDC01A0F97A8FE0060691C /* bootstrapsharedcommand.cpp */; };
+ 21E859D80FC4632E005E1A48 /* matrixoutputcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 21E859D70FC4632E005E1A48 /* matrixoutputcommand.cpp */; };
370B88070F8A4EE4005AB382 /* getoturepcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */; };
372E12700F26365B0095CF7E /* readotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E126F0F26365B0095CF7E /* readotucommand.cpp */; };
372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12950F263D5A0095CF7E /* readdistcommand.cpp */; };
373C69180FC1C8AF00137ACD /* blastdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69160FC1C8AF00137ACD /* blastdb.cpp */; };
373C691F0FC1C98600137ACD /* nast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C691D0FC1C98600137ACD /* nast.cpp */; };
373C692B0FC1C9EB00137ACD /* nastreport.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69290FC1C9EB00137ACD /* nastreport.cpp */; };
- 373C69340FC1CA9E00137ACD /* distancedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69320FC1CA9E00137ACD /* distancedb.cpp */; };
373C699A0FC1E63600137ACD /* boneh.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69940FC1E63600137ACD /* boneh.cpp */; };
373C699B0FC1E63600137ACD /* efron.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69960FC1E63600137ACD /* efron.cpp */; };
373C699C0FC1E63600137ACD /* solow.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 373C69980FC1E63600137ACD /* solow.cpp */; };
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
+ 211A24E90FC306BC00769A33 /* getrepseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getrepseqscommand.h; sourceTree = "<group>"; };
+ 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getrepseqscommand.cpp; sourceTree = "<group>"; };
211C38300F961DD400FEE541 /* sharedutilities.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedutilities.cpp; sourceTree = "<group>"; };
211C38310F961DD400FEE541 /* sharedutilities.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedutilities.h; sourceTree = "<group>"; };
211C38360F961E1F00FEE541 /* treegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treegroupscommand.cpp; sourceTree = "<group>"; };
211C38370F961E1F00FEE541 /* treegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treegroupscommand.h; sourceTree = "<group>"; };
21DDC0190F97A8FE0060691C /* bootstrapsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bootstrapsharedcommand.h; sourceTree = "<group>"; };
21DDC01A0F97A8FE0060691C /* bootstrapsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bootstrapsharedcommand.cpp; sourceTree = "<group>"; };
+ 21E859D60FC4632E005E1A48 /* matrixoutputcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = matrixoutputcommand.h; sourceTree = "<group>"; };
+ 21E859D70FC4632E005E1A48 /* matrixoutputcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = matrixoutputcommand.cpp; sourceTree = "<group>"; };
370B88050F8A4EE4005AB382 /* getoturepcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getoturepcommand.h; sourceTree = "<group>"; };
370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getoturepcommand.cpp; sourceTree = "<group>"; };
372E126E0F26365B0095CF7E /* readotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readotucommand.h; sourceTree = "<group>"; };
373C691E0FC1C98600137ACD /* nast.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = nast.hpp; sourceTree = "<group>"; };
373C69290FC1C9EB00137ACD /* nastreport.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nastreport.cpp; sourceTree = "<group>"; };
373C692A0FC1C9EB00137ACD /* nastreport.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = nastreport.hpp; sourceTree = "<group>"; };
- 373C69320FC1CA9E00137ACD /* distancedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = distancedb.cpp; sourceTree = "<group>"; };
- 373C69330FC1CA9E00137ACD /* distancedb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = distancedb.hpp; sourceTree = "<group>"; };
373C69940FC1E63600137ACD /* boneh.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = boneh.cpp; sourceTree = "<group>"; };
373C69950FC1E63600137ACD /* boneh.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = boneh.h; sourceTree = "<group>"; };
373C69960FC1E63600137ACD /* efron.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = efron.cpp; sourceTree = "<group>"; };
08FB7794FE84155DC02AAC07 /* Mothur */ = {
isa = PBXGroup;
children = (
- EB72FE240FC1F5CA0051AC11 /* shen.cpp */,
- EB72FE250FC1F5CA0051AC11 /* shen.h */,
08FB7795FE84155DC02AAC07 /* Source */,
C6859E8C029090F304C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */,
37D9283C0F21331F001D4494 /* sharedthetan.cpp */,
37D9283F0F21331F001D4494 /* sharedthetayc.h */,
37D9283E0F21331F001D4494 /* sharedthetayc.cpp */,
+ EB72FE250FC1F5CA0051AC11 /* shen.h */,
+ EB72FE240FC1F5CA0051AC11 /* shen.cpp */,
37D928410F21331F001D4494 /* simpson.h */,
37D928400F21331F001D4494 /* simpson.cpp */,
37D928430F21331F001D4494 /* sobs.h */,
A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */,
370B88050F8A4EE4005AB382 /* getoturepcommand.h */,
370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */,
+ 211A24E90FC306BC00769A33 /* getrepseqscommand.h */,
+ 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */,
375873F10F7D64800040F377 /* heatmapcommand.h */,
375873F00F7D64800040F377 /* heatmapcommand.cpp */,
37D927E40F21331F001D4494 /* helpcommand.h */,
37D927E30F21331F001D4494 /* helpcommand.cpp */,
375873F40F7D648F0040F377 /* libshuffcommand.h */,
375873F30F7D648F0040F377 /* libshuffcommand.cpp */,
+ 21E859D60FC4632E005E1A48 /* matrixoutputcommand.h */,
+ 21E859D70FC4632E005E1A48 /* matrixoutputcommand.cpp */,
375873F60F7D649C0040F377 /* nocommands.cpp */,
375873F70F7D649C0040F377 /* nocommands.h */,
37D927FA0F21331F001D4494 /* parselistcommand.h */,
37D927D40F21331F001D4494 /* database.hpp */,
37D927D30F21331F001D4494 /* database.cpp */,
37D927D50F21331F001D4494 /* datavector.hpp */,
- 373C69330FC1CA9E00137ACD /* distancedb.hpp */,
- 373C69320FC1CA9E00137ACD /* distancedb.cpp */,
37D927DC0F21331F001D4494 /* fastamap.h */,
37D927DB0F21331F001D4494 /* fastamap.cpp */,
375873EA0F7D64520040F377 /* fullmatrix.h */,
373C69180FC1C8AF00137ACD /* blastdb.cpp in Sources */,
373C691F0FC1C98600137ACD /* nast.cpp in Sources */,
373C692B0FC1C9EB00137ACD /* nastreport.cpp in Sources */,
- 373C69340FC1CA9E00137ACD /* distancedb.cpp in Sources */,
373C699A0FC1E63600137ACD /* boneh.cpp in Sources */,
373C699B0FC1E63600137ACD /* efron.cpp in Sources */,
373C699C0FC1E63600137ACD /* solow.cpp in Sources */,
EB72FE260FC1F5CA0051AC11 /* shen.cpp in Sources */,
+ 211A24EB0FC306BC00769A33 /* getrepseqscommand.cpp in Sources */,
+ 21E859D80FC4632E005E1A48 /* matrixoutputcommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
#include "kmerdb.hpp"
#include "suffixdb.hpp"
#include "blastdb.hpp"
-#include "distancedb.hpp"
#include "nast.hpp"
#include "nastreport.hpp"
convert(globaldata->getMismatch(), misMatch);
convert(globaldata->getGapopen(), gapOpen);
convert(globaldata->getGapextend(), gapExtend);
- distanceFileName = "????";
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the AlignCommand class Function AlignCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
if(globaldata->getSearch() == "kmer") { templateDB = new KmerDB(templateFileName, kmerSize); }
else if(globaldata->getSearch() == "suffix") { templateDB = new SuffixDB(templateFileName); }
else if(globaldata->getSearch() == "blast") { templateDB = new BlastDB(templateFileName, gapOpen, gapExtend, match, misMatch); }
- else if(globaldata->getSearch() == "distance") { templateDB = new DistanceDB(templateFileName, distanceFileName); }
else { cout << globaldata->getSearch() << " is not a valid search option. I will run the command using suffix." << endl;
templateDB = new SuffixDB(templateFileName); }
if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
- //make new folder for bin info
- //string foldername = "/" + getRootName(globaldata->getListFile()) + list->getLabel() + ".bins/";
- // mkdir(foldername.c_str());
-
string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta";
openOutputFile(outputFileName, out);
//for each bin in the list vector
for (int i = 0; i < list->size(); i++) {
-
- //create output file
- //string outputFileName = foldername + getRootName(globaldata->getListFile()) + "bin" + toString(i+1) + ".fasta";
- //openOutputFile(outputFileName, out);
binnames = list->get(i);
while (binnames.find_first_of(',') != -1) {
remove(outputFileName.c_str());
return 0;
}
- //out.close();
+
}
out.close();
}
+ delete list;
list = input->getListVector();
count++;
}
* Mothur
*
* Created by Thomas Ryabin on 5/13/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
* Mothur
*
* Created by Thomas Ryabin on 5/13/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
#include "concensuscommand.h"
#include "distancecommand.h"
#include "aligncommand.h"
+#include "getrepseqscommand.h"
/***********************************************************/
else if(commandName == "filter.seqs") { command = new FilterSeqsCommand(); }
else if(commandName == "venn") { command = new VennCommand(); }
else if(commandName == "bin.seqs") { command = new BinSeqCommand(); }
+ else if(commandName == "get.repseqs") { command = new GetRepSeqsCommand(); }
else if(commandName == "get.oturep") { command = new GetOTURepCommand(); }
else if(commandName == "tree.shared") { command = new TreeGroupCommand(); }
else if(commandName == "bootstrap.shared") { command = new BootSharedCommand(); }
* Mothur
*
* Created by Thomas Ryabin on 5/13/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
* Mothur
*
* Created by Thomas Ryabin on 5/13/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
if (parameter == "ends" ) { ends = value; }
if (parameter == "processors" ) { processors = value; }
if (parameter == "size" ) { size = value; }
-
if (parameter == "template") { templatefile = value; }
if (parameter == "search") { search = value; }
if (parameter == "ksize") { ksize = value; }
if (parameter == "mismatch") { mismatch = value; }
if (parameter == "gapopen") { gapopen = value; }
if (parameter == "gapextend" ) { gapextend = value; }
-
+
}
}
validateBinFiles();
}
+ if ((commandName == "get.repseqs")) {
+ if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the get.repseqs command." << endl; return false; }
+ else if (groupfile == "") { cout << "You must provide a groupfile before you can use the get.repseqs command." << endl; return false; }
+ validateBinFiles();
+ }
+
+
if ((commandName == "get.oturep")) {
if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) {
cout << "Before you use the get.oturep command, you first need to read in a distance matrix." << endl;
int ableToOpen;
if (fastafile == "") {
- cout << "fasta is a required parameter for bin.seqs and get.oturep commands." << endl; errorFree = false;
+ cout << "fasta is a required parameter for bin.seqs, get.oturep and get.repseqs commands." << endl; errorFree = false;
}else if (fastafile != "") {
//is it a valid filename'
ableToOpen = openInputFile(fastafile, filehandle);
filehandle.close();
//unable to open
if (ableToOpen == 1) { errorFree = false; }
+ }else if (groupfile != "") {
+ //is it a valid filename'
+ ifstream filehandle;
+ int ableToOpen = openInputFile(groupfile, filehandle);
+ filehandle.close();
+ //unable to open
+ if (ableToOpen == 1) { errorFree = false; }
}
+
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateBinFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
--- /dev/null
+/*
+ * getrepseqscommand.cpp
+ * Mothur
+ *
+ * Created by Sarah Westcott on 5/19/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "getrepseqscommand.h"
+
+//**********************************************************************************************************************
+/**
+ * Sets the command up from the current GlobalData settings: remembers the
+ * fasta and names file names, opens the fasta file, reads the group map and
+ * allocates one output stream per group plus one keyed "shared".
+ *
+ * Any exception is reported on stdout and terminates the program.
+ */
+GetRepSeqsCommand::GetRepSeqsCommand(){
+	try {
+		//these are only assigned inside execute(); give them a defined value so
+		//the destructor's deletes are safe even if execute() is never reached
+		input = NULL;
+		read = NULL;
+		list = NULL;
+
+		globaldata = GlobalData::getInstance();
+		fastafile = globaldata->getFastaFile();
+		namesfile = globaldata->getNameFile();
+		openInputFile(fastafile, in);
+
+		fasta = new FastaMap();
+
+		//read in group map info.
+		groupMap = new GroupMap(globaldata->getGroupFile());
+		groupMap->readMap();
+
+		//fill filehandles with necessary ofstreams
+		int i;
+		ofstream* temp;
+		//one for each group
+		for (i=0; i<groupMap->getNumGroups(); i++) {
+			temp = new ofstream;
+			filehandles[groupMap->namesOfGroups[i]] = temp;
+		}
+
+		//one for shared
+		temp = new ofstream;
+		string s = "shared";
+		filehandles[s] = temp;
+
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the GetRepSeqsCommand class function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+
+//**********************************************************************************************************************
+
+/**
+ * Frees the objects this command holds: the input reader, the list-file
+ * reader, the fasta map, the current list vector, the group map and every
+ * output stream allocated by the constructor.
+ */
+GetRepSeqsCommand::~GetRepSeqsCommand(){
+	delete input;
+	delete read;
+	delete fasta;
+	delete list;		//NULL once execute() has walked every list vector; deleting NULL is a no-op
+	delete groupMap;	//allocated with new in the constructor
+
+	//the constructor heap-allocates one ofstream per group plus the "shared" one
+	for (it = filehandles.begin(); it != filehandles.end(); it++) {
+		delete it->second;
+	}
+	filehandles.clear();
+}
+
+//**********************************************************************************************************************
+
+/**
+ * For every list vector selected by line number or label (or all of them),
+ * opens one fasta output file per group plus one "shared" file, then writes
+ * each bin's sequences to the file of the single group the bin belongs to,
+ * or to the shared file when the bin holds sequences from several groups.
+ * Output files that received no bin are removed afterwards.
+ *
+ * Each written name has the form ">name|binNumber" (bin numbers start at 1).
+ *
+ * Returns 0 on completion. A sequence that is missing from the fasta map or
+ * from the group map is reported on stdout, the output files for the current
+ * label are removed, and 0 is returned as well. Exceptions are reported on
+ * stdout and terminate the program.
+ */
+int GetRepSeqsCommand::execute(){
+	try {
+		int count = 1;
+		string binnames, name, sequence;
+		const string sharedKey = "shared";
+
+		//load the sequences from the fasta file
+		fasta->readFastaFile(in);
+
+		//the input reader has to be told it is dealing with a list file
+		globaldata->setFormat("list");
+
+		//the names file is optional
+		if (namesfile != "") { readNamesFile(); }
+
+		//read the list file, then pick up the reader and first list vector from globaldata
+		read = new ReadOTUFile(globaldata->getListFile());
+		read->read(&*globaldata);
+
+		input = globaldata->ginput;
+		list = globaldata->gListVector;
+
+		while(list != NULL){
+
+			if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
+
+				const string label = list->getLabel();
+				cout << label << '\t' << count << endl;
+
+				//open a fresh output file for each group and flag it as not yet written to
+				for (int g = 0; g < groupMap->getNumGroups(); g++) {
+					const string groupName = groupMap->namesOfGroups[g];
+					openOutputFile(fastafile + groupName + label + ".fasta", *(filehandles[groupName]));
+					used[groupName] = false;
+				}
+				//same for the file collecting bins that span several groups
+				openOutputFile(fastafile + sharedKey + label + ".fasta", *(filehandles[sharedKey]));
+				used[sharedKey] = false;
+
+				//handle the bins one at a time
+				for (int i = 0; i < list->size(); i++) {
+					seq.clear();
+					//every group that contributes a sequence to this bin
+					map<string, string> groups;
+
+					//walk the comma separated names; the piece left once no comma remains is the last name
+					binnames = list->get(i);
+					bool lastName = false;
+					while (!lastName) {
+						string::size_type comma = binnames.find_first_of(',');
+						if (comma == string::npos) {
+							name = binnames;
+							lastName = true;
+						}else {
+							name = binnames.substr(0, comma);
+							binnames = binnames.substr(comma+1, binnames.length());
+						}
+
+						sequence = fasta->getSequence(name);
+						if (sequence == "not found") {
+							cout << name << " is missing from your fasta or name file. Please correct. " << endl;
+							removeFiles(label);
+							return 0;
+						}
+
+						string group = groupMap->getGroup(name);
+						if (group == "not found") {
+							cout << "error sequence " << name << " is not assigned a group in your groupfile. Please correct." << endl;
+							removeFiles(label);
+							return 0;
+						}
+
+						groups[group] = group;
+						//attach the bin number to the name
+						seq[">" + name + "|" + toString(i+1)] = sequence;
+					}
+
+					//a bin drawn from exactly one group goes to that group's file, any other bin to the shared file
+					string target = sharedKey;
+					if (groups.size() == 1) { target = groups.begin()->first; }
+					used[target] = true;
+
+					ofstream& out = *(filehandles[target]);
+					for (it3 = seq.begin(); it3 != seq.end(); it3++){
+						out << it3->first << endl;
+						out << it3->second << endl;
+					}
+				}
+
+				//close every stream and delete the files no bin was written to
+				for (it = filehandles.begin(); it != filehandles.end(); it++) {
+					it->second->close();
+					if (used[it->first] == false) {
+						string filename = fastafile + it->first + label + ".fasta";
+						remove(filename.c_str());
+					}
+				}
+
+			}
+
+			//move on to the next list vector
+			delete list;
+			list = input->getListVector();
+			count++;
+		}
+
+		return 0;
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the GetRepSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+
+//**********************************************************************************************************************
+/**
+ * Reads the names file, whose rows hold a sequence name in the first column
+ * and a comma separated list of names in the second, and stores the first
+ * column's sequence in the fasta map under every name of the second column.
+ *
+ * Reading stops at the first row that does not supply both columns.
+ * Exceptions are reported on stdout and terminate the program.
+ */
+void GetRepSeqsCommand::readNamesFile() {
+	try {
+		vector<string> dupNames;
+		openInputFile(namesfile, inNames);
+
+		string name, names, sequence;
+
+		//only process a row when both columns were really extracted, so an
+		//empty file or a row lacking its second column is never handled with
+		//empty or left-over values
+		while(inNames >> name >> names){
+			dupNames.clear();
+
+			//parse names into vector
+			splitAtComma(names, dupNames);
+
+			//store names in fasta map
+			sequence = fasta->getSequence(name);
+			for (vector<string>::size_type i = 0; i < dupNames.size(); i++) {
+				fasta->push_back(dupNames[i], sequence);
+			}
+
+			gobble(inNames);
+		}
+		inNames.close();
+
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the GetRepSeqsCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+/**
+ * Error clean-up: closes every output stream and deletes the fasta output
+ * files of the given label, one per group plus the "shared" one.
+ *
+ * label - label of the list vector whose output files are discarded.
+ *
+ * Exceptions are reported on stdout and terminate the program.
+ */
+void GetRepSeqsCommand::removeFiles(string label) {
+	try {
+		//streams have to be closed before their files are deleted
+		it = filehandles.begin();
+		while (it != filehandles.end()) {
+			it->second->close();
+			it++;
+		}
+
+		//delete the per-group files
+		const int numGroups = groupMap->getNumGroups();
+		for (int g = 0; g < numGroups; g++) {
+			string groupFile = fastafile + groupMap->namesOfGroups[g] + label + ".fasta";
+			remove(groupFile.c_str());
+		}
+
+		//and the file for bins that span several groups
+		string sharedFile = fastafile + "shared" + label + ".fasta";
+		remove(sharedFile.c_str());
+
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the GetRepSeqsCommand class function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+
+//**********************************************************************************************************************
+
--- /dev/null
+#ifndef GETREPSEQSCOMMAND_H
+#define GETREPSEQSCOMMAND_H
+/*
+ * getrepseqscommand.h
+ * Mothur
+ *
+ * Created by Sarah Westcott on 5/19/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "listvector.hpp"
+#include "readotu.h"
+#include "fastamap.h"
+#include "groupmap.h"
+
+
+class GlobalData;
+
+/**
+ * Command that splits the sequences of each bin of a list file into fasta
+ * files: one per group for bins whose sequences all belong to that group,
+ * and one "shared" file for bins that mix groups.
+ */
+class GetRepSeqsCommand : public Command {
+
+public:
+	GetRepSeqsCommand();
+	~GetRepSeqsCommand();
+	int execute();
+
+private:
+	GlobalData* globaldata;
+	ListVector* list;		//list vector currently being processed
+	ReadOTUFile* read;		//reader for the list file
+	GroupMap* groupMap;		//sequence name -> group
+	InputData* input;		//supplies the list vectors one after another
+	FastaMap* fasta;		//sequence name -> sequence
+	string filename;
+	string fastafile;
+	string namesfile;
+	map<string, ofstream*> filehandles;	//group (or "shared") -> its output stream
+	map<string, ofstream*>::iterator it;
+	map<string, bool> used;	//group, if it had any unique otus
+	map<string, bool>::iterator it2;
+	map<string, string> seq;	//output name -> sequence for the bin being written
+	map<string, string>::iterator it3;
+	ifstream in;			//fasta file
+	ifstream inNames;		//names file
+
+	void readNamesFile();
+	void removeFiles(string);
+};
+
+#endif
if (key == "ends" ) { ends = value; }
if (key == "processors" ) { processors = value; }
if (key == "size" ) { size = value; }
-
-
-
-
if (key == "template") { templatefile = value; }
if (key == "search") { search = value; }
if (key == "ksize") { ksize = value; }
if (key == "ends" ) { ends = value; }
if (key == "processors" ) { processors = value; }
if (key == "size" ) { size = value; }
-
if (key == "template") { templatefile = value; }
if (key == "search") { search = value; }
if (key == "ksize") { ksize = value; }
string GlobalData::getEnds() { return ends; }
string GlobalData::getProcessors() { return processors; }
string GlobalData::getSize() { return size; }
-
-void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;}
-void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;}
-void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;}
-void GlobalData::setPhylipFile(string file) { phylipfile = file; inputFileName = file;}
-void GlobalData::setColumnFile(string file) { columnfile = file; inputFileName = file;}
string GlobalData::getTemplateFile() { return templatefile;}
string GlobalData::getSearch() { return search; }
string GlobalData::getKSize() { return ksize; }
string GlobalData::getGapopen() { return gapopen; }
string GlobalData::getGapextend() { return gapextend; }
+
+void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;}
+void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;}
+void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;}
+void GlobalData::setPhylipFile(string file) { phylipfile = file; inputFileName = file;}
+void GlobalData::setColumnFile(string file) { columnfile = file; inputFileName = file;}
void GlobalData::setGroupFile(string file) { groupfile = file; }
void GlobalData::setSharedFile(string file) { sharedfile = file; inputFileName = file; fileroot = file;}
void GlobalData::setNameFile(string file) { namefile = file; }
string getTrump();
string getSoft();
string getFilter();
-
-
string getScale();
cout << "The align.seqs command parameters are fasta, phylip, clustal, nexus, template, search, ksize, align, match, mismatch, gapopen and gapextend. " << "\n";
cout << "You must use one of the following parameters for your candidate filename: fasta, phylip, clustal or nexus. " << "\n";
cout << "The template parameter is also required." << "\n";
- cout << "The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer, blast and distance. The default is suffix." << "\n";
+ cout << "The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is suffix." << "\n";
cout << "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is blast." << "\n";
cout << "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 7." << "\n";
cout << "The match parameter allows you to specify the bonus for having the same base. The default is 1.0." << "\n";
cout << "The default value for line and label are all lines in your inputfile." << "\n";
cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n";
cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
+ }else if (globaldata->helpRequest == "get.repseqs") {
+ cout << "The get.repseqs command can only be executed after a successful read.otu command of a list file." << "\n";
+ cout << "The get.repseqs command parameters are fasta, name, group, line and label. The fasta and group parameters are required, and you may not use line and label at the same time." << "\n";
+ cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n";
+ cout << "The get.repseqss command should be in the following format: get.repseqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupfile, line=yourLines, label=yourLabels)." << "\n";
+ cout << "Example get.repseqs(fasta=amazon.fasta, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n";
+ cout << "The default value for line and label are all lines in your inputfile." << "\n";
+ cout << "The get.repseqs command outputs several .fasta files for each distance you specify. " << "\n";
+ cout << "If the distance level you choose has bins that contain only sequences unique to a specific group those sequences are outputted to a file for that group." << "\n";
+ cout << "If the bin contains sequences from multiple groups then the bin is outputted to the shared fasta file." << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
}else if (globaldata->helpRequest == "get.oturep") {
cout << "The get.oturep command can only be executed after a successful read.dist command." << "\n";
cout << "The get.oturep command parameters are list, fasta, name, line and label. The fasta and list parameters are required, and you may not use line and label at the same time." << "\n";
--- /dev/null
+/*
+ * matrixoutputcommand.cpp
+ * Mothur
+ *
+ * Created by Sarah Westcott on 5/20/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "matrixoutputcommand.h"
+#include "sharedjabund.h"
+#include "sharedsorabund.h"
+#include "sharedjclass.h"
+#include "sharedsorclass.h"
+#include "sharedjest.h"
+#include "sharedsorest.h"
+#include "sharedthetayc.h"
+#include "sharedthetan.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+
+
+//**********************************************************************************************************************
+
+//Builds the list of similarity calculators to run, one for each entry of globaldata->Estimators
+//that ValidCalculators accepts for the "matrix" parameter type, then clears the calc setting.
+//On any exception an error message is printed and the program exits with status 1.
+MatrixOutputCommand::MatrixOutputCommand(){
+	try {
+		globaldata = GlobalData::getInstance();
+		validCalculator = new ValidCalculators();
+
+		//read and input are only assigned inside execute(), but the destructor deletes them
+		//unconditionally; start them out NULL so those deletes are harmless no-ops when
+		//execute() never runs or returns before reaching the assignments
+		read = NULL;
+		input = NULL;
+		numGroups = 0;
+
+		for (size_t i = 0; i < globaldata->Estimators.size(); i++) {
+			//isValidCalculator reports names it rejects itself, so they are simply skipped here
+			if (validCalculator->isValidCalculator("matrix", globaldata->Estimators[i]) == true) {
+				if (globaldata->Estimators[i] == "jabund") {
+					matrixCalculators.push_back(new JAbund());
+				}else if (globaldata->Estimators[i] == "sorabund") {
+					matrixCalculators.push_back(new SorAbund());
+				}else if (globaldata->Estimators[i] == "jclass") {
+					matrixCalculators.push_back(new Jclass());
+				}else if (globaldata->Estimators[i] == "sorclass") {
+					matrixCalculators.push_back(new SorClass());
+				}else if (globaldata->Estimators[i] == "jest") {
+					matrixCalculators.push_back(new Jest());
+				}else if (globaldata->Estimators[i] == "sorest") {
+					matrixCalculators.push_back(new SorEst());
+				}else if (globaldata->Estimators[i] == "thetayc") {
+					matrixCalculators.push_back(new ThetaYC());
+				}else if (globaldata->Estimators[i] == "thetan") {
+					matrixCalculators.push_back(new ThetaN());
+				}else if (globaldata->Estimators[i] == "morisitahorn") {
+					matrixCalculators.push_back(new MorHorn());
+				}else if (globaldata->Estimators[i] == "braycurtis") {
+					matrixCalculators.push_back(new BrayCurtis());
+				}
+			}
+		}
+
+		//reset calc for next command
+		globaldata->setCalc("");
+
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the MatrixOutputCommand class function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+
+//Releases the objects this command holds through its input and read pointers.
+//NOTE(review): input is assigned from globaldata->ginput in execute(), so deleting it here
+//presumably leaves globaldata->ginput pointing at freed memory unless it is reset elsewhere - confirm.
+//The calculators stored in matrixCalculators and the validCalculator object are created with new
+//in the constructor and are not released here.
+MatrixOutputCommand::~MatrixOutputCommand(){
+ delete input;
+ delete read;
+}
+
+//**********************************************************************************************************************
+
+//Runs the matrix.output command. For every line/label of the input file that the user selected,
+//and for every valid calculator, computes the similarity of each pair of groups and writes the
+//resulting square matrix to <root><calculator name>.<label>.matrix.
+//Always returns 0: problems with the user's input are reported on cout, and any exception
+//prints an error message and exits the program with status 1.
+int MatrixOutputCommand::execute(){
+	try {
+		int count = 1;
+		EstOutput data;
+		vector<SharedRAbundVector*> subset;
+
+		//if the users entered no valid calculators don't execute command
+		if (matrixCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; }
+
+		//you have groups
+		read = new ReadOTUFile(globaldata->inputFileName);
+		read->read(&*globaldata);
+
+		input = globaldata->ginput;
+		lookup = input->getSharedRAbundVectors();
+
+		if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0;}
+
+		numGroups = globaldata->Groups.size();
+
+		//presumably the reader signals the end of the file with a NULL first entry - TODO confirm;
+		//the empty() test keeps lookup[0] from being evaluated on an empty vector
+		while(!lookup.empty() && lookup[0] != NULL){
+
+			if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){
+
+				cout << lookup[0]->getLabel() << '\t' << count << endl;
+
+				//for each calculator
+				for(size_t i = 0 ; i < matrixCalculators.size(); i++) {
+
+					//initialize simMatrix to all zeros. It is indexed by positions in lookup, both
+					//below and in printSims, so it is sized from lookup itself rather than from
+					//globaldata->Groups to guarantee every index used is in range
+					simMatrix.assign(lookup.size(), vector<float>(lookup.size(), 0.0f));
+
+					//visit each unordered pair once; the similarity of a group to itself is not needed
+					for (size_t k = 0; k < lookup.size(); k++) {
+						for (size_t l = k + 1; l < lookup.size(); l++) {
+							//get estimated similarity between 2 groups
+
+							subset.clear(); //clear out old pair of sharedrabunds
+							//add new pair of sharedrabunds
+							subset.push_back(lookup[k]); subset.push_back(lookup[l]);
+
+							data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
+							//save values in similarity matrix, mirrored so the matrix is symmetric
+							simMatrix[k][l] = data[0];
+							simMatrix[l][k] = data[0];
+						}
+					}
+
+					exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".matrix";
+					openOutputFile(exportFileName, out);
+					printSims(out);
+					out.close();
+
+				}
+			}
+
+			//prevent memory leak
+			for (size_t i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+
+			//get next line to process
+			lookup = input->getSharedRAbundVectors();
+			count++;
+		}
+
+		//reset groups parameter
+		globaldata->Groups.clear(); globaldata->setGroups("");
+
+		return 0;
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the MatrixOutputCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+/***********************************************************/
+//Writes simMatrix to the given stream as tab-separated text: a header row of group names
+//followed by one row per matrix row, each row starting with its group name.
+void MatrixOutputCommand::printSims(ostream& out) {
+	try {
+
+		//header row: an empty first cell, then every group name
+		out << '\t';
+		for (vector<SharedRAbundVector*>::iterator group = lookup.begin(); group != lookup.end(); ++group) {
+			out << (*group)->getGroup() << '\t';
+		}
+		out << endl;
+
+		//body: row label taken from lookup, then that row's values
+		const size_t dimension = simMatrix.size();
+		for (size_t row = 0; row < dimension; ++row) {
+			out << lookup[row]->getGroup() << '\t';
+
+			for (size_t col = 0; col < dimension; ++col) {
+				out << simMatrix[row][col] << '\t';
+			}
+
+			out << endl;
+		}
+
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the MatrixOutputCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+/***********************************************************/
+
+
--- /dev/null
+#ifndef MATRIXOUTPUTCOMMAND_H
+#define MATRIXOUTPUTCOMMAND_H
+
+/*
+ * matrixoutputcommand.h
+ * Mothur
+ *
+ * Created by Sarah Westcott on 5/20/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+#include "command.hpp"
+#include "inputdata.h"
+#include "groupmap.h"
+#include "readotu.h"
+#include "validcalculator.h"
+
+/* This command creates a matrix file for each similarity calculator at each selected distance level, using various calculators to find the similarity between groups.
+	The user can select the lines or labels they wish to use as well as the groups they would like included.
+	They can also use as many or as few calculators as they wish. */
+
+class GlobalData;
+
+class MatrixOutputCommand : public Command {
+
+public:
+	MatrixOutputCommand();		//selects the calculators to run from globaldata->Estimators
+	~MatrixOutputCommand();		//deletes input and read
+	int execute();			//writes one .matrix file per calculator and selected label
+
+private:
+	//writes simMatrix, labelled with the group names from lookup, to the given stream
+	void printSims(ostream& out);
+
+	//shared program state and the objects used to read the input file
+	GlobalData* globaldata;
+	ReadOTUFile* read;			//created in execute()
+	InputData* input;			//assigned from globaldata->ginput in execute()
+
+	//calculator selection
+	ValidCalculators* validCalculator;
+	vector<Calculator*> matrixCalculators;
+
+	//data for the line currently being processed
+	vector<SharedRAbundVector*> lookup;
+	vector< vector<float> > simMatrix;	//simMatrix[k][l] holds the value computed for lookup[k] and lookup[l]
+	int numGroups;				//set from globaldata->Groups.size() in execute()
+
+	//output
+	string exportFileName;
+	ofstream out;
+
+};
+
+
+#endif
+
*
*
* Created by Pat Schloss on 2/19/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
*
*
* Created by Pat Schloss on 2/19/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
using namespace std;
displays[i]->reset();
}
+ delete merge;
}
for(int i=0;i<displays.size();i++){
* Mothur
*
* Created by Thomas Ryabin on 5/11/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
* Mothur
*
* Created by Thomas Ryabin on 5/11/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
* Mothur
*
* Created by Thomas Ryabin on 5/18/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
* Mothur
*
* Created by Thomas Ryabin on 5/18/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
* Mothur
*
* Created by Thomas Ryabin on 5/13/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
* Mothur
*
* Created by Thomas Ryabin on 5/13/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
*
*/
globaldata = GlobalData::getInstance();
format = globaldata->getFormat();
validCalculator = new ValidCalculators();
- util = new SharedUtil();
-
+
int i;
for (i=0; i<globaldata->Estimators.size(); i++) {
if (validCalculator->isValidCalculator("treegroup", globaldata->Estimators[i]) == true) {
TreeGroupCommand::~TreeGroupCommand(){
delete input;
delete read;
- delete util;
}
//**********************************************************************************************************************
input = globaldata->ginput;
lookup = input->getSharedRAbundVectors();
- if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; }
+ if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0; }
numGroups = globaldata->Groups.size();
groupNames = "";
//create a new filename
outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre";
-
+
+
for (int k = 0; k < lookup.size(); k++) {
for (int l = k; l < lookup.size(); l++) {
if (k != l) { //we dont need to similiarity of a groups to itself
createTree();
}
}
-
+
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+
//get next line to process
lookup = input->getSharedRAbundVectors();
count++;
}
}
/***********************************************************/
-void TreeGroupCommand::printSims() {
+void TreeGroupCommand::printSims(ostream& out) {
try {
- cout << "simsMatrix" << endl;
+
+ //output column headers
+ out << '\t';
+ for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
+ out << endl;
+
+
for (int m = 0; m < simMatrix.size(); m++) {
+ out << lookup[m]->getGroup() << '\t';
for (int n = 0; n < simMatrix.size(); n++) {
- cout << simMatrix[m][n] << '\t';
+ out << simMatrix[m][n] << '\t';
}
- cout << endl;
+ out << endl;
}
}
*/
#include "command.hpp"
-#include "sharedordervector.h"
-#include "sharedlistvector.h"
#include "inputdata.h"
#include "groupmap.h"
#include "readotu.h"
#include "validcalculator.h"
#include "tree.h"
#include "treemap.h"
-#include "sharedutilities.h"
/* This command create a tree file for each similarity calculator at distance level, using various calculators to find the similiarity between groups.
private:
void createTree();
- void printSims();
+ void printSims(ostream&);
GlobalData* globaldata;
- SharedUtil* util;
ReadOTUFile* read;
TreeMap* tmap;
Tree* t;
map<int, int> index; //maps row in simMatrix to vector index in the tree
InputData* input;
ValidCalculators* validCalculator;
- SharedListVector* SharedList;
- SharedOrderVector* order;
vector<SharedRAbundVector*> lookup;
string format, outputFile, groupNames;
int numGroups;
+ ofstream out;
};
initialTreeGroups();
initialBoot();
initialDistance();
+ initialMatrix();
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the ValidCalculator class Function ValidCalculator. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
}
cout << endl;
return false; }
+ }else if (parameter == "matrix") {
+ //is it valid
+ if ((matrix.find(calculator)) != (matrix.end())) {
+ return true;
+ }else {
+ cout << calculator << " is not a valid estimator for the matrix.output command and will be disregarded. Valid estimators are ";
+ for (it = matrix.begin(); it != matrix.end(); it++) {
+ cout << it->first << ", ";
+ }
+ cout << endl;
+ return false; }
}else if (parameter == "boot") {
//is it valid
if ((boot.find(calculator)) != (boot.end())) {
exit(1);
}
}
+/********************************************************************/
+//Fills the matrix map with the estimator names that isValidCalculator accepts for the
+//"matrix" parameter type; every name is stored as both key and value.
+void ValidCalculators::initialMatrix() {
+	try {
+		const char* const calcNames[] = {
+			"jabund", "sorabund", "jclass", "sorclass", "jest",
+			"sorest", "thetayc", "thetan", "morisitahorn", "braycurtis"
+		};
+		const size_t numNames = sizeof(calcNames) / sizeof(calcNames[0]);
+
+		for (size_t n = 0; n < numNames; ++n) {
+			matrix[calcNames[n]] = calcNames[n];
+		}
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the ValidCalculator class Function initialMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the ValidCalculator class function initialMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+
/********************************************************************/
void ValidCalculators::initialBoot() {
try {
map<string, string> vennsingle;
map<string, string> vennshared;
map<string, string> treegroup;
+ map<string, string> matrix;
map<string, string> boot;
map<string, string> distance;
map<string, string>::iterator it;
void initialVennSingle();
void initialVennShared();
void initialTreeGroups();
+ void initialMatrix();
void initialBoot();
void initialDistance();
};
commands["read.tree"] = "read.tree";
commands["read.seqs"] = "read.seqs";
commands["bin.seqs"] = "bin.seqs";
+ commands["get.repseqs"] = "get.repseqs";
commands["get.oturep"] = "get.oturep";
commands["cluster"] = "cluster";
commands["deconvolute"] = "deconvolute";
string binseqsArray[] = {"fasta","line","label","name"};
commandParameters["bin.seqs"] = addParameters(binseqsArray, sizeof(binseqsArray)/sizeof(string));
+ string getrepseqsArray[] = {"fasta","line","label","name", "group"};
+ commandParameters["get.repseqs"] = addParameters(getrepseqsArray, sizeof(getrepseqsArray)/sizeof(string));
+
string getOTURepArray[] = {"fasta","list","line","label","name"};
commandParameters["get.oturep"] = addParameters(getOTURepArray, sizeof(getOTURepArray)/sizeof(string));