git.donarmstrong.com Git - mothur.git/commitdiff
modified bin.seqs and get.oturep commands to include use of a groupfile if provided...
author westcott <westcott>
Thu, 21 May 2009 16:45:22 +0000 (16:45 +0000)
committer westcott <westcott>
Thu, 21 May 2009 16:45:22 +0000 (16:45 +0000)
12 files changed:
Mothur.xcodeproj/project.pbxproj
binsequencecommand.cpp
binsequencecommand.h
commandfactory.cpp
errorchecking.cpp
getoturepcommand.cpp
getoturepcommand.h
getrepseqscommand.cpp [deleted file]
getrepseqscommand.h [deleted file]
helpcommand.cpp
validcommands.cpp
validparameter.cpp

index c49b637db8a6e8d44298f741ff23f3794d95d2c4..1b52eee1d8149b7675c22cb6055b1ae2a04698db 100644 (file)
@@ -7,7 +7,6 @@
        objects = {
 
 /* Begin PBXBuildFile section */
-               211A24EB0FC306BC00769A33 /* getrepseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */; };
                211C38320F961DD400FEE541 /* sharedutilities.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211C38300F961DD400FEE541 /* sharedutilities.cpp */; };
                211C38380F961E1F00FEE541 /* treegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 211C38360F961E1F00FEE541 /* treegroupscommand.cpp */; };
                21DDC01B0F97A8FE0060691C /* bootstrapsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 21DDC01A0F97A8FE0060691C /* bootstrapsharedcommand.cpp */; };
 /* End PBXCopyFilesBuildPhase section */
 
 /* Begin PBXFileReference section */
-               211A24E90FC306BC00769A33 /* getrepseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getrepseqscommand.h; sourceTree = "<group>"; };
-               211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getrepseqscommand.cpp; sourceTree = "<group>"; };
                211C38300F961DD400FEE541 /* sharedutilities.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedutilities.cpp; sourceTree = "<group>"; };
                211C38310F961DD400FEE541 /* sharedutilities.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedutilities.h; sourceTree = "<group>"; };
                211C38360F961E1F00FEE541 /* treegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treegroupscommand.cpp; sourceTree = "<group>"; };
                                A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */,
                                370B88050F8A4EE4005AB382 /* getoturepcommand.h */,
                                370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */,
-                               211A24E90FC306BC00769A33 /* getrepseqscommand.h */,
-                               211A24EA0FC306BC00769A33 /* getrepseqscommand.cpp */,
                                375873F10F7D64800040F377 /* heatmapcommand.h */,
                                375873F00F7D64800040F377 /* heatmapcommand.cpp */,
                                37D927E40F21331F001D4494 /* helpcommand.h */,
                                373C699B0FC1E63600137ACD /* efron.cpp in Sources */,
                                373C699C0FC1E63600137ACD /* solow.cpp in Sources */,
                                EB72FE260FC1F5CA0051AC11 /* shen.cpp in Sources */,
-                               211A24EB0FC306BC00769A33 /* getrepseqscommand.cpp in Sources */,
                                21E859D80FC4632E005E1A48 /* matrixoutputcommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
index 7810ea9f793255e31e44c854a5a4c21639019c60..f1ba8b5f38effa755a215e93a87dcf322001dc6d 100644 (file)
@@ -15,8 +15,15 @@ BinSeqCommand::BinSeqCommand(){
                globaldata = GlobalData::getInstance();
                fastafile = globaldata->getFastaFile();
                namesfile = globaldata->getNameFile();
+               groupfile = globaldata->getGroupFile();
                openInputFile(fastafile, in);
                
+               if (groupfile != "") {
+                       //read in group map info.
+                       groupMap = new GroupMap(groupfile);
+                       groupMap->readMap();
+               }
+               
                fasta = new FastaMap();
        }
        catch(exception& e) {
@@ -36,6 +43,9 @@ BinSeqCommand::~BinSeqCommand(){
        delete read;
        delete fasta;
        delete list;
+       if (groupfile != "") {
+               delete groupMap;
+       }
 }
 
 //**********************************************************************************************************************
@@ -83,9 +93,23 @@ int BinSeqCommand::execute(){
                                                //do work for that name
                                                sequence = fasta->getSequence(name);
                                                if (sequence != "not found") {
-                                                       name = name + "|" + toString(i+1);
-                                                       out << ">" << name << endl;
-                                                       out << sequence << endl;
+                                                       //if you don't have groups
+                                                       if (groupfile == "") {
+                                                               name = name + "|" + toString(i+1);
+                                                               out << ">" << name << endl;
+                                                               out << sequence << endl;
+                                                       }else {//if you do have groups
+                                                               string group = groupMap->getGroup(name);
+                                                               if (group == "not found") {  
+                                                                       cout << name << " is missing from your group file. Please correct. " << endl;
+                                                                       remove(outputFileName.c_str());
+                                                                       return 0;
+                                                               }else{
+                                                                       name = name + "|" + group + "|" + toString(i+1);
+                                                                       out << ">" << name << endl;
+                                                                       out << sequence << endl;
+                                                               }
+                                                       }
                                                }else { 
                                                        cout << name << " is missing from your fasta or name file. Please correct. " << endl; 
                                                        remove(outputFileName.c_str());
@@ -97,9 +121,23 @@ int BinSeqCommand::execute(){
                                        //get last name
                                        sequence = fasta->getSequence(binnames);
                                        if (sequence != "not found") {
-                                               name = binnames + '|' + toString(i+1);
-                                               out << ">" << name << endl;
-                                               out << sequence << endl;
+                                               //if you don't have groups
+                                               if (groupfile == "") {
+                                                       binnames = binnames + "|" + toString(i+1);
+                                                       out << ">" << binnames << endl;
+                                                       out << sequence << endl;
+                                               }else {//if you do have groups
+                                                       string group = groupMap->getGroup(binnames);
+                                                       if (group == "not found") {  
+                                                               cout << binnames << " is missing from your group file. Please correct. " << endl;
+                                                               remove(outputFileName.c_str());
+                                                               return 0;
+                                                       }else{
+                                                               binnames = binnames + "|" + group + "|" + toString(i+1);
+                                                               out << ">" << binnames << endl;
+                                                               out << sequence << endl;
+                                                       }
+                                               }
                                        }else { 
                                                cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; 
                                                remove(outputFileName.c_str());
index 617f5ec7ac98f17f358c383ca424ebd817cb3227..e2ff648283a9c00569a8d2a4170a040b4a494902 100644 (file)
@@ -16,6 +16,7 @@
 #include "listvector.hpp"
 #include "readotu.h"
 #include "fastamap.h"
+#include "groupmap.h"
 
 
 class GlobalData;
@@ -33,7 +34,8 @@ private:
        ReadOTUFile* read;
        InputData* input;
        FastaMap* fasta;
-       string filename, fastafile, namesfile;
+       GroupMap* groupMap;
+       string filename, fastafile, namesfile, groupfile;
        ofstream out;
        ifstream in, inNames;
        
index 21e7b54548a370065ab554d3a2b0f7b5c4887146..06e7e84f06602796d143cdb8d535e46006dc0fb1 100644 (file)
@@ -42,7 +42,6 @@
 #include "concensuscommand.h"
 #include "distancecommand.h"
 #include "aligncommand.h"
-#include "getrepseqscommand.h"
 #include "matrixoutputcommand.h"
 
 
@@ -91,7 +90,6 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "filter.seqs")                   {   command = new FilterSeqsCommand();          }
                else if(commandName == "venn")                                  {   command = new VennCommand();                                }
                else if(commandName == "bin.seqs")                              {   command = new BinSeqCommand();                              }
-               else if(commandName == "get.repseqs")                   {   command = new GetRepSeqsCommand();                  }
                else if(commandName == "get.oturep")                    {   command = new GetOTURepCommand();                   }
                else if(commandName == "tree.shared")                   {   command = new TreeGroupCommand();                   }
                else if(commandName == "dist.shared")                   {   command = new MatrixOutputCommand();                }
index f82e806e45d6b98be601720b3380ae21e142438a..f8b34bba53ef6260d0c0756dfeec109cc946c9e8 100644 (file)
@@ -277,12 +277,6 @@ bool ErrorCheck::checkInput(string input) {
                        validateBinFiles();
                }
                
-               if ((commandName == "get.repseqs")) { 
-                       if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the get.repseqs command." << endl; return false; }
-                       else if (groupfile == "") { cout << "You must provide a groupfile before you can use the get.repseqs command." << endl; return false; }
-                       validateBinFiles();
-               }
-
                
                if ((commandName == "get.oturep")) { 
                        if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) {
index 45f34fef5a444eb66e78788b9107152cd2b20a7d..e5f87c25f014afe8be7dda943e6f0b5e3e255908 100644 (file)
@@ -37,6 +37,14 @@ GetOTURepCommand::GetOTURepCommand(){
                
                fastafile = globaldata->getFastaFile();
                namesfile = globaldata->getNameFile();
+               groupfile = globaldata->getGroupFile();
+               
+               if (groupfile != "") {
+                       //read in group map info.
+                       groupMap = new GroupMap(groupfile);
+                       groupMap->readMap();
+               }
+
                openInputFile(fastafile, in);
                
                fasta = new FastaMap();
@@ -59,6 +67,9 @@ GetOTURepCommand::~GetOTURepCommand(){
        delete input;
        delete read;
        delete fasta;
+       if (groupfile != "") {
+               delete groupMap;
+       }
 }
 
 //**********************************************************************************************************************
@@ -98,15 +109,22 @@ int GetOTURepCommand::execute(){
                                
                                //for each bin in the list vector
                                for (int i = 0; i < list->size(); i++) {
-                                       nameRep = FindRep(i);
+                                       string groups;
+                                       nameRep = FindRep(i, groups);
                                        
                                        //print out name and sequence for that bin
                                        sequence = fasta->getSequence(nameRep);
 
                                        if (sequence != "not found") {
-                                               nameRep = nameRep + "|" + toString(i+1);
-                                               out << ">" << nameRep << endl;
-                                               out << sequence << endl;
+                                               if (groupfile == "") {
+                                                       nameRep = nameRep + "|" + toString(i+1);
+                                                       out << ">" << nameRep << endl;
+                                                       out << sequence << endl;
+                                               }else {
+                                                       nameRep = nameRep + "|" + groups + "|" + toString(i+1);
+                                                       out << ">" << nameRep << endl;
+                                                       out << sequence << endl;
+                                               }
                                        }else { 
                                                cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl; 
                                                remove(outputFileName.c_str());
@@ -173,7 +191,7 @@ void GetOTURepCommand::readNamesFile() {
        }       
 }
 //**********************************************************************************************************************
-string GetOTURepCommand::FindRep(int bin) {
+string GetOTURepCommand::FindRep(int bin, string& group) {
        try{
                vector<string> names;
                map<string, float> sums;
@@ -182,12 +200,34 @@ string GetOTURepCommand::FindRep(int bin) {
                string binnames;
                float min = 10000;
                string minName;
+               map<string, string> groups;
+               map<string, string>::iterator groupIt;
                
                binnames = list->get(bin);
        
                //parse names into vector
                splitAtComma(binnames, names);
                
+               //if you have a groupfile
+               if(groupfile != "") {
+                       //find the groups that are in this bin
+                       for (int i = 0; i < names.size(); i++) {
+                               string groupName = groupMap->getGroup(names[i]);
+                               if (groupName == "not found") {  
+                                       cout << names[i] << " is missing from your group file. Please correct. " << endl;
+                                       groupError = true;
+                               }else{
+                                       groups[groupName] = groupName;
+                               }
+                       }
+                       
+                       //turn the groups into a string
+                       for(groupIt = groups.begin(); groupIt != groups.end(); groupIt++) { group += groupIt->first + "-"; }
+                       
+                       //rip off last dash
+                       group = group.substr(0, group.length()-1);
+               }
+               
                //if only 1 sequence in bin then that's the rep
                if (names.size() == 1) { return names[0]; }
                else {
index 706b219917eb89c99652b8e024421869eac123c1..deafc78c5512edc5039a0425910aaa979eb8f639 100644 (file)
@@ -18,7 +18,7 @@
 #include "inputdata.h"
 #include "readotu.h"
 #include "fastamap.h"
-
+#include "groupmap.h"
 
 class GlobalData;
 
@@ -39,9 +39,11 @@ private:
        ReadOTUFile* read;
        InputData* input;
        FastaMap* fasta;
-       string filename, fastafile, namesfile;
+       GroupMap* groupMap;
+       string filename, fastafile, namesfile, groupfile;
        ofstream out;
        ifstream in, inNames;
+       bool groupError;
        
         
        map<string, int> nameToIndex;  //maps sequence name to index in sparsematrix
@@ -50,7 +52,7 @@ private:
        map<string, int>::iterator it3;
        
        void readNamesFile();
-       string FindRep(int); // returns name of "representative" sequence of given bin.
+       string FindRep(int, string&); // returns name of "representative" sequence of given bin. //and fill a string containing the groups in that bin if a groupfile is given
 
 };
 
diff --git a/getrepseqscommand.cpp b/getrepseqscommand.cpp
deleted file mode 100644 (file)
index ee9bf5a..0000000
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- *  getrepseqscommand.cpp
- *  Mothur
- *
- *  Created by Sarah Westcott on 5/19/09.
- *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "getrepseqscommand.h"
-
-//**********************************************************************************************************************
-GetRepSeqsCommand::GetRepSeqsCommand(){
-       try {
-               globaldata = GlobalData::getInstance();
-               fastafile = globaldata->getFastaFile();
-               namesfile = globaldata->getNameFile();
-               openInputFile(fastafile, in);
-               
-               fasta = new FastaMap();
-               
-               //read in group map info.
-               groupMap = new GroupMap(globaldata->getGroupFile());
-               groupMap->readMap();
-                       
-               //fill filehandles with neccessary ofstreams
-               int i;
-               ofstream* temp;
-               //one for each group
-               for (i=0; i<groupMap->getNumGroups(); i++) {
-                       temp = new ofstream;
-                       filehandles[groupMap->namesOfGroups[i]] = temp;
-               }
-               
-               //one for shared
-               temp = new ofstream;
-               string s = "shared";
-               filehandles[s] = temp;
-               
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the GetRepSeqsCommand class function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
-}
-
-//**********************************************************************************************************************
-
-GetRepSeqsCommand::~GetRepSeqsCommand(){
-       delete input;
-       delete read;
-       delete fasta;
-       delete list;
-}
-
-//**********************************************************************************************************************
-
-int GetRepSeqsCommand::execute(){
-       try {
-               int count = 1;
-               string binnames, name, sequence;
-               
-               //read fastafile
-               fasta->readFastaFile(in);
-               
-               //set format to list so input can get listvector
-               globaldata->setFormat("list");
-               
-               //if user gave a namesfile then use it
-               if (namesfile != "") {
-                       readNamesFile();
-               }
-               
-               //read list file
-               read = new ReadOTUFile(globaldata->getListFile());      
-               read->read(&*globaldata); 
-               
-               input = globaldata->ginput;
-               list = globaldata->gListVector;
-                               
-               while(list != NULL){
-                       
-                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
-                               
-                               cout << list->getLabel() << '\t' << count << endl;
-                               
-                               //open output list files
-                               for (int i=0; i<groupMap->getNumGroups(); i++) {//opens an output file for each group
-                                       openOutputFile(fastafile + groupMap->namesOfGroups[i] + list->getLabel() + ".fasta", *(filehandles[groupMap->namesOfGroups[i]]));
-                                       used[groupMap->namesOfGroups[i]] = false;
-                               }
-                               string s = "shared";
-                               openOutputFile(fastafile + s + list->getLabel() + ".fasta", *(filehandles[s]));
-                               used[s] = false;
-                               
-                               
-                               //for each bin in the list vector
-                               for (int i = 0; i < list->size(); i++) {
-                                       seq.clear();
-                                       //uses this to determine if the bin is unique to one group or if it is shared
-                                       map<string, string> groups;
-
-                                       //determine if this otu is unique to one group or not
-                                       binnames = list->get(i);
-                                       while (binnames.find_first_of(',') != -1) { 
-                                               //parse out each name in bin
-                                               name = binnames.substr(0,binnames.find_first_of(','));
-                                               binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-                                               
-                                               //do work for that name
-                                               sequence = fasta->getSequence(name);
-                                               if (sequence != "not found") {
-                                                       string group = groupMap->getGroup(name);
-                                                       if (group != "not found") {  groups[group] = group;     }  //add group to list of groups in this bin
-                                                       else {  
-                                                               cout << "error sequence " << name << " is not assigned a group in your groupfile. Please correct." << endl;
-                                                               removeFiles(list->getLabel());
-                                                               return 0;
-                                                       }
-                                                       name = ">" + name + "|" + toString(i+1);
-                                                       seq[name] = sequence;
-                                               }else { 
-                                                       cout << name << " is missing from your fasta or name file. Please correct. " << endl; 
-                                                       removeFiles(list->getLabel());
-                                                       return 0;
-                                               }
-                                               
-                                       }
-                                       
-                                       //get last name
-                                       sequence = fasta->getSequence(binnames);
-                                       if (sequence != "not found") {
-                                               string group = groupMap->getGroup(binnames);
-                                               if (group != "not found") {  groups[group] = group;     }  //add group to list of groups in this bin
-                                               else {  
-                                                       cout << "error sequence " << binnames << " is not assigned a group in your groupfile. Please correct." << endl;
-                                                       removeFiles(list->getLabel());
-                                                       return 0;
-                                               }
-                                               binnames = ">" + binnames + "|" + toString(i+1);  //attach bin number to name
-                                               seq[binnames] = sequence;
-                                       }else { 
-                                               cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; 
-                                               removeFiles(list->getLabel());
-                                               return 0;
-                                       }
-                                       
-                                       //output each bin to files
-                                       //what file does this bin need to be outputted to 
-                                       if (groups.size() == 1) { //this bin is unique to one group
-                                               it3 = groups.begin();
-                                               string uniqueGroup = it3->first;
-                                               used[uniqueGroup] = true;
-                                               //print out sequences from that bin to shared file
-                                               for (it3 = seq.begin(); it3 != seq.end(); it3++){
-                                                       *(filehandles[uniqueGroup]) << it3->first << endl;
-                                                       *(filehandles[uniqueGroup]) << it3->second << endl;
-                                               }
-                                       }else {//this bin has sequences from multiple groups in it
-                                               used[s] = true;
-                                               //print out sequences from that bin to shared file
-                                               for (it3 = seq.begin(); it3 != seq.end(); it3++){
-                                                       *(filehandles[s]) << it3->first << endl;
-                                                       *(filehandles[s]) << it3->second << endl;
-                                               }
-                                       }
-                               }
-                               
-                               //close ostreams and remove unused files
-                               for (it = filehandles.begin(); it != filehandles.end(); it++) {
-                                       it->second->close();
-                                       if (used[it->first] == false) { string filename = fastafile + it->first + list->getLabel() + ".fasta";  remove(filename.c_str());  }
-                               }
-
-                       }
-                       
-                       delete list;
-                       list = input->getListVector();
-                       count++;
-               }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the GetRepSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
-}
-
-//**********************************************************************************************************************
-void GetRepSeqsCommand::readNamesFile() {
-       try {
-               vector<string> dupNames;
-               openInputFile(namesfile, inNames);
-               
-               string name, names, sequence;
-       
-               while(inNames){
-                       inNames >> name;                        //read from first column  A
-                       inNames >> names;               //read from second column  A,B,C,D
-                       
-                       dupNames.clear();
-                       
-                       //parse names into vector
-                       splitAtComma(names, dupNames);
-                       
-                       //store names in fasta map
-                       sequence = fasta->getSequence(name);
-                       for (int i = 0; i < dupNames.size(); i++) {
-                               fasta->push_back(dupNames[i], sequence);
-                       }
-               
-                       gobble(inNames);
-               }
-               inNames.close();
-
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the GetRepSeqsCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
-}
-//**********************************************************************************************************************
-void GetRepSeqsCommand::removeFiles(string label) {
-       try {
-                       //close ostreams
-                       for (it = filehandles.begin(); it != filehandles.end(); it++) {
-                               it->second->close();
-                       }
-
-                       //remove output files because there was an error
-                       for (int i=0; i<groupMap->getNumGroups(); i++) {
-                               string outputFileName = fastafile + groupMap->namesOfGroups[i] + label + ".fasta";
-                               remove(outputFileName.c_str());
-                       }
-                       string outputFileName = fastafile + "shared"+ label + ".fasta";
-                       remove(outputFileName.c_str());
-
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the GetRepSeqsCommand class function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
-}
-
-//**********************************************************************************************************************
-
diff --git a/getrepseqscommand.h b/getrepseqscommand.h
deleted file mode 100644 (file)
index 970b65c..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef GETREPSEQSCOMMAND_H
-#define GETREPSEQSCOMMAND_H
-/*
- *  getrepseqscommand.h
- *  Mothur
- *
- *  Created by Sarah Westcott on 5/19/09.
- *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-
-#include "command.hpp"
-#include "inputdata.h"
-#include "listvector.hpp"
-#include "readotu.h"
-#include "fastamap.h"
-#include "groupmap.h"
-
-
-class GlobalData;
-
-class GetRepSeqsCommand : public Command {
-       
-public:
-       GetRepSeqsCommand();    
-       ~GetRepSeqsCommand();
-       int execute();  
-       
-private:
-       GlobalData* globaldata;
-       ListVector* list;
-       ReadOTUFile* read;
-       GroupMap* groupMap;
-       InputData* input;
-       FastaMap* fasta;
-       string filename, fastafile, namesfile;
-       map<string, ofstream*> filehandles;
-       map<string, ofstream*>::iterator it;
-       map<string, bool> used;  //group, if it had any unique otus
-       map<string, bool>::iterator it2;
-       map<string, string> seq;
-       map<string, string>::iterator it3;
-       ifstream in, inNames;
-       
-       void readNamesFile();
-       void removeFiles(string);
-};
-
-#endif
index 72ab80da603d1ca3f8ae55430151c25257a614ca..6abe7afbf7ed447a39dd0f0845f1f01af4fc33ce 100644 (file)
@@ -301,32 +301,23 @@ int HelpCommand::execute(){
                cout << "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages." << "\n" << "\n";
        }else if (globaldata->helpRequest == "bin.seqs") { 
                cout << "The bin.seqs command can only be executed after a successful read.otu command of a list file." << "\n";
-               cout << "The bin.seqs command parameters are fasta, name, line and label.  The fasta parameter is required, and you may not use line and label at the same time." << "\n";
+               cout << "The bin.seqs command parameters are fasta, name, line, label and group.  The fasta parameter is required, and you may not use line and label at the same time." << "\n";
                cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n";
-               cout << "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, line=yourLines, label=yourLabels)." << "\n";
-               cout << "Example bin.seqs(fasta=amazon.fasta, line=1-3-5, name=amazon.names)." << "\n";
+               cout << "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels)." << "\n";
+               cout << "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n";
                cout << "The default value for line and label are all lines in your inputfile." << "\n";
                cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n";
-               cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
-       }else if (globaldata->helpRequest == "get.repseqs") { 
-               cout << "The get.repseqs command can only be executed after a successful read.otu command of a list file." << "\n";
-               cout << "The get.repseqs command parameters are fasta, name, group, line and label.  The fasta and group parameters are required, and you may not use line and label at the same time." << "\n";
-               cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n";
-               cout << "The get.repseqss command should be in the following format: get.repseqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupfile, line=yourLines, label=yourLabels)." << "\n";
-               cout << "Example get.repseqs(fasta=amazon.fasta, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n";
-               cout << "The default value for line and label are all lines in your inputfile." << "\n";
-               cout << "The get.repseqs command outputs several .fasta files for each distance you specify.  " << "\n";
-               cout << "If the distance level you choose has bins that contain only sequences unique to a specific group those sequences are outputted to a file for that group." << "\n";
-               cout << "If the bin contains sequences from multiple groups then the bin is outputted to the shared fasta file." << "\n";
+               cout << "If you provide a groupfile, then it also appends the sequences group to the name." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "get.oturep") { 
                cout << "The get.oturep command can only be executed after a successful read.dist command." << "\n";
-               cout << "The get.oturep command parameters are list, fasta, name, line and label.  The fasta and list parameters are required, and you may not use line and label at the same time." << "\n";
+               cout << "The get.oturep command parameters are list, fasta, name, group, line and label.  The fasta and list parameters are required, and you may not use line and label at the same time." << "\n";
                cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n";
-               cout << "The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, line=yourLines, label=yourLabels)." << "\n";
-               cout << "Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, line=1-3-5, name=amazon.names)." << "\n";
+               cout << "The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels)." << "\n";
+               cout << "Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups, line=1-3-5, name=amazon.names)." << "\n";
                cout << "The default value for line and label are all lines in your inputfile." << "\n";
                cout << "The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin." << "\n";
+               cout << "If you provide a groupfile, then it also appends the names of the groups present in that bin." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "quit") {
                cout << "The quit command will terminate mothur and should be in the following format: " << "\n";
index 787cc8025a6c62d01be190a63c33e42429fe21a4..7f7acc2e61044c7ef8f2e8948a112dfd3184cfaf 100644 (file)
@@ -19,7 +19,6 @@ ValidCommands::ValidCommands() {
                commands["read.tree"]                   = "read.tree"; 
                commands["read.seqs"]           = "read.seqs";
                commands["bin.seqs"]                    = "bin.seqs"; 
-               commands["get.repseqs"]                 = "get.repseqs"; 
                commands["get.oturep"]                  = "get.oturep";
                commands["cluster"]                             = "cluster"; 
                commands["deconvolute"]                 = "deconvolute"; 
index 5da87d1df8ee6d568264313ece64bbc7177cae40..6719e2c4eb706db4a99b6620eb861e3f41268971 100644 (file)
@@ -273,16 +273,13 @@ void ValidParameters::initCommandParameters() {
                string vennArray[] =  {"groups","line","label","calc"};
                commandParameters["venn"] = addParameters(vennArray, sizeof(vennArray)/sizeof(string));
                
-               string binseqsArray[] =  {"fasta","line","label","name"};
+               string binseqsArray[] =  {"fasta","line","label","name", "group"};
                commandParameters["bin.seqs"] = addParameters(binseqsArray, sizeof(binseqsArray)/sizeof(string));
                
                string distsharedArray[] =  {"line","label","calc","groups"};
                commandParameters["dist.shared"] = addParameters(distsharedArray, sizeof(distsharedArray)/sizeof(string));
                
-               string getrepseqsArray[] =  {"fasta","line","label","name", "group"};
-               commandParameters["get.repseqs"] = addParameters(getrepseqsArray, sizeof(getrepseqsArray)/sizeof(string));
-               
-               string getOTURepArray[] =  {"fasta","list","line","label","name"};
+               string getOTURepArray[] =  {"fasta","list","line","label","name", "group"};
                commandParameters["get.oturep"] = addParameters(getOTURepArray, sizeof(getOTURepArray)/sizeof(string));
                
                string treeGroupsArray[] =  {"line","label","calc","groups"};