git.donarmstrong.com Git - mothur.git/commitdiff
minor bugs fixes and added line and label options to read.otu's parselist and shared...
author westcott <westcott>
Wed, 18 Feb 2009 15:11:01 +0000 (15:11 +0000)
committer westcott <westcott>
Wed, 18 Feb 2009 15:11:01 +0000 (15:11 +0000)
13 files changed:
deconvolutecommand.cpp
deconvolutecommand.h
engine.cpp
errorchecking.cpp
fastamap.cpp
fastamap.h
globaldata.cpp
helpcommand.cpp
parselistcommand.cpp
parsimonycommand.cpp
parsimonycommand.h
sharedcommand.cpp
validparameter.cpp

index f2dfdd5bae38d5994ff03150013aab9db0ecfb7c..966fee03f90bf4fb87d39efdedf0189cc18f10be 100644 (file)
@@ -17,8 +17,11 @@ int DeconvoluteCommand::execute() {
                //prepare filenames and open files
                filename = globaldata->getFastaFile();
                outputFileName = (getRootName(filename) + "names");
+               outFastafile = (getRootName(filename) + "uni_fasta");
+               
                openInputFile(filename, in);
                openOutputFile(outputFileName, out);
+               openOutputFile(outFastafile, outFasta);
        
                //constructor reads in file and store internally
                fastamap = new FastaMap();
@@ -30,6 +33,7 @@ int DeconvoluteCommand::execute() {
                //file contains 2 columns separated by tabs.  the first column is the groupname(name of first sequence found.
                //the second column is the list of names of identical sequences separated by ','.
                fastamap->print(out);
+               fastamap->printCondensedFasta(outFasta);
        
                return 0;
        }
index b9b208493f2f6fea9a35055c448b65973ddc6a46..2ccc20a633b882475099132269eb0142cf55adcf 100644 (file)
@@ -32,8 +32,8 @@ private:
        GlobalData* globaldata;
        FastaMap* fastamap;
        ifstream in;
-       ofstream out;
-       string filename, outputFileName;
+       ofstream out, outFasta;
+       string filename, outputFileName, outFastafile;
 
 };
 
index 1ca98b021214e624215ec897a549e0f14ed47ca3..e251f0031d5ffa2bc9685702ef76ae0489af8449 100644 (file)
@@ -75,6 +75,8 @@ bool InteractEngine::getInput(){
 
                        cout << endl << "mothur > ";
                        getline(cin, input);
+                       if (cin.eof()) { input = "quit()"; }
+                       
                        errorFree = errorCheckor->checkInput(input);
                        if (errorFree == true) {
                                CommandOptionParser parser(input);
@@ -148,7 +150,9 @@ bool BatchEngine::getInput(){
                while(quitCommandCalled == 0){
                
                        getline(inputBatchFile, input);
-                       cout << endl << "dotur > " << input << endl;
+                       if (inputBatchFile.eof()) { input = "quit()"; }
+                       
+                       cout << endl << "mothur > " << input << endl;
                        errorFree = errorCheckor->checkInput(input);
                        if (errorFree == true) {
                                CommandOptionParser parser(input);
index 7f581d9e91fd4ef1352e866180f36a4aefccc69f..1d60a4a35a8e7252e8f318a4997f91abdf102a51 100644 (file)
@@ -102,7 +102,7 @@ bool ErrorCheck::checkInput(string input) {
                                if (parameter == "fileroot" )           { fileroot = value; }
                                if (parameter == "line" )                       { line = value; }
                                if (parameter == "label" )                      { label = value; }
-                               if (parameter == "randomtree" )         { randomtree = value;   }
+                               if (parameter == "random" )                     { randomtree = value;   }
 
                        }
                        
@@ -133,7 +133,7 @@ bool ErrorCheck::checkInput(string input) {
                                if (parameter == "fileroot" )           { fileroot = value; }
                                if (parameter == "line" )                       { line = value; }
                                if (parameter == "label" )                      { label = value; }
-                               if (parameter == "randomtree" )         { randomtree = value;   }
+                               if (parameter == "random" )                     { randomtree = value;   }
 
                        }
                }
@@ -141,9 +141,6 @@ bool ErrorCheck::checkInput(string input) {
                //make sure the user does not use both the line and label parameters
                if ((line != "") && (label != "")) { cout << "You may use either the line or label parameters, but not both." << endl; return false; }
                
-               //make sure you have a valid random tree value
-               if ((randomtree != "0") && (randomtree != "1")) { cout << randomtree << " is not a valid randomtree value.  Valid values for randomtree are 0, (meaning you have read your own trees) or 1 (meaning you want to random distribution of trees)." << endl; return false; }
-               
                if (commandName == "read.dist") { 
                        validateReadFiles();
                        validateReadDist();
@@ -174,9 +171,9 @@ bool ErrorCheck::checkInput(string input) {
                
                if (commandName == "parsimony") {
                        //are you trying to use parsimony without reading a tree or saying you want random distribution
-                       if (randomtree == "0")  {
+                       if (randomtree == "")  {
                                if (globaldata->gTree.size() == 0) {
-                                       cout << "You must read a treefile and a groupfile or set the randomtree parameter to 1, before you may execute the parsimony command." << endl; return false;  }
+                                       cout << "You must read a treefile and a groupfile or set the random parameter to the output filename you wish, before you may execute the parsimony command." << endl; return false;  } //message names the parameter as the parser matches it ("random")
                        }
                }
                
index 0e6c22e21f48aec62b4a365db1e2454cd65ce7c9..14065231e96f4a7303921424ab53ce633e0984ec 100644 (file)
@@ -107,3 +107,22 @@ void FastaMap::print(ostream& out){ //prints data
        }
 }
 /*******************************************************************************/
+void FastaMap::printCondensedFasta(ostream& out){ //writes one fasta record to out for every entry in data
+       try {
+               // each record is a ">groupname" header line followed by the map key on its own line (presumably the unique sequence text - confirm against readFastaFile)
+               for (it = data.begin(); it != data.end(); it++) {
+                       out << ">" << it->second.groupname << endl;
+                       out << it->first << endl;
+               }
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function printCondensedFasta. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FastaMap class function printCondensedFasta. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+/*******************************************************************************/
+
index 864a2d03951a9892919dfbdc8641a156e4ef5134..8ba2fc6ec3e9507a018084080d6dd6275b1203d1 100644 (file)
@@ -38,6 +38,7 @@ public:
        void clear();
        int size();                                     //returns number of unique sequences
        void print(ostream&);           //produces a 2 column file with the groupname in the first column and the names in the second column.
+       void printCondensedFasta(ostream&);             //produces a fasta file.
        void readFastaFile(ifstream&);
 
 private:
index 9b740a68c1a6b945d4f12789a5ee125e308f2c8c..eeb1f4d5c51d36d5f38942f6c0b328a7386eb20b 100644 (file)
@@ -67,7 +67,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                allLines = 1;
                commandName = commandString; //save command name to be used by other classes
                
-               //set all non filename paramters to default values
+               //set all non filename paramters to default
                reset();
                
                //clears out data from previous read
@@ -75,6 +75,8 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                        clear();
                        gGroupmap = NULL;
                        gTree.clear();
+                       labels.clear(); lines.clear(); groups.clear();
+                       
                }
                
                //saves help request
@@ -107,7 +109,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                                if (key == "freq" )                     { freq = value;                 }
                                if (key == "method" )           { method = value;               }
                                if (key == "fileroot" )         { fileroot = value;             }
-                               if (key == "randomtree" )       { randomtree = value;   }
+                               if (key == "random" )           { randomtree = value;   }
                                if (key == "groups" )           { groups = value;       }
                                if (key == "calc")                      { calc = value;         }
                                
@@ -153,7 +155,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                        if (key == "freq" )                     { freq = value;                 }
                        if (key == "method" )           { method = value;               }
                        if (key == "fileroot" )         { fileroot = value;             }
-                       if (key == "randomtree" )       { randomtree = value;   }
+                       if (key == "random" )           { randomtree = value;   }
                        if (key == "groups" )           { groups = value;       }
                        if (key == "calc")                      { calc = value;         }
 
@@ -296,7 +298,7 @@ void GlobalData::clear() {
        label                   =       "";
        groups                  =       "";
        jumble                  =       "1";    //0 means don't jumble, 1 means jumble.
-       randomtree              =       "0";  //0 means user will enter some user trees, 1 means they just want the random tree distribution.
+       randomtree              =       "";  //"" means user will enter some user trees, "outputfile" means they just want the random tree distribution to be outputted to outputfile.
        freq                    =       "100";
        method                  =       "furthest";
        fileroot                =       "";
@@ -313,7 +315,7 @@ void GlobalData::reset() {
        label                   =       "";
        groups                  =       "";
        jumble                  =       "1";    //0 means don't jumble, 1 means jumble.
-       randomtree              =       "0";  //0 means user will enter some user trees, 1 means they just want the random tree distribution.
+       randomtree              =       "";  //"" means user will enter some user trees, "outputfile" means they just want the random tree distribution to be outputted to outputfile.
        freq                    =       "100";
        method                  =       "furthest";
        calc                    =       "";
index 0abab5a0fe25f7d4f0e5a100b379bf062f4ffb25..e5247f90f5dce3a760f39614f77cb4c152ec53f9 100644 (file)
@@ -34,12 +34,14 @@ int HelpCommand::execute(){
        }else if (globaldata->helpRequest == "read.otu") {
                cout << "The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, " << "\n";
                cout << "collect.shared, rarefaction.shared or summary.shared command.   Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command " << "\n";
-               cout << "or you may use your own. The read.otu command parameter options are list, rabund, sabund, group and order." << "\n";
+               cout << "or you may use your own. The read.otu command parameter options are list, rabund, sabund, group, order, line and label." << "\n";
                cout << "The read.otu command can be used in two ways.  The first is to read a list, rabund or sabund and run the collect.single, rarefaction.single or summary.single." << "\n";
-               cout << "For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile)." << "\n";
+               cout << "For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile, label=yourLabels)." << "\n";
                cout << "The list, rabund or sabund parameter is required, but you may only use one of them." << "\n";
+               cout << "The line and label parameters are optional but you may not use both the line and label parameters at the same time." << "\n";
+               cout << "The label and line parameters are used to read specific lines in your input." << "\n";
                cout << "The second way to use the read.otu command is to read a list and a group so you can use the collect.shared, rarefaction.shared or summary.shared commands." << "\n";
-               cout << "In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile).  " << "\n";
+               cout << "In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile, line=yourLines).  " << "\n";
                cout << "The list parameter and group paramaters are required. When using the command the second way read.otu command parses the .list file" << "\n";
                cout << "and separates it into groups.  It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .list file for each group. " << "\n";
                cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
@@ -125,13 +127,13 @@ int HelpCommand::execute(){
                cout << "The label and line parameters are used to analyze specific lines in your input." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "parsimony") { 
-               cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the randomtree parameter." << "\n";
-               cout << "The parsimony command parameters are randomtree and iters.  No parameters are required." << "\n";
-               cout << "The parsimony command should be in the following format: parsimony(randomtree=yourRandomTreeValue, iters=yourIters)." << "\n";
-               cout << "Example parsimony(randomtree=1, iters=500)." << "\n";
-               cout << "The default value for randomTree is 0 (meaning you want to use the trees in your inputfile, randomtree=1 means you just want the random distribution of trees)," << "\n";
+               cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter." << "\n";
+               cout << "The parsimony command parameters are random and iters.  No parameters are required." << "\n";
+               cout << "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, iters=yourIters)." << "\n";
+               cout << "Example parsimony(random=out, iters=500)." << "\n";
+               cout << "The default value for random is \"\" (meaning you want to use the trees in your inputfile, random=out means you just want the random distribution of trees outputted to out.rd_parsimony)," << "\n"; //quotes are escaped so the empty-string default is actually printed
                cout << "and iters is 1000.  The parsimony command output three files: .parsimony, .psummary and .pdistrib, their descriptions are in the manual." << "\n";
-               cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
+               cout << "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "unifrac.weighted") { 
                cout << "The unifrac.weighted command can only be executed after a successful read.tree command." << "\n";
                cout << "The unifrac.weighted command parameters are groups and iters.  No parameters are required." << "\n";
index 5379052d6dd4f1bbfe51b3b346874def78a7b841..d9925ad40ecd02cc1580e7d087b6b5acb3514831 100644 (file)
@@ -86,6 +86,7 @@ void ParseListCommand::parse(int index) {
 int ParseListCommand::execute(){
        try{
                        globaldata = GlobalData::getInstance();
+                       int count = 1;
                        
                        //read in listfile
                        read = new ReadPhilFile(globaldata->inputFileName);     
@@ -107,22 +108,30 @@ int ParseListCommand::execute(){
                        //parses and sets each groups listvector
                        while(list != NULL){
                                label = list->getLabel();
-                               for(i=0; i<list->size(); i++) {
-                                       parse(i); //parses data[i] list of sequence names
-                                       for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
-                                               seq = it->second;
-                                               seq = seq.substr(1, seq.length()); //rips off extra comma
-                                               groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
+                               
+                               if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(label) == 1){
+                               
+                                       for(i=0; i<list->size(); i++) {
+                                               parse(i); //parses data[i] list of sequence names
+                                               for (it=listGroups.begin(); it != listGroups.end(); it++) {  //loop through map and set new list vectors
+                                                       seq = it->second;
+                                                       seq = seq.substr(1, seq.length()); //rips off extra comma
+                                                       groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
+                                               }
+                                               listGroups.clear();
                                        }
-                                       listGroups.clear();
-                               }
-                               //prints each new list file
-                               for (i=0; i<groupMap->getNumGroups(); i++) {
-                                       groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label);
-                                       groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
-                                       groupOfLists[groupMap->namesOfGroups[i]]->clear();
+                                       //prints each new list file
+                                       for (i=0; i<groupMap->getNumGroups(); i++) {
+                                               groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label);
+                                               groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
+                                               groupOfLists[groupMap->namesOfGroups[i]]->clear();
+                                       }
+                                       
+                                       cout << label << '\t' << count << endl;
                                }
+                               
                                list = input->getSharedListVector();
+                               count++;
                        }
                        
                        //set groupmap for .shared commands
index 4632019151fa0a93d7dba5c14017940d04aa1653..989ea120447540884daac7f4f5e5496946b86d52 100644 (file)
@@ -15,10 +15,10 @@ ParsimonyCommand::ParsimonyCommand() {
                globaldata = GlobalData::getInstance();
                
                //randomtree will tell us if user had their own treefile or if they just want the random distribution
-               convert(globaldata->getRandomTree(), randomtree);
+               randomtree = globaldata->getRandomTree();
                
                //user has entered their own tree
-               if (randomtree == 0) { 
+               if (randomtree == "") { 
                        T = globaldata->gTree;
                        tmap = globaldata->gTreemap;
                        parsFile = globaldata->getTreeFile() + ".parsimony";
@@ -29,8 +29,9 @@ ParsimonyCommand::ParsimonyCommand() {
                        openOutputFile(distFile, outDist);
 
                }else { //user wants random distribution
+                       savetmap = globaldata->gTreemap;
                        getUserInput();
-                       parsFile = "rd_parsimony";
+                       parsFile = randomtree + ".rd_parsimony";
                        openOutputFile(parsFile, out);
                }
                
@@ -59,7 +60,7 @@ int ParsimonyCommand::execute() {
                outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
                outDist << "RandomTree#" << '\t' << "ParsScore" << endl;
                
-               if (randomtree == 0) {
+               if (randomtree == "") {
                        //get pscores for users trees
                        for (int i = 0; i < T.size(); i++) {
                                cout << "Processing tree " << i+1 << endl;
@@ -133,7 +134,7 @@ int ParsimonyCommand::execute() {
                
                //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
                for (it = validScores.begin(); it != validScores.end(); it++) { 
-                       if (randomtree == 0) {
+                       if (randomtree == "") {
                                it2 = uscoreFreq.find(it->first);
                                //user data has that score 
                                if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul+= it2->second;  }
@@ -158,8 +159,11 @@ int ParsimonyCommand::execute() {
                printParsimonyFile();
                printUSummaryFile();
                
-               //reset randomTree parameter to 0
-               globaldata->setRandomTree("0");
+               //reset globaldata's treemap if you just did random distrib
+               if (randomtree != "") { globaldata->gTreemap = savetmap; }
+               
+               //reset randomTree parameter to ""
+               globaldata->setRandomTree("");
                
                return 0;
                
@@ -178,7 +182,7 @@ int ParsimonyCommand::execute() {
 void ParsimonyCommand::printParsimonyFile() {
        try {
                //column headers
-               if (randomtree == 0) {
+               if (randomtree == "") {
                        out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
                }else {
                        out << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
@@ -189,7 +193,7 @@ void ParsimonyCommand::printParsimonyFile() {
                
                //print each line
                for (it = validScores.begin(); it != validScores.end(); it++) { 
-                       if (randomtree == 0) {
+                       if (randomtree == "") {
                                out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; 
                        }else{
                                out << setprecision(6) << it->first << '\t' << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl;      
index 5429e77acf8411bd3836e3cc6fa3b1a23db24a5a..e854294834af55cb0cbe97ec035a0e01f78b66a3 100644 (file)
@@ -33,10 +33,11 @@ class ParsimonyCommand : public Command {
                GlobalData* globaldata;
                vector<Tree*> T;           //user trees
                Tree* randT;  //random tree
-               TreeMap* tmap;
+               TreeMap* tmap; 
+               TreeMap* savetmap;
                Parsimony* pars;
-               string parsFile, sumFile, distFile;
-               int iters, randomtree, numGroups;
+               string parsFile, sumFile, distFile, randomtree;
+               int iters, numGroups;
                vector<int> numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib.
                vector<float> userTreeScores; //scores for users trees
                vector<float> UScoreSig;  //tree score signifigance when compared to random trees - percentage of random trees with that score or lower.
index 0cc1e22f2d9067fb7fcad21b6cff5c456f5e7049..a797d5408d151beddc73be085111bbcf41cd84cd 100644 (file)
@@ -46,9 +46,14 @@ int SharedCommand::execute(){
                shared = new Shared();
                int i = 0;
                while(SharedList != NULL){
-                       shared->getSharedVectors(i, SharedList); //fills sharedGroups with new info and updates sharedVector
+               
+                       if(globaldata->allLines == 1 || globaldata->lines.count(i+1) == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
+                       
+                               shared->getSharedVectors(i, SharedList); //fills sharedGroups with new info and updates sharedVector
+                               printSharedData(); //prints info to the .shared file
+                       }
+                       
                        SharedList = input->getSharedListVector(); //get new list vector to process
-                       printSharedData(); //prints info to the .shared file
                        i++;
                }
                return 0;
index 1c6cd60143c95fff34c531615b108d5d73925be4..a82962f1e0096e406e7cc21d03dba08a3039ea3a 100644 (file)
@@ -40,7 +40,7 @@ ValidParameters::ValidParameters() {
                parameters["shared"]                    = "shared"; 
                parameters["summary"]                   = "summary"; 
                parameters["sharedsummary"]             = "sharedsummary";
-               parameters["randomtree"]                = "randomtree";
+               parameters["random"]                    = "random";
                parameters["groups"]                    = "groups";
                parameters["calc"]                              = "calc";