]> git.donarmstrong.com Git - mothur.git/commitdiff
added groups option to read.otu, added qtrim option to trim.seqs, fixed bug in get...
authorwestcott <westcott>
Mon, 2 Nov 2009 15:43:43 +0000 (15:43 +0000)
committerwestcott <westcott>
Mon, 2 Nov 2009 15:43:43 +0000 (15:43 +0000)
getsharedotucommand.cpp
hcluster.cpp
hcluster.h
hclustercommand.cpp
mothur.h
sharedcommand.cpp
sharedcommand.h
sharedlistvector.cpp
sharedrabundvector.h
trimseqscommand.cpp
trimseqscommand.h

index 56f030d235212dec72677e15431fef6a6edbcad9..6d134728899b2a0b127be542e3f26547022d0fb3 100644 (file)
@@ -93,6 +93,7 @@ void GetSharedOTUCommand::help(){
                mothurOut("The get.sharedotu command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n");
                mothurOut("The get.sharedotu command should be in the following format: get.sabund(label=yourLabels, groups=yourGroups, fasta=yourFastafile, output=yourOutput).\n");
                mothurOut("Example get.sharedotu(list=amazon.fn.list, label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n");
+               mothurOut("The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n");
                mothurOut("The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n");
                mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
        }
@@ -227,6 +228,7 @@ void GetSharedOTUCommand::process(ListVector* shared) {
                openOutputFile(outputFileNames, outNames);
                
                bool wroteSomething = false;
+               int num = 0;
                                
                //go through each bin, find out if shared
                for (int i = 0; i < shared->getNumBins(); i++) {
@@ -248,7 +250,7 @@ void GetSharedOTUCommand::process(ListVector* shared) {
                                //find group
                                string seqGroup = groupMap->getGroup(name);
                                if (output != "accnos") {
-                                       namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + toString(i)));
+                                       namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + toString(i+1)));
                                }else {  namesOfSeqsInThisBin.push_back(name);  }
 
                                if (seqGroup == "not found") { mothurOut(name + " is not in your groupfile. Please correct."); mothurOutEndLine(); exit(1);  }
@@ -264,7 +266,7 @@ void GetSharedOTUCommand::process(ListVector* shared) {
                        if (sharedByAll) {
                                string seqGroup = groupMap->getGroup(names);
                                if (output != "accnos") {
-                                       namesOfSeqsInThisBin.push_back((names + "|" + seqGroup + "|" + toString(i)));
+                                       namesOfSeqsInThisBin.push_back((names + "|" + seqGroup + "|" + toString(i+1)));
                                }else {  namesOfSeqsInThisBin.push_back(names); }
                                
                                if (seqGroup == "not found") { mothurOut(names + " is not in your groupfile. Please correct."); mothurOutEndLine(); exit(1);  }
@@ -285,6 +287,7 @@ void GetSharedOTUCommand::process(ListVector* shared) {
                        if (sharedByAll) {
                                
                                wroteSomething = true;
+                               num++;
                                
                                //output list of names 
                                for (int j = 0; j < namesOfSeqsInThisBin.size(); j++) {
@@ -310,7 +313,7 @@ void GetSharedOTUCommand::process(ListVector* shared) {
                
                if (!wroteSomething) {
                        remove(outputFileNames.c_str());
-                       string outputString = " - No otus shared by groups";
+                       string outputString = "\t" + toString(num) + " - No otus shared by groups";
                        
                        string groupString = "";
                        for (int h = 0; h < Groups.size(); h++) {
@@ -319,7 +322,7 @@ void GetSharedOTUCommand::process(ListVector* shared) {
                        
                        outputString += groupString + ".";
                        mothurOut(outputString); mothurOutEndLine();
-               }else { mothurOutEndLine(); }
+               }else { mothurOut("\t" + toString(num)); mothurOutEndLine(); }
                
                //if fasta file provided output new fasta file
                if ((fastafile != "") && wroteSomething) {      
index 4e83777751fa50686903758f41dd61fcdd53c52f..2d980d73723dba0b712fc0025d975520061e9180 100644 (file)
@@ -235,19 +235,17 @@ void HCluster::updateArrayandLinkTable() {
                        }
 //cout << "here3" << endl;                     
                        linkTable[colSpot].erase(size);
-                       linkTable.erase(linkTable.begin()+rowSpot);  //delete col
+                       //linkTable.erase(linkTable.begin()+rowSpot);  //delete row
        //printInfo();          
                        //update activerows
                        activeLinks.erase(smallRow);
                        activeLinks.erase(smallCol);
-                       
-                       if(rowSpot>colSpot) {   activeLinks[size] = colSpot;    }
-                       else{   activeLinks[size] = colSpot;  }
+                       activeLinks[size] = colSpot;
                        
                        //adjust everybody elses spot since you deleted - time vs. space
-                       for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
-                               if (it->second > rowSpot) {  activeLinks[it->first]--;  }
-                       }
+                       //for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
+                               //if (it->second > rowSpot) {  activeLinks[it->first]--;        }
+                       //}
                        
 //cout << "here4" << endl;
        
index ff02b1caff85b8fc48757d3d2b018b3b997ff7f4..6b2862b7919b8d74e7a28a0442ef10bceb863db0 100644 (file)
 
 class RAbundVector;
 class ListVector;
-/************************************************************/
-struct clusterNode {
-       int numSeq;
-       int parent;
-       int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
-       clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
-};
 
 /***********************************************************************/
 class HCluster {
        
 public:
        HCluster(RAbundVector*, ListVector*);
+       ~HCluster(){};
     bool update(int, int, float);
        //string getTag();
 
index 6e3ec243addd289a6d44226adeddd6a3fccbb0d2..e140f8c3489cf45615a200609caa08d00a23979d 100644 (file)
@@ -168,8 +168,6 @@ int HClusterCommand::execute(){
                        mothurOut("Error: no list vector!"); mothurOutEndLine(); return 0;
                }
                
-               
-               
                float previousDist = 0.00000;
                float rndPreviousDist = 0.00000;
                oldRAbund = *rabund;
@@ -260,6 +258,7 @@ int HClusterCommand::execute(){
                rabundFile.close();
                listFile.close();
                
+               delete cluster;
                //if (isTrue(timing)) {
                        mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster. "); mothurOutEndLine();
                //}
index 84dbdb3dee2879829909b38654c80057d9d5fdb5..23b9f189c992f97e2e9a47823cacce62db363696 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -78,6 +78,14 @@ struct ThreadNode {
        IntNode* right;
 };
 
+/************************************************************/
+struct clusterNode {
+       int numSeq;
+       int parent;
+       int smallChild; //used to make linkTable work with list and rabund. represents bin number of this cluster node
+       clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
+};
+
 /***********************************************************************/
 
 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
index 727a04b273fc3ee4b4c9c5882f55e746ad2c0d6a..0dcdfc0143b12ae0c827086465a16bfb486dc66b 100644 (file)
@@ -23,20 +23,27 @@ SharedCommand::SharedCommand(){
                
                groupMap = globaldata->gGroupmap;
                
+               //if hte user has not specified any groups then use them all
+               if (globaldata->Groups.size() == 0) {
+                       groups = groupMap->namesOfGroups;
+               }else{ //they have specified groups
+                       groups = globaldata->Groups;
+               }
+               
                //fill filehandles with neccessary ofstreams
                int i;
                ofstream* temp;
-               for (i=0; i<groupMap->getNumGroups(); i++) {
+               for (i=0; i<groups.size(); i++) {
                        temp = new ofstream;
-                       filehandles[groupMap->namesOfGroups[i]] = temp;
+                       filehandles[groups[i]] = temp;
                }
                
                //set fileroot
                fileroot = getRootName(globaldata->getListFile());
                
                //clears file before we start to write to it below
-               for (int i=0; i<groupMap->getNumGroups(); i++) {
-                       remove((fileroot + groupMap->namesOfGroups[i] + ".rabund").c_str());
+               for (int i=0; i<groups.size(); i++) {
+                       remove((fileroot + groups[i] + ".rabund").c_str());
                }
 
        }
@@ -64,7 +71,7 @@ int SharedCommand::execute(){
                string lastLabel = SharedList->getLabel();
                vector<SharedRAbundVector*> lookup; 
                
-               if (SharedList->getNumSeqs() != groupMap->getNumSeqs()) {  
+               if ((globaldata->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
                        mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine(); 
                        
                        out.close();
@@ -82,15 +89,36 @@ int SharedCommand::execute(){
                        return 1; 
                }
                
+               //if user has specified groups make new groupfile for them
+               if (globaldata->Groups.size() != 0) { //make new group file
+                       string groups = "";
+                       for (int i = 0; i < globaldata->Groups.size(); i++) {
+                               groups += globaldata->Groups[i] + ".";
+                       }
+               
+                       string newGroupFile = getRootName(globaldata->inputFileName) + groups + "groups";
+                       ofstream outGroups;
+                       openOutputFile(newGroupFile, outGroups);
+               
+                       vector<string> names = groupMap->getNamesSeqs();
+                       string groupName;
+                       for (int i = 0; i < names.size(); i++) {
+                               groupName = groupMap->getGroup(names[i]);
+                               if (isValidGroup(groupName, globaldata->Groups)) {
+                                       outGroups << names[i] << '\t' << groupName << endl;
+                               }
+                       }
+                       outGroups.close();
+               }
+               
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = globaldata->labels;    
                
                while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
                        
-
                        if(globaldata->allLines == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
-                                       
+                       
                                        lookup = SharedList->getSharedRAbundVector();
                                        mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
                                        
@@ -280,3 +308,20 @@ SharedCommand::~SharedCommand(){
 }
 
 //**********************************************************************************************************************
+
+bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
+       try {
+               for (int i = 0; i < groups.size(); i++) {
+                       if (groupname == groups[i]) { return true; }
+               }
+               
+               return false;
+       }
+       catch(exception& e) {
+               errorOut(e, "SharedCommand", "isValidGroup");
+               exit(1);
+       }
+}
+/************************************************************/
+
+
index f0c24884f05d36f3ed5981ef0ef75a7c57e17a8a..b305e9a7d9f3256eef03cac34387caea99f4b4ae 100644 (file)
@@ -34,11 +34,14 @@ public:
 private:
        void printSharedData(vector<SharedRAbundVector*>);
        void createMisMatchFile();
+       bool isValidGroup(string, vector<string>);
+       
        GlobalData* globaldata;
        ReadOTUFile* read;
        SharedListVector* SharedList;
        InputData* input;
        GroupMap* groupMap;
+       vector<string> groups;
        ofstream out;
        string filename, fileroot;
        bool firsttime;
index a113ebe7e82e93f135541b0bfa0ffc7a3b1d33b1..6cac9b1d9512a16d15eef553d351de0b16771b6e 100644 (file)
@@ -264,22 +264,23 @@ vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
                SharedUtil* util;
                util = new SharedUtil();
                vector<SharedRAbundVector*> lookup;
+               vector<SharedRAbundVector*> lookup2;
                map<string, SharedRAbundVector*> finder;
+               map<string, SharedRAbundVector*>::iterator it;
                string group, names, name;
-               
+       
                util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
-               
                delete util;
 
-               for (int i = 0; i < globaldata->Groups.size(); i++) {
+               for (int i = 0; i < globaldata->gGroupmap->namesOfGroups.size(); i++) {
                        SharedRAbundVector* temp = new SharedRAbundVector(data.size());
-                       finder[globaldata->Groups[i]] = temp;
-                       finder[globaldata->Groups[i]]->setLabel(label);
-                       finder[globaldata->Groups[i]]->setGroup(globaldata->Groups[i]);
+                       finder[globaldata->gGroupmap->namesOfGroups[i]] = temp;
+                       finder[globaldata->gGroupmap->namesOfGroups[i]]->setLabel(label);
+                       finder[globaldata->gGroupmap->namesOfGroups[i]]->setGroup(globaldata->gGroupmap->namesOfGroups[i]);
                        //*temp = getSharedRAbundVector(globaldata->Groups[i]);
-                       lookup.push_back(finder[globaldata->Groups[i]]);
+                       lookup.push_back(finder[globaldata->gGroupmap->namesOfGroups[i]]);
                }
-               
+//cout << "after blanks" << endl;              
                //fill vectors
                for(int i=0;i<numBins;i++){
                        names = get(i);  
@@ -287,18 +288,30 @@ vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
                                name = names.substr(0,names.find_first_of(','));
                                names = names.substr(names.find_first_of(',')+1, names.length());
                                group = groupmap->getGroup(name);
+//cout << i << '\t' << name << '\t' << group << endl;
                                if(group == "not found") {      mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); mothurOutEndLine();  exit(1); }
                                finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
                        }
                        
                        //get last name
                        group = groupmap->getGroup(names);
+//cout << i << '\t' << names << '\t' << group << endl;
                        if(group == "not found") {      mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); mothurOutEndLine();  exit(1); }
                        finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
                        
                }
                
-               return lookup;
+               if (globaldata->Groups.size() == globaldata->gGroupmap->namesOfGroups.size()) { //no groups specified
+                       lookup2 = lookup;
+               }else{ //delete unwanted groups
+                       for (int i = 0; i < globaldata->Groups.size(); i++) {
+                               SharedRAbundVector* temp = new SharedRAbundVector(*finder[globaldata->Groups[i]]);
+                               lookup2.push_back(temp);
+                               delete finder[globaldata->Groups[i]];  //so we don't get dup memory
+                       }
+               }
+               
+               return lookup2;
        }
        catch(exception& e) {
                errorOut(e, "SharedListVector", "getSharedRAbundVector");
index 0c71394689e8aad4d522e5f1ba6948466b218f56..14ec1b8e95d1aebea828c804ea1abc2e75b27217 100644 (file)
@@ -30,7 +30,7 @@ public:
        SharedRAbundVector();
        SharedRAbundVector(int);
        //SharedRAbundVector(string, vector<int>);
-       SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs){};
+       SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs), group(bv.group), index(bv.index){};
     SharedRAbundVector(ifstream&);
        ~SharedRAbundVector();
 
index 455cdc29aa47ecb1ca26a7f9b4021bef151d6314..5ca70a99cb4b600780b425ff7f29b909794cc362 100644 (file)
@@ -21,7 +21,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string AlignArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles"};
+                       string AlignArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles", "qtrim"};
                        
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                        
@@ -72,6 +72,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option){
                        
                        temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
                        convert(temp, qThreshold);
+                       
+                       temp = validParameter.validFile(parameters, "qtrim", false);    if (temp == "not found") { temp = "F"; }
+                       qtrim = isTrue(temp);
 
                        temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
                        convert(temp, qAverage);
@@ -104,7 +107,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option){
 void TrimSeqsCommand::help(){
        try {
                mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
-               mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength and maxlength.\n");
+               mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, qtrim and allfiles.\n");
                mothurOut("The fasta parameter is required.\n");
                mothurOut("The flip parameter .... The default is 0.\n");
                mothurOut("The oligos parameter .... The default is "".\n");
@@ -112,11 +115,17 @@ void TrimSeqsCommand::help(){
                mothurOut("The maxhomop parameter .... The default is 0.\n");
                mothurOut("The minlength parameter .... The default is 0.\n");
                mothurOut("The maxlength parameter .... The default is 0.\n");
+               mothurOut("The qfile parameter .....\n");
+               mothurOut("The qthreshold parameter .... The default is 0.\n");
+               mothurOut("The qaverage parameter .... The default is 0.\n");
+               mothurOut("The allfiles parameter .... The default is F.\n");
+               mothurOut("The qtrim parameter .... The default is F.\n");
                mothurOut("The trim.seqs command should be in the following format: \n");
                mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
                mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");       
                mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
-               mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+               mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
+               mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
 
        }
        catch(exception& e) {
@@ -170,6 +179,9 @@ int TrimSeqsCommand::execute(){
                        if(qFileName != ""){
                                if(qThreshold != 0)             {       success = stripQualThreshold(currSeq, qFile);   }
                                else if(qAverage != 0)  {       success = cullQualAverage(currSeq, qFile);              }
+                               if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { 
+                                       success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+                               }
                                if(!success)                    {       trashCode += 'q';                                                               }
                        }
                        if(barcodes.size() != 0){
index 87b813ff9afa8575855c5cca69916de7287f9d9f..ca029616870f97a31482a40497c645e240ae72db 100644 (file)
@@ -36,7 +36,7 @@ private:
        bool abort;
        string fastaFile, oligoFile, qFileName;
        
-       bool flip, allFiles;
+       bool flip, allFiles, qtrim;
        int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength, qThreshold, qAverage;
        vector<string> forPrimer, revPrimer;
        map<string, int> barcodes;