]> git.donarmstrong.com Git - mothur.git/commitdiff
added namefile to align.check and added seqs from namefile to optimizing piece of...
authorwestcott <westcott>
Fri, 11 Mar 2011 15:54:35 +0000 (15:54 +0000)
committerwestcott <westcott>
Fri, 11 Mar 2011 15:54:35 +0000 (15:54 +0000)
mothurout.cpp
mothurout.h
screenseqscommand.cpp
screenseqscommand.h
secondarystructurecommand.cpp
secondarystructurecommand.h
seqsummarycommand.cpp
seqsummarycommand.h

index 27b36ce5ff83dceadeb77e84045caef94d5168c9..96e7bb83e168a335472d77e9dae77359d2889951 100644 (file)
@@ -1096,6 +1096,36 @@ float MothurOut::ceilDist(float dist, int precision){
                exit(1);
        }
 }
+/**********************************************************************************************************************/
+map<string, int> MothurOut::readNames(string namefile) { 
+       try {
+               
+               map<string, int> nameMap;
+               
+               //open input file
+               ifstream in;
+               openInputFile(namefile, in);
+               
+               while (!in.eof()) {
+                       if (control_pressed) { break; }
+                       
+                       string firstCol, secondCol;
+                       in >> firstCol >> secondCol; gobble(in);
+                       
+                       int num = getNumNames(secondCol);
+                       
+                       nameMap[firstCol] = num;
+               }
+               in.close();
+               
+               return nameMap;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "readNames");
+               exit(1);
+       }
+}
 
 /***********************************************************************/
 
index a1d950661d8d0f3f8084859a85110db97846cd4d..750973fde12ecffe585f354fc413612fc50f21b6 100644 (file)
@@ -59,6 +59,7 @@ class MothurOut {
                string getline(istringstream&);
                void gobble(istream&);
                void gobble(istringstream&);
+               map<string, int> readNames(string);
                
                //searchs and checks
                bool checkReleaseVersion(ifstream&, string);
index 150a27e54273dfcf7c61d8c1582efe794e0362ba..937959c9c1458fffd66d4af72e8525fcf0ae389d 100644 (file)
@@ -144,7 +144,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                        else if (groupfile == "not found") { groupfile = ""; }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = ""; }    
 
                        alignreport = validParameter.validFile(parameters, "alignreport", true);
@@ -248,9 +248,13 @@ int ScreenSeqsCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               //if the user want to optimize we need to no the 90% mark
+               //if the user want to optimize we need to know the 90% mark
                vector<unsigned long int> positions;
-               if (optimize.size() != 0) {  getSummary(positions); } //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
+               if (optimize.size() != 0) {  //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
+                       //use the namefile to optimize correctly
+                       if (namefile != "") { nameMap = m->readNames(namefile); }
+                       getSummary(positions); 
+               } 
                else { 
                        positions = m->divideFile(fastafile, processors);
                        for (int i = 0; i < (positions.size()-1); i++) {
@@ -625,7 +629,8 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long int>& positions){
                sort(ambigBases.begin(), ambigBases.end());
                sort(longHomoPolymer.begin(), longHomoPolymer.end());
                
-               int criteriaPercentile  = int(numSeqs * (criteria / (float) 100));
+               //numSeqs is the number of unique seqs, startPosition.size() is the total number of seqs, we want to optimize using all seqs
+               int criteriaPercentile  = int(startPosition.size() * (criteria / (float) 100));
                
                for (int i = 0; i < optimize.size(); i++) {
                        if (optimize[i] == "start") { startPos = startPosition[criteriaPercentile]; m->mothurOut("Optimizing start to " + toString(startPos) + "."); m->mothurOutEndLine(); }
@@ -662,11 +667,24 @@ int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                        Sequence current(in); m->gobble(in);
        
                        if (current.getName() != "") {
-                               startPosition.push_back(current.getStartPos());
-                               endPosition.push_back(current.getEndPos());
-                               seqLength.push_back(current.getNumBases());
-                               ambigBases.push_back(current.getAmbigBases());
-                               longHomoPolymer.push_back(current.getLongHomoPolymer());
+                               int num = 1;
+                               if (namefile != "") {
+                                       //make sure this sequence is in the namefile, else error 
+                                       map<string, int>::iterator it = nameMap.find(current.getName());
+                                       
+                                       if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       else { num = it->second; }
+                               }
+                               
+                               //for each sequence this sequence represents
+                               for (int i = 0; i < num; i++) {
+                                       startPosition.push_back(current.getStartPos());
+                                       endPosition.push_back(current.getEndPos());
+                                       seqLength.push_back(current.getNumBases());
+                                       ambigBases.push_back(current.getAmbigBases());
+                                       longHomoPolymer.push_back(current.getLongHomoPolymer());
+                               }
+                               
                                count++;
                        }
                        
@@ -712,6 +730,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                                m->openOutputFile(tempFile, out);
                                
                                out << num << endl;
+                               out << startPosition.size() << endl;
                                for (int k = 0; k < startPosition.size(); k++)          {               out << startPosition[k] << '\t'; }  out << endl;
                                for (int k = 0; k < endPosition.size(); k++)            {               out << endPosition[k] << '\t'; }  out << endl;
                                for (int k = 0; k < seqLength.size(); k++)                      {               out << seqLength[k] << '\t'; }  out << endl;
@@ -744,6 +763,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                        
                        int temp, tempNum;
                        in >> tempNum; m->gobble(in); num += tempNum;
+                       in >> tempNum; m->gobble(in);
                        for (int k = 0; k < tempNum; k++)                       {               in >> temp; startPosition.push_back(temp);              }               m->gobble(in);
                        for (int k = 0; k < tempNum; k++)                       {               in >> temp; endPosition.push_back(temp);                }               m->gobble(in);
                        for (int k = 0; k < tempNum; k++)                       {               in >> temp; seqLength.push_back(temp);                  }               m->gobble(in);
index 81b915efcbf97e4b91bfca3bf27353da8f9ea8e9..45c9db5de13690840b5eede966c568aeda9a067e 100644 (file)
@@ -53,6 +53,8 @@ private:
        vector<string> outputNames;
        vector<string> optimize;
        map<string, vector<string> > outputTypes;
+       map<string, int> nameMap;
+       int readNames();
        
        int getSummary(vector<unsigned long int>&);
        int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
index c9002cab4a458c1ef9fbb3a2acb85706e01513dc..2b474c14d22389778078d6012819fd1e9f0fc7e7 100644 (file)
@@ -13,7 +13,7 @@
 //**********************************************************************************************************************
 vector<string> AlignCheckCommand::getValidParameters(){        
        try {
-               string Array[] =  {"fasta","map", "outputdir","inputdir"};
+               string Array[] =  {"fasta", "name","map", "outputdir","inputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -69,7 +69,7 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta","map", "outputdir","inputdir"};
+                       string Array[] =  {"fasta","name","map", "outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -107,6 +107,14 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["map"] = inputDir + it->second;              }
                                }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
                        }
 
                        //check for required parameters
@@ -118,6 +126,10 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                        if (fastafile == "not open") { abort = true; }
                        else if (fastafile == "not found") {  fastafile = "";  m->mothurOut("You must provide an fasta file."); m->mothurOutEndLine(); abort = true;  } 
                        
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { namefile = ""; abort = true; }
+                       else if (namefile == "not found") { namefile = "";  }   
+                       
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                                outputDir = ""; 
@@ -159,6 +171,10 @@ int AlignCheckCommand::execute(){
                //get secondary structure info.
                readMap();
                
+               if (namefile != "") { nameMap = m->readNames(namefile); }
+               
+               if (m->control_pressed) { return 0; }
+               
                ifstream in;
                m->openInputFile(fastafile, in);
                
@@ -166,8 +182,9 @@ int AlignCheckCommand::execute(){
                string outfile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "align.check";
                m->openOutputFile(outfile, out);
                
+                               
                out << "name" << '\t' << "pound" << '\t' << "dash" << '\t' << "plus" << '\t' << "equal" << '\t';
-               out << "loop" << '\t' << "tilde" << '\t' << "total" << endl;
+               out << "loop" << '\t' << "tilde" << '\t' << "total"  << '\t' << "numseqs" << endl;
 
                
                while(!in.eof()){
@@ -179,8 +196,17 @@ int AlignCheckCommand::execute(){
                                
                                if (haderror == 1) { break; }
                                
+                               int num = 1;
+                               if (namefile != "") {
+                                       //make sure this sequence is in the namefile, else error 
+                                       map<string, int>::iterator it = nameMap.find(seq.getName());
+                                       
+                                       if (it == nameMap.end()) { cout << "[ERROR]: " << seq.getName() << " is not in your namefile, please correct." << endl; m->control_pressed = true; }
+                                       else { num = it->second; }
+                               }
+                               
                                out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
-                               out << data.loop << '\t' << data.tilde << '\t' << data.total << endl;
+                               out << data.loop << '\t' << data.tilde << '\t' << data.total << '\t' << num << endl;
                        }
                }
 
@@ -305,6 +331,4 @@ statData AlignCheckCommand::getStats(string sequence){
                exit(1);
        }
 }
-
-
 //**********************************************************************************************************************
index c1cc057cd190cd4570b57d9bcc9f622a8f6bf0f4..0b654a4e02bdf42cdf5ebfc9ee419bdbd71eec2f 100644 (file)
@@ -44,11 +44,12 @@ class AlignCheckCommand : public Command {
                
        private:
                vector<int> structMap;
-               string mapfile, fastafile, outputDir;
+               string mapfile, fastafile, outputDir, namefile;
                bool abort;
                int seqLength, haderror;
                vector<string> outputNames;
                map<string, vector<string> > outputTypes;
+               map<string, int> nameMap;
                
                void readMap();
                statData getStats(string sequence);
index 8730997883ac5da6f8588e216f8311285303160d..c0f91969eda2277e4c8b02c3a78db2a7390716fe 100644 (file)
@@ -172,7 +172,7 @@ int SeqSummaryCommand::execute(){
                vector<int> ambigBases;
                vector<int> longHomoPolymer;
                
-               if (namefile != "") { readNames(); }
+               if (namefile != "") { nameMap = m->readNames(namefile); }
                
                if (m->control_pressed) { return 0; }
                        
@@ -480,7 +480,7 @@ int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPo
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
-                                       if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       if (it == nameMap.end()) { cout << "[ERROR]: " << current.getName() << " is not in your namefile, please correct." << endl; m->control_pressed = true; }
                                        else { num = it->second; }
                                }
                                
@@ -589,34 +589,6 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
        }
 }
 /**********************************************************************************************************************/
-int SeqSummaryCommand::readNames() { 
-       try {
-               //open input file
-               ifstream in;
-               m->openInputFile(namefile, in);
-               
-               while (!in.eof()) {
-                       if (m->control_pressed) { break; }
-                       
-                       string firstCol, secondCol;
-                       in >> firstCol >> secondCol; m->gobble(in);
-                       
-                       int num = m->getNumNames(secondCol);
-                       
-                       nameMap[firstCol] = num;
-               }
-               in.close();
-               
-               return 0;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SeqSummaryCommand", "readNames");
-               exit(1);
-       }
-}
-
-/**********************************************************************************************************************/
 
 
 
index fb24141f08a1314a386bc58899ec57ecf6c56e56..24c29a107b8189778c62b2eeb49258957a321be6 100644 (file)
@@ -44,7 +44,6 @@ private:
        
        int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string);
        int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string, linePair*);       
-       int readNames();
 
        #ifdef USE_MPI
        int MPICreateSummary(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, MPI_File&, MPI_File&, vector<unsigned long int>&);