]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed classify.seqs output file name - had issue if reference taxonomy file did not...
authorSarah Westcott <mothur.westcott@gmail.com>
Mon, 4 Jun 2012 19:40:50 +0000 (15:40 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Mon, 4 Jun 2012 19:40:50 +0000 (15:40 -0400)
classifyseqscommand.cpp
rarefactcommand.cpp
rarefactsharedcommand.cpp
rarefactsharedcommand.h

index 4e244905ff6b755fd064617368506b9160770048..b6dc24fe5751867c6b79f71fb50e3b7b74ad75a6 100644 (file)
@@ -465,8 +465,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                     }
                 }
             }
-                       
-               }
+        }
        }
        catch(exception& e) {
                m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
@@ -503,11 +502,17 @@ int ClassifySeqsCommand::execute(){
                        string baseTName = taxonomyFileName;
                        if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy();  }
                        
-                       string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
-                       RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
-                       if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
-                       if (RippedTaxName != "") { RippedTaxName +=  "."; }
-           
+            //set rippedTaxName to 
+                       string RippedTaxName = "";
+            bool foundDot = false;
+            for (int i = baseTName.length()-1; i >= 0; i--) {
+                cout << baseTName[i] << endl;
+                if (foundDot && (baseTName[i] != '.')) {  RippedTaxName = baseTName[i] + RippedTaxName; }
+                else if (foundDot && (baseTName[i] == '.')) {  break; }
+                else if (!foundDot && (baseTName[i] == '.')) {  foundDot = true; }
+            }
+            if (RippedTaxName != "") { RippedTaxName +=  "."; }   
+          
                        if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
                        string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
                        string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos";
index 652ff4e49ecf163fc71dad48b40eb1485012b105..0fdd0798e2cdda014a6cc568d3a1c895362c3158 100644 (file)
@@ -285,13 +285,8 @@ int RareFactCommand::execute(){
         map<string, set<int> > labelToEnds;
                if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
                else {  inputFileNames = parseSharedFile(sharedfile, labelToEnds);  format = "rabund"; }
-               for (map<string, set<int> >::iterator it = labelToEnds.begin(); it != labelToEnds.end(); it++) {
-            cout << it->first << endl;
-            for (set<int>::iterator its = (it->second).begin(); its != (it->second).end(); its++) {
-                cout << (*its) << endl;
-            }
-        }
-               if (m->control_pressed) { return 0; }
+        
+        if (m->control_pressed) { return 0; }
                
                map<int, string> file2Group; //index in outputNames[i] -> group
                for (int p = 0; p < inputFileNames.size(); p++) {
index 726ddd61b75c6185cca6b4ebac108b7dd1b4d44f..fecf972c7ec99f80dda6f921706c6916a9d1679f 100644 (file)
@@ -24,7 +24,8 @@ vector<string> RareFactSharedCommand::setParameters(){
                CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
         CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
@@ -41,8 +42,7 @@ string RareFactSharedCommand::getHelpString(){
        try {
                string helpString = "";
                ValidCalculators validCalculator;
-               helpString += "The collect.shared command parameters are shared, label, freq, calc and groups.  shared is required if there is no current sharedfile. \n";
-               helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble and calc.  shared is required if there is no current sharedfile. \n";
+               helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble, groupmode and calc.  shared is required if there is no current sharedfile. \n";
         helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
         helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
                helpString += "The rarefaction command should be in the following format: \n";
@@ -196,6 +196,9 @@ RareFactSharedCommand::RareFactSharedCommand(string option)  {
                        if (m->isTrue(temp)) { jumble = true; }
                        else { jumble = false; }
                        m->jumble = jumble;
+            
+            temp = validParameter.validFile(parameters, "groupmode", false);           if (temp == "not found") { temp = "T"; }
+                       groupMode = m->isTrue(temp);
                                
                }
 
@@ -226,6 +229,8 @@ int RareFactSharedCommand::execute(){
             for (int i = 0; i < Sets.size(); i++) {
                 process(designMap, Sets[i]);
             }
+            
+            if (groupMode) { outputNames = createGroupFile(outputNames); }
         }
                     
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
@@ -288,6 +293,7 @@ int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
                                        outputNames.push_back(fileNameRoot+"shared.r_nseqs"); outputTypes["sharedr_nseqs"].push_back(fileNameRoot+"shared.r_nseqs");
                                }
                        }
+            file2Group[outputNames.size()-1] = thisSet;
                }
                
                //if the users entered no valid calculators don't execute command
@@ -432,3 +438,161 @@ int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
        }
 }
 //**********************************************************************************************************************
+vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
+       try {
+               
+               vector<string> newFileNames;
+               
+               //find different types of files
+               map<string, map<string, string> > typesFiles;
+        map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
+        vector<string> groupNames;
+               for (int i = 0; i < outputNames.size(); i++) {
+            
+                       string extension = m->getExtension(outputNames[i]);
+            string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
+                       m->mothurRemove(combineFileName); //remove old file
+            
+                       ifstream in;
+                       m->openInputFile(outputNames[i], in);
+                       
+                       string labels = m->getline(in);
+            
+                       istringstream iss (labels,istringstream::in);
+            string newLabel = ""; vector<string> theseLabels;
+            while(!iss.eof()) {  iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
+            vector< vector<string> > allLabels;
+            vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
+            for (int j = 1; j < theseLabels.size()-1; j++) {
+                if (theseLabels[j+1] == "lci") {
+                    thisSet.push_back(theseLabels[j]); 
+                    thisSet.push_back(theseLabels[j+1]); 
+                    thisSet.push_back(theseLabels[j+2]);
+                    j++; j++;
+                }else{ //no lci or hci for this calc.
+                    thisSet.push_back(theseLabels[j]); 
+                }
+                allLabels.push_back(thisSet); 
+                thisSet.clear();
+            }
+            fileLabels[combineFileName] = allLabels;
+            
+            map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
+            if (itfind != typesFiles.end()) {
+                (itfind->second)[outputNames[i]] = file2Group[i];
+            }else {
+                map<string, string> temp;  
+                temp[outputNames[i]] = file2Group[i];
+                typesFiles[extension] = temp;
+            }
+            if (!(m->inUsersGroups(file2Group[i], groupNames))) {  groupNames.push_back(file2Group[i]); }
+               }
+               
+               //for each type create a combo file
+               
+               for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
+                       
+                       ofstream out;
+                       string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
+                       m->openOutputFileAppend(combineFileName, out);
+                       newFileNames.push_back(combineFileName);
+                       map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
+            set<int> numSampledSet;
+            
+                       //open each type summary file
+                       map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
+                       int maxLines = 0;
+                       for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
+                
+                string thisfilename = itFileNameGroup->first;
+                string group = itFileNameGroup->second;
+                
+                               ifstream temp;
+                               m->openInputFile(thisfilename, temp);
+                               
+                               //read through first line - labels
+                               m->getline(temp);       m->gobble(temp);
+                               
+                               map<int, vector< vector<string> > > thisFilesLines;
+                               while (!temp.eof()){
+                    int numSampled = 0;
+                    temp >> numSampled; m->gobble(temp);
+                    
+                    vector< vector<string> > theseReads;
+                    vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
+                    for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
+                        vector<string> reads;
+                        string next = "";
+                        for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
+                            temp >> next; m->gobble(temp);
+                            reads.push_back(next);
+                        }
+                        theseReads.push_back(reads);
+                    }
+                    thisFilesLines[numSampled] = theseReads;
+                    m->gobble(temp);
+                    
+                    numSampledSet.insert(numSampled);
+                               }
+                               
+                               files[group] = thisFilesLines;
+                               
+                               //save longest file for below
+                               if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
+                               
+                               temp.close();
+                               m->mothurRemove(thisfilename);
+                       }
+                       
+            //output new labels line
+            out << fileLabels[combineFileName][0][0] << '\t';
+            for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
+                for (int n = 0; n < groupNames.size(); n++) { // for each group
+                    for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
+                        out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
+                    }
+                }
+            }
+                       out << endl;
+            
+                       //for each label
+                       for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
+                               
+                out << (*itNumSampled) << '\t';
+                
+                if (m->control_pressed) { break; }
+                
+                for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
+                                   //grab data for each group
+                    for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
+                        
+                        string group = itFileNameGroup->first;
+                        
+                        map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
+                        if (itLine != files[group].end()) { 
+                            for (int l = 0; l < (itLine->second)[k].size(); l++) { 
+                                out << (itLine->second)[k][l] << '\t';
+                                
+                            }                             
+                        }else { 
+                            for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { 
+                                out << "NA" << '\t';
+                            } 
+                        }
+                    }
+                }
+                out << endl;
+                       }       
+                       out.close();
+               }
+               
+               //return combine file name
+               return newFileNames;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RareFactSharedCommand", "createGroupFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
index 1a2d944894feabe26b4103e5431c0da08c6574b4..a210574002969c411be4d9e9c705af9d88c980f0 100644 (file)
@@ -40,12 +40,14 @@ private:
        string format;
        float freq;
        
-       bool abort, allLines, jumble;
+     map<int, string> file2Group; //index in outputNames[i] -> group
+       bool abort, allLines, jumble, groupMode;
        set<string> labels; //holds labels to be used
        string label, calc, groups, outputDir, sharedfile, designfile;
        vector<string>  Estimators, Groups, outputNames, Sets;
     
     int process(GroupMap&, string);
+    vector<string> createGroupFile(vector<string>&);
 
 };