git.donarmstrong.com Git - mothur.git/commitdiff
added uchime_src folder. added biom parameter to make.shared. added biom as a current...
author Sarah Westcott <mothur.westcott@gmail.com>
Fri, 20 Apr 2012 16:52:21 +0000 (12:52 -0400)
committer Sarah Westcott <mothur.westcott@gmail.com>
Fri, 20 Apr 2012 16:52:21 +0000 (12:52 -0400)
63 files changed:
commandoptionparser.cpp
getcurrentcommand.cpp
makebiomcommand.cpp
makebiomcommand.h
mothurout.cpp
mothurout.h
optionparser.cpp
setcurrentcommand.cpp
setcurrentcommand.h
sharedcommand.cpp
sharedcommand.h
uchime_src/addtargets2.cpp [new file with mode: 0644]
uchime_src/alignchime.cpp [new file with mode: 0644]
uchime_src/alignchimel.cpp [new file with mode: 0644]
uchime_src/allocs.h [new file with mode: 0644]
uchime_src/alnheuristics.h [new file with mode: 0644]
uchime_src/alnparams.cpp [new file with mode: 0644]
uchime_src/alnparams.h [new file with mode: 0644]
uchime_src/alpha.cpp [new file with mode: 0644]
uchime_src/alpha.h [new file with mode: 0644]
uchime_src/alpha2.cpp [new file with mode: 0644]
uchime_src/chainer.h [new file with mode: 0644]
uchime_src/chime.h [new file with mode: 0644]
uchime_src/counters.h [new file with mode: 0644]
uchime_src/diagbox.h [new file with mode: 0644]
uchime_src/dp.h [new file with mode: 0644]
uchime_src/evalue.h [new file with mode: 0644]
uchime_src/fractid.cpp [new file with mode: 0644]
uchime_src/getparents.cpp [new file with mode: 0644]
uchime_src/globalalign2.cpp [new file with mode: 0644]
uchime_src/help.h [new file with mode: 0644]
uchime_src/hsp.h [new file with mode: 0644]
uchime_src/hspfinder.h [new file with mode: 0644]
uchime_src/make3way.cpp [new file with mode: 0644]
uchime_src/mk [new file with mode: 0755]
uchime_src/mx.cpp [new file with mode: 0644]
uchime_src/mx.h [new file with mode: 0644]
uchime_src/myopts.h [new file with mode: 0644]
uchime_src/myutils.cpp [new file with mode: 0644]
uchime_src/myutils.h [new file with mode: 0644]
uchime_src/orf.h [new file with mode: 0644]
uchime_src/out.h [new file with mode: 0644]
uchime_src/path.cpp [new file with mode: 0644]
uchime_src/path.h [new file with mode: 0644]
uchime_src/searchchime.cpp [new file with mode: 0644]
uchime_src/seq.h [new file with mode: 0644]
uchime_src/seqdb.cpp [new file with mode: 0644]
uchime_src/seqdb.h [new file with mode: 0644]
uchime_src/setnucmx.cpp [new file with mode: 0644]
uchime_src/sfasta.cpp [new file with mode: 0644]
uchime_src/sfasta.h [new file with mode: 0644]
uchime_src/svnmods.h [new file with mode: 0644]
uchime_src/svnversion.h [new file with mode: 0644]
uchime_src/timers.h [new file with mode: 0644]
uchime_src/timing.h [new file with mode: 0644]
uchime_src/tracebackbit.cpp [new file with mode: 0644]
uchime_src/uc.h [new file with mode: 0644]
uchime_src/uchime_main.cpp [new file with mode: 0644]
uchime_src/ultra.h [new file with mode: 0644]
uchime_src/usort.cpp [new file with mode: 0644]
uchime_src/viterbifast.cpp [new file with mode: 0644]
uchime_src/windex.h [new file with mode: 0644]
uchime_src/writechhit.cpp [new file with mode: 0644]

index e356afe5c74972e5298ddfa87a0672c97c4facfe..3b45e48f4d992e470867a3071ff8d8f18c519dca 100644 (file)
@@ -28,10 +28,7 @@ CommandOptionParser::CommandOptionParser(string input){
                }
                else if (openParen == -1) { m->mothurOut("[ERROR]: You are missing ("); m->mothurOutEndLine(); }
                else if (closeParen == -1) { m->mothurOut("[ERROR]:You are missing )"); m->mothurOutEndLine(); }
-                                       
-               //GlobalData* globaldata = GlobalData::getInstance();
-               //globaldata->parseGlobalData(commandString, optionString);                     //parser to separate and check options
-       }
+    }
        catch(exception& e) {
                m->errorOut(e, "CommandOptionParser", "CommandOptionParser");
                exit(1);
index 12dcc82ff6a1bfb1a105d3d6deac903e855cea9b..ca832314d66dbe371e314db292ab6f441d2ee351 100644 (file)
@@ -138,6 +138,8 @@ int GetCurrentCommand::execute(){
                                        m->setTaxonomyFile("");
                                }else if (types[i] == "flow") {
                                        m->setFlowFile("");
+                }else if (types[i] == "biom") {
+                                       m->setBiomFile("");
                                }else if (types[i] == "processors") {
                                        m->setProcessors("1");
                                }else if (types[i] == "all") {
index eb46b97b9566773d5e02b3d5e4c38e50fae94719..41f2316b3a036840208635d3abbacd89e59f6413 100644 (file)
@@ -513,7 +513,7 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup)
             //convert list file bin labels to shared file bin labels
             //parse tax strings
             //save in map
-            map<string, vector<string> > labelTaxMap;
+            map<string, string> labelTaxMap;
             string snumBins = toString(otuLabels.size());
             for (int i = 0; i < otuLabels.size(); i++) {  
                 
@@ -528,11 +528,7 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup)
                 }
                 binLabel += sbinNumber;
                 
-                vector<string> taxString;
-                m->splitAtChar(taxs[i], taxString, ';');
-                taxString.pop_back(); //adds blank string because taxonomies end in ;
-                
-                labelTaxMap[binLabel] = taxString;
+                labelTaxMap[binLabel] = taxs[i];
             }
             
             
@@ -540,7 +536,7 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup)
             
             //traverse the binLabels forming the metadata strings and saving them
             //make sure to sanity check
-            map<string, vector<string> >::iterator it;
+            map<string, string>::iterator it;
             for (int i = 0; i < m->currentBinLabels.size(); i++) {
                 
                 if (m->control_pressed) { return metadata; }
@@ -549,37 +545,24 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup)
                 
                 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; }
                 else {
+                    vector<string> bootstrapValues;
                     string data = "{\"taxonomy\":[";
-                    for (int j = 0; j < (it->second).size()-1; j ++) {
-                        
-                        string taxon = (it->second)[j];
-                        
-                        //strip "" if they are there
-                        int pos = taxon.find("\"");
-                        if (pos != string::npos) {
-                            string newTax = "";
-                            for (int k = 0; k < taxon.length(); k++) {
-                                if (taxon[k] != '\"') { newTax += taxon[k]; }
-                            }
-                            taxon = newTax;
-                        }
-                        
-                        data += "\"" + taxon + "\", ";
-                    }
+            
+                    vector<string> scores;
+                    vector<string> taxonomies = parseTax(it->second, scores);
                     
-                    string taxon = (it->second)[(it->second).size()-1];
+                    for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
+                    data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
                     
-                    //strip "" if they are there
-                    int pos = taxon.find("\"");
-                    if (pos != string::npos) {
-                        string newTax = "";
-                        for (int k = 0; k < taxon.length(); k++) {
-                            if (taxon[k] != '\"') { newTax += taxon[k]; }
-                        }
-                        taxon = newTax;
+                    //add bootstrap values if available
+                    if (scores[0] != "null") {
+                        data += ", \"bootstrap\":[";
+                        
+                        for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
+                        data += scores[scores.size()-1] + "]";
+
                     }
-                    
-                    data += "\"" + taxon + "\"]} ";
+                    data += "}";
                     
                     metadata.push_back(data);
                 }
@@ -594,7 +577,58 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup)
                exit(1);
        }
 
-}        
+}
+/**************************************************************************************************/
+//returns {Bacteria, Bacteroidetes, ..} and scores is filled with {100, 98, ...} or {null, null, null}
+vector<string> MakeBiomCommand::parseTax(string tax, vector<string>& scores) {
+       try {
+               
+               string taxon;
+        vector<string> taxs;
+               
+               while (tax.find_first_of(';') != -1) {
+                       
+                       if (m->control_pressed) { return taxs; }
+                       
+                       //get taxon
+                       taxon = tax.substr(0,tax.find_first_of(';'));
+            
+                       int pos = taxon.find_last_of('(');
+                       if (pos != -1) {
+                               //is it a number?
+                               int pos2 = taxon.find_last_of(')');
+                               if (pos2 != -1) {
+                                       string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
+                                       if (m->isNumeric1(confidenceScore)) {
+                                               taxon = taxon.substr(0, pos); //rip off confidence 
+                        scores.push_back(confidenceScore);
+                                       }else{ scores.push_back("null"); }
+                               }
+                       }
+                       
+            //strip "" if they are there
+            pos = taxon.find("\"");
+            if (pos != string::npos) {
+                string newTax = "";
+                for (int k = 0; k < taxon.length(); k++) {
+                    if (taxon[k] != '\"') { newTax += taxon[k]; }
+                }
+                taxon = newTax;
+            }
+            
+            //look for bootstrap value
+                       taxs.push_back(taxon);
+            tax = tax.substr(tax.find_first_of(';')+1, tax.length());
+               }
+               
+               return taxs;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeBiomCommand", "parseTax");
+               exit(1);
+       }
+}
+
 //**********************************************************************************************************************
 
 
index 31077aecb64add7041ec1ac0cb3ae9ecf5bba232..9f80c2e70215b130a9020a83ce2daebb9e63a089 100644 (file)
@@ -42,6 +42,7 @@ private:
     
     int getBiom(vector<SharedRAbundVector*>&);
     vector<string> getMetaData(vector<SharedRAbundVector*>&);
+    vector<string> parseTax(string tax, vector<string>& scores);
 };
 
 
index ee809fafc6e39f6c5cc02d78b513d82704864aa8..ae4cb524cba877f5a9ec843e1686569032c6528d 100644 (file)
@@ -40,6 +40,7 @@ void MothurOut::printCurrentFiles()  {
                if (taxonomyfile != "")         {  mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine();           }
                if (treefile != "")                     {  mothurOut("tree=" + treefile); mothurOutEndLine();                           }
                if (flowfile != "")                     {  mothurOut("flow=" + flowfile); mothurOutEndLine();                           }
+        if (biomfile != "")                    {  mothurOut("biom=" + biomfile); mothurOutEndLine();                           }
                if (processors != "1")          {  mothurOut("processors=" + processors); mothurOutEndLine();           }
                
        }
@@ -73,6 +74,7 @@ bool MothurOut::hasCurrentFiles()  {
                if (taxonomyfile != "")         {  return true;                 }
                if (treefile != "")                     {  return true;                 }
                if (flowfile != "")                     {  return true;                 }
+        if (biomfile != "")                    {  return true;                 }
                if (processors != "1")          {  return true;                 }
                
                return hasCurrent;
@@ -107,6 +109,7 @@ void MothurOut::clearCurrentFiles()  {
                accnosfile = "";
                taxonomyfile = "";      
                flowfile = "";
+        biomfile = "";
                processors = "1";
        }
        catch(exception& e) {
@@ -1962,6 +1965,25 @@ void MothurOut::splitAtComma(string& estim, vector<string>& container) {
                exit(1);
        }       
 }
+/***********************************************************************/
+//This function splits up the various option parameters
+void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
+       try {
+               prefix = suffix.substr(0,suffix.find_first_of(c));
+               if ((suffix.find_first_of(c)+2) <= suffix.length()) {  //checks to make sure you don't have comma at end of string
+                       suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
+                       string space = " ";
+                       while(suffix.at(0) == ' ')
+                               suffix = suffix.substr(1, suffix.length());
+               }
+        
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "splitAtComma");
+               exit(1);
+       }       
+}
+
 /***********************************************************************/
 
 //This function splits up the various option parameters
index 2a6ba2de6316eb24ff281d703f8e27d4c94a45a3..9ce698be4e1963e6af9abee67673ded3e1743fde 100644 (file)
@@ -128,6 +128,7 @@ class MothurOut {
                void splitAtDash(string&, set<string>&);
                void splitAtDash(string&, vector<string>&);
                void splitAtChar(string&, vector<string>&, char);
+        void splitAtChar(string&, string&, char);
                int removeConfidences(string&);
                
                //math operation
@@ -162,6 +163,7 @@ class MothurOut {
                string getAccnosFile()          { return accnosfile;            }
                string getTaxonomyFile()        { return taxonomyfile;          }
                string getFlowFile()            { return flowfile;                      }
+        string getBiomFile()           { return biomfile;                      }
                string getProcessors()          { return processors;            }
                
                void setListFile(string f)                      { listfile = getFullPathName(f);                        }
@@ -184,6 +186,7 @@ class MothurOut {
                void setAccnosFile(string f)            { accnosfile = getFullPathName(f);                      }
                void setTaxonomyFile(string f)          { taxonomyfile = getFullPathName(f);            }
                void setFlowFile(string f)                      { flowfile = getFullPathName(f);                        }
+        void setBiomFile(string f)                     { biomfile = getFullPathName(f);                        }
                void setProcessors(string p)            { processors = p;                                                       }
                
                void printCurrentFiles();
@@ -217,6 +220,7 @@ class MothurOut {
                        taxonomyfile = "";
                        processors = "1";
                        flowfile = "";
+            biomfile = "";
                        gui = false;
                        printedHeaders = false;
                        commandInputsConvertError = false;
@@ -229,7 +233,7 @@ class MothurOut {
                string defaultPath, outputDir;
                string releaseDate, version;
        
-               string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile;
+               string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile;
                string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile;
 
                vector<string> Groups;
index 06a900d8137a031236d52261882c744bb169b440..0d6ed2d4d0bfa681a7aa30693bcb7ae30a9c77fb 100644 (file)
@@ -91,6 +91,8 @@ map<string, string> OptionParser::getParameters() {
                                        it->second = m->getAccnosFile();
                                }else if (it->first == "taxonomy") {
                                        it->second = m->getTaxonomyFile();
+                }else if (it->first == "biom") {
+                        it->second = m->getBiomFile();
                                }else {
                                        m->mothurOut("[ERROR]: mothur does not save a current file for " + it->first); m->mothurOutEndLine();
                                }
index 5582abd03d6220da752943278cad50f4bceb1a76..96735405c5e148159ab762a720a93f88ec2354ee 100644 (file)
@@ -15,6 +15,7 @@ vector<string> SetCurrentCommand::setParameters(){
                
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pflow);
+        CommandParameter pbiom("biom", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pbiom);
                CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pphylip);
                CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcolumn);
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pfasta);
@@ -52,7 +53,7 @@ string SetCurrentCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The set.current command allows you to set the current files saved by mothur.\n";
-               helpString += "The set.current command parameters are: clear, phylip, column, list, rabund, sabund, name, group, design, order, tree, shared, ordergroup, relabund, fasta, qfile, sff, oligos, accnos, taxonomy.\n";
+               helpString += "The set.current command parameters are: clear, phylip, column, list, rabund, sabund, name, group, design, order, tree, shared, ordergroup, relabund, fasta, qfile, sff, oligos, accnos, biom and taxonomy.\n";
                helpString += "The clear paramter is used to indicate which file types you would like to clear values for, multiple types can be separated by dashes.\n";
                helpString += "The set.current command should be in the following format: \n";
                helpString += "set.current(fasta=yourFastaFile) or set.current(fasta=amazon.fasta, clear=name-accnos)\n";
@@ -272,6 +273,14 @@ SetCurrentCommand::SetCurrentCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["flow"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("biom");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["biom"] = inputDir + it->second;             }
+                               }
                        }
                        
                        //check for parameters
@@ -374,6 +383,11 @@ SetCurrentCommand::SetCurrentCommand(string option)  {
                        if (flowfile == "not open") { m->mothurOut("Ignoring: " + parameters["flow"]); m->mothurOutEndLine(); flowfile = ""; }
                        else if (flowfile == "not found") {  flowfile = "";  }  
                        if (flowfile != "") { m->setFlowFile(flowfile); }
+            
+            biomfile = validParameter.validFile(parameters, "biom", true);
+                       if (biomfile == "not open") { m->mothurOut("Ignoring: " + parameters["biom"]); m->mothurOutEndLine(); biomfile = ""; }
+                       else if (biomfile == "not found") {  biomfile = "";  }  
+                       if (biomfile != "") { m->setBiomFile(biomfile); }
                        
                        processors = validParameter.validFile(parameters, "processors", false);
                        if (processors == "not found") {  processors = "1";  }  
@@ -444,6 +458,8 @@ int SetCurrentCommand::execute(){
                                        m->setTaxonomyFile("");
                                }else if (types[i] == "flow") {
                                        m->setFlowFile("");
+                }else if (types[i] == "biom") {
+                                       m->setBiomFile("");
                                }else if (types[i] == "processors") {
                                        m->setProcessors("1");
                                }else if (types[i] == "all") {
index 0033ed58cf25a274e9e4bbe2a17aa4e4cb298ef0..3949519e9ec3f04392d555539fd5409dcab59dea 100644 (file)
@@ -38,7 +38,7 @@ private:
        string clearTypes;
        vector<string> types;
        
-       string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile;
+       string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile;
        string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile;
 
        
index 63f83e19a61fec97a6a64121da4f5ea973b5815a..8f05cfcc567a8f0e5d9cbdc867bf15bebd887f11 100644 (file)
@@ -8,6 +8,8 @@
  */
 
 #include "sharedcommand.h"
+#include "sharedutilities.h"
+
 //********************************************************************************************************************
 //sorts lowest to highest
 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
@@ -16,8 +18,9 @@ inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* r
 //**********************************************************************************************************************
 vector<string> SharedCommand::setParameters(){ 
        try {
-               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
+        CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none",false,false); parameters.push_back(pbiom);
+               CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup",false,false); parameters.push_back(plist);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "ListGroup",false,false); parameters.push_back(pgroup);
                //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
@@ -37,10 +40,10 @@ vector<string> SharedCommand::setParameters(){
 string SharedCommand::getHelpString(){ 
        try {
                string helpString = "";
-               helpString += "The make.shared command reads a list and group file and creates a shared file, as well as a rabund file for each group.\n";
-               helpString += "The make.shared command parameters are list, group, ordergroup, groups and label. list and group are required unless a current file is available.\n";
+               helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
+               helpString += "The make.shared command parameters are list, group, biom, groups and label. list and group are required unless a current file is available or you provide a biom file.\n";
                helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
-               helpString += "The label parameter allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
+               helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
                //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
                return helpString;
        }
@@ -111,12 +114,20 @@ SharedCommand::SharedCommand(string option)  {
                                         if (path == "") {      parameters["group"] = inputDir + it->second;            }
                                 }
                         
-                                it = parameters.find("ordergroup");
+                                /*it = parameters.find("ordergroup");
                                 //user has given a template file
                                 if(it != parameters.end()){ 
                                         path = m->hasPath(it->second);
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {      parameters["ordergroup"] = inputDir + it->second;               }
+                                }*/
+                 
+                 it = parameters.find("biom");
+                                //user has given a template file
+                                if(it != parameters.end()){ 
+                                        path = m->hasPath(it->second);
+                                        //if the user has not given a path then, add inputdir. else leave path alone.
+                                        if (path == "") {      parameters["biom"] = inputDir + it->second;             }
                                 }
                         }
                         
@@ -127,11 +138,13 @@ SharedCommand::SharedCommand(string option)  {
                         //check for required parameters
                         listfile = validParameter.validFile(parameters, "list", true);
                         if (listfile == "not open") { listfile = ""; abort = true; }
-                        else if (listfile == "not found") { 
-                                listfile = m->getListFile(); 
-                                if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
-                                else {         m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
-                        }else { m->setListFile(listfile); }    
+                        else if (listfile == "not found") { listfile = "";  }
+                        else { m->setListFile(listfile); }     
+            
+             biomfile = validParameter.validFile(parameters, "biom", true);
+             if (biomfile == "not open") { biomfile = ""; abort = true; }
+             else if (biomfile == "not found") { biomfile = "";  }
+             else { m->setBiomFile(biomfile); }                
                                                        
                         ordergroupfile = validParameter.validFile(parameters, "ordergroup", true);
                         if (ordergroupfile == "not open") { abort = true; }    
@@ -139,28 +152,37 @@ SharedCommand::SharedCommand(string option)  {
                                                 
                         groupfile = validParameter.validFile(parameters, "group", true);
                         if (groupfile == "not open") { groupfile = ""; abort = true; } 
-                        else if (groupfile == "not found") { 
-                                groupfile = m->getGroupFile(); 
-                                if (groupfile != "") { 
-                                        m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine();
-                                        groupMap = new GroupMap(groupfile);
-                                        
-                                        int error = groupMap->readMap();
-                                        if (error == 1) { abort = true; }
-                                        vector<string> allGroups = groupMap->getNamesOfGroups();
-                                        m->setAllGroups(allGroups);
-                                }
-                                else {         m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
-                        }else {  
-                                groupMap = new GroupMap(groupfile);
-                        
-                                int error = groupMap->readMap();
-                                if (error == 1) { abort = true; }
-                                vector<string> allGroups = groupMap->getNamesOfGroups();
-                                m->setAllGroups(allGroups);
-                                m->setGroupFile(groupfile);
-                        }
+                        else if (groupfile == "not found") { groupfile = ""; }
+                        else {  m->setGroupFile(groupfile); }
                         
+            if ((biomfile == "") && (listfile == "")) { 
+                               //is there are current file available for either of these?
+                               //give priority to list, then biom
+                               listfile = m->getListFile(); 
+                               if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       biomfile = m->getBiomFile(); 
+                                       if (biomfile != "") {  m->mothurOut("Using " + biomfile + " as input file for the biom parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               m->mothurOut("No valid current files. You must provide a list or biom file before you can use the make.shared command."); m->mothurOutEndLine(); 
+                                               abort = true;
+                                       }
+                               }
+                       }
+                       else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
+                       
+                       if (listfile != "") {
+                               if (groupfile == "") { 
+                                       groupfile = m->getGroupFile(); 
+                                       if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               m->mothurOut("You need to provide a groupfle if you are going to use the list format."); m->mothurOutEndLine(); 
+                                               abort = true; 
+                                       }       
+                               }
+                       }
+
+                        
                         string groups = validParameter.validFile(parameters, "groups", false);                 
                         if (groups == "not found") { groups = ""; }
                         else { 
@@ -190,238 +212,23 @@ int SharedCommand::execute(){
        try {
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-               
+                       
                //getting output filename
-               filename = listfile;
+        string filename = "";
+               if (listfile != "") { filename = listfile; }
+        else { filename = biomfile; }
                
                if (outputDir == "") { outputDir += m->hasPath(filename); }
                
                filename = outputDir + m->getRootName(m->getSimpleName(filename));
                filename = filename + "shared";
-               outputTypes["shared"].push_back(filename);
-               
-               m->openOutputFile(filename, out);
-               pickedGroups = false;
-               
-               //if hte user has not specified any groups then use them all
-               if (Groups.size() == 0) {
-                       Groups = groupMap->getNamesOfGroups(); m->setGroups(Groups);
-               }else { pickedGroups = true; }
-               
-               //fill filehandles with neccessary ofstreams
-               int i;
-               ofstream* temp;
-               for (i=0; i<Groups.size(); i++) {
-                       temp = new ofstream;
-                       filehandles[Groups[i]] = temp;
-               }
-               
-               //set fileroot
-               fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
-               
-               //clears file before we start to write to it below
-               for (int i=0; i<Groups.size(); i++) {
-                       m->mothurRemove((fileroot + Groups[i] + ".rabund"));
-                       outputNames.push_back((fileroot + Groups[i] + ".rabund"));
-                       outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
-               }
-               
-               //lookup.clear();
-               string errorOff = "no error";
-               //errorOff = "";
-               
-               //if user provided an order file containing the order the shared file should be in read it
-               if (ordergroupfile != "") { readOrderFile(); }
-               
-               input = new InputData(listfile, "shared");
-               SharedList = input->getSharedListVector();
-               string lastLabel = SharedList->getLabel();
-               vector<SharedRAbundVector*> lookup; 
-               
-               if (m->control_pressed) { 
-                       delete input; delete SharedList; delete groupMap; 
-                       for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
-                       out.close(); m->mothurRemove(filename); 
-                       for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));             }
-                       return 0; 
-               }
-               
-               //sanity check
-               int error = ListGroupSameSeqs();
-               
-               if ((!pickedGroups) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
-                       m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
-                       
-                       out.close();
-                       m->mothurRemove(filename); //remove blank shared file you made
-                       
-                       createMisMatchFile();
-                       
-                       //delete memory
-                       for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
-                               delete it3->second;
-                       }
-               
-                       delete input; delete SharedList; delete groupMap; 
-                       
-                       return 0; 
-               }
-               
-               if (error == 1) { m->control_pressed = true; }
-               
-               //if user has specified groups make new groupfile for them
-               if (pickedGroups) { //make new group file
-                       string groups = "";
-                       if (m->getNumGroups() < 4) {
-                               for (int i = 0; i < m->getNumGroups(); i++) {
-                                       groups += (m->getGroups())[i] + ".";
-                               }
-                       }else { groups = "merge"; }
-               
-                       string newGroupFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + groups + "groups";
-                       outputTypes["group"].push_back(newGroupFile); 
-                       outputNames.push_back(newGroupFile);
-                       ofstream outGroups;
-                       m->openOutputFile(newGroupFile, outGroups);
-               
-                       vector<string> names = groupMap->getNamesSeqs();
-                       string groupName;
-                       for (int i = 0; i < names.size(); i++) {
-                               groupName = groupMap->getGroup(names[i]);
-                               if (isValidGroup(groupName, m->getGroups())) {
-                                       outGroups << names[i] << '\t' << groupName << endl;
-                               }
-                       }
-                       outGroups.close();
-               }
-               
-               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
-               set<string> processedLabels;
-               set<string> userLabels = labels;        
-       
-               while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
-                       if (m->control_pressed) { 
-                               delete input; delete SharedList; delete groupMap;
-                               for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
-                               out.close(); m->mothurRemove(filename); 
-                               for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));             }
-                               return 0; 
-                       }
-               
-                       if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){
-                                       
-                                       lookup = SharedList->getSharedRAbundVector();
-                                       
-                                       m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                                       if (pickedGroups) { //check for otus with no seqs in them
-                                               eliminateZeroOTUS(lookup);
-                                       }
-                                       
-                                       if (m->control_pressed) { 
-                                               delete input; delete SharedList; delete groupMap; 
-                                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
-                                               for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
-                                               out.close(); m->mothurRemove(filename); 
-                                               for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));             }
-                                               return 0; 
-                                       }
-                                       
-                                       if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
-                                       printSharedData(lookup); //prints info to the .shared file
-                                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
-                               
-                                       processedLabels.insert(SharedList->getLabel());
-                                       userLabels.erase(SharedList->getLabel());
-                       }
-                       
-                       if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
-                                       string saveLabel = SharedList->getLabel();
-                                       
-                                       delete SharedList;
-                                       SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
-                                       
-                                       lookup = SharedList->getSharedRAbundVector();
-                                       m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                                       if (pickedGroups) { //check for otus with no seqs in them
-                                               eliminateZeroOTUS(lookup);
-                                       }
-                                       
-                                       
-                                       if (m->control_pressed) { 
-                                               delete input; delete SharedList; delete groupMap; 
-                                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
-                                               for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
-                                               out.close(); m->mothurRemove(filename); 
-                                               for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));             }
-                                               return 0; 
-                                       }
-                                       
-                                       if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
-                                       printSharedData(lookup); //prints info to the .shared file
-                                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
-                                       
-                                       processedLabels.insert(SharedList->getLabel());
-                                       userLabels.erase(SharedList->getLabel());
-                                       
-                                       //restore real lastlabel to save below
-                                       SharedList->setLabel(saveLabel);
-                       }
-                       
-               
-                       lastLabel = SharedList->getLabel();
-                               
-                       delete SharedList;
-                       SharedList = input->getSharedListVector(); //get new list vector to process
-               }
-               
-               //output error messages about any remaining user labels
-               set<string>::iterator it;
-               bool needToRun = false;
-               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
-                       if (processedLabels.count(lastLabel) != 1) {
-                               needToRun = true;
-                       }
-               }
-               
-               //run last label if you need to
-               if (needToRun == true)  {
-                       if (SharedList != NULL) {       delete SharedList;      }
-                       SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
-                                       
-                       lookup = SharedList->getSharedRAbundVector();
-                       m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                       if (pickedGroups) { //check for otus with no seqs in them
-                               eliminateZeroOTUS(lookup);
-                       }
-                       
-                       if (m->control_pressed) { 
-                               delete input;  delete groupMap;
-                                       for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
-                                       out.close(); m->mothurRemove(filename); 
-                                       for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));             }
-                                       return 0; 
-                       }
-                       
-                       if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
-                       printSharedData(lookup); //prints info to the .shared file
-                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
-                       delete SharedList;
-               }
-               
-               out.close();
-               
-               for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
-                       delete it3->second;
-               }
-
-               delete input; delete groupMap;
-               
-               if (m->control_pressed) { 
-                               m->mothurRemove(filename); 
-                               for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));             }
-                               return 0; 
-               }
+               outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                
+        if (listfile != "") {  createSharedFromListGroup(filename);  }
+        else {   createSharedFromBiom(filename);  }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); }  }
+        
                //set rabund file as new current rabundfile
                string current = "";
                itTypes = outputTypes.find("rabund");
@@ -442,7 +249,6 @@ int SharedCommand::execute(){
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOut(filename); m->mothurOutEndLine();
                m->mothurOutEndLine();
                
                return 0;
@@ -453,7 +259,718 @@ int SharedCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
+//Reads the biom file named by the biomfile member and writes its abundances to the shared file filename.
+//The biom file is processed one line at a time: getTag() names the field found on the line and the
+//field is then validated ("type", "matrix_type", "matrix_element_type", "shape") or stored
+//("rows", "columns"). The "data" field triggers the actual read and the printing of the shared file.
+//Problems are reported through m->mothurOut and flagged with m->control_pressed, which also stops
+//the read loop. Returns 0.
+int SharedCommand::createSharedFromBiom(string filename) {
+       try {
+        ofstream out;
+        m->openOutputFile(filename, out);
+        
+        /*{
+            "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
+            "format": "Biological Observation Matrix 0.9.1",
+            "format_url": "http://biom-format.org",
+            "type": "OTU table",
+            "generated_by": "mothur1.24.0",
+            "date": "Tue Apr 17 13:12:07 2012", */
+        
+        ifstream in;
+        m->openInputFile(biomfile, in);
+        
+        m->getline(in); m->gobble(in);  //grab first '{'
+        
+        string matrixFormat = "";
+        int numRows = 0;
+        int numCols = 0;
+        int shapeNumRows = 0;
+        int shapeNumCols = 0;
+        vector<string> otuNames;
+        vector<string> groupNames;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string line = m->getline(in); m->gobble(in);
+            
+            string tag = getTag(line);
+            
+            if (tag == "type") {
+                //check to make sure this is an OTU table
+                string type = getTag(line);
+                if (type != "OTU table") { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
+            }else if (tag == "matrix_type") {
+                //get type and check type
+                matrixFormat = getTag(line);
+                if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
+            }else if (tag == "matrix_element_type") {
+                //get type and check type
+                string matrixElementType = getTag(line);
+                if (matrixElementType != "int") { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid matrix_element_type for mothur. Only type allowed is int.\n"); m->control_pressed = true; }
+            }else if (tag == "rows") {
+                //read otu names
+                otuNames = readRows(line, in, numRows);  
+            }else if (tag == "columns") {
+                //read sample names
+                groupNames = readRows(line, in, numCols); 
+                
+                //if users selected groups, then remove the groups not wanted.
+                SharedUtil util;
+                vector<string> Groups = m->getGroups();
+                vector<string> allGroups = groupNames;
+                util.setGroups(Groups, allGroups);
+                m->setGroups(Groups);
+                
+                //fill filehandles with neccessary ofstreams
+                //NOTE(review): these ofstreams are not deleted in this function - confirm they are released elsewhere
+                for (int i = 0; i < Groups.size(); i++) {
+                    filehandles[Groups[i]] = new ofstream;
+                }
+                
+                //set fileroot
+                fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
+                
+                //clears file before we start to write to it below
+                for (int i=0; i<Groups.size(); i++) {
+                    m->mothurRemove((fileroot + Groups[i] + ".rabund"));
+                    outputNames.push_back((fileroot + Groups[i] + ".rabund"));
+                    outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
+                }
+
+            }else if (tag == "shape") {
+                getDims(line, shapeNumRows, shapeNumCols);
+                
+                //check shape against the rows and columns read so far
+                if (shapeNumCols != numCols) {
+                    m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true;
+                }
+                
+                if (shapeNumRows != numRows) {
+                    m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true;
+                }
+            }else if (tag == "data") {
+                m->currentBinLabels = otuNames;
+                
+                //read data
+                vector<SharedRAbundVector*> lookup = readData(matrixFormat, line, in, groupNames, otuNames.size());
+
+                //readData hands back one vector per kept group, so an empty result means there is nothing to print
+                if (lookup.size() == 0) {
+                    m->mothurOut("[ERROR]: no abundance data was found for the requested groups in " + biomfile + ".\n"); m->control_pressed = true;
+                }else {
+                    m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                    lookup[0]->printHeaders(out); 
+                    printSharedData(lookup, out);
+                }
+                
+                //readData allocates these vectors with new and nothing else frees them
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+            }
+        }
+        in.close();
+        
+                
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SharedCommand", "createSharedFromBiom");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//Reads the biom "data" field into one SharedRAbundVector per entry of groupNames.
+//  matrixFormat - "dense": every inner [..] holds one OTU's abundance for each sample, in sample order.
+//                 anything else is read as sparse: every inner [..] holds [otuNum, sampleNum, abundance].
+//  line         - the part of the data field already read from the file; it is parsed first.
+//  in           - the rest of the data field is read from here, one character at a time.
+//  groupNames   - sample names, in the column order used by the data.
+//  numOTUs      - number of bins each vector is created with.
+//Malformed or out of range entries are reported, flagged with m->control_pressed and not stored.
+//When m->control_pressed is seen the vectors built so far are returned immediately, unfiltered.
+//Otherwise vectors for groups that are not in m->getGroups() are deleted and, if any were removed,
+//bins that are zero in all remaining groups are eliminated. The caller must delete the returned vectors.
+vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, ifstream& in, vector<string>& groupNames, int numOTUs) {
+       try {
+        
+        vector<SharedRAbundVector*> lookup; 
+        
+        //creates new sharedRAbunds
+        for (int i = 0; i < groupNames.size(); i++) {
+            SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
+            temp->setLabel("dummy");
+            temp->setGroup(groupNames[i]);
+            lookup.push_back(temp);
+        }
+        
+        bool dataStart = false;     //true once the outer '[' of the data field has been seen
+        bool inBrackets = false;    //true while inside one inner [..] entry
+        string num = "";            //characters of the number currently being read
+        vector<int> nums;           //numbers read so far for the current inner [..] entry
+        int otuCount = 0;           //index of the next OTU when the format is dense
+        for (int i = 0; i < line.length(); i++) {
+            
+            if (m->control_pressed) { return lookup; }
+            
+            //look for opening [ to indicate data is starting
+            if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++;  if (!(i < line.length())) { break; } }
+            else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
+                
+            if (dataStart) {
+                if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++;  if (!(i < line.length())) { break; } }
+                else if ((line[i] == ']') && (inBrackets)) { 
+                    inBrackets = false; 
+                    int temp;
+                    m->mothurConvert(num, temp);
+                    nums.push_back(temp);
+                    num = "";
+                    
+                    //save info to vectors
+                    if (matrixFormat == "dense") {
+                        
+                        //sanity check - a bad entry is reported and skipped so the indexing below stays in range
+                        if ((nums.size() != lookup.size()) || (otuCount >= numOTUs)) { m->mothurOut("[ERROR]: trouble parsing OTU data.  OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
+                        else {
+                            //set abundances for this otu
+                            //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
+                            for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
+                        }
+                        
+                        otuCount++;
+                    }else {
+                        //sanity check - need exactly three numbers, with the otu and sample numbers inside the table
+                        if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
+                        else if ((nums[0] < 0) || (nums[0] >= numOTUs) || (nums[1] < 0) || (nums[1] >= static_cast<int>(lookup.size()))) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
+                        else {
+                            //nums contains [otuNum, sampleNum, abundance]
+                            lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
+                        }
+                    }
+                    nums.clear();
+                }
+                
+                if (inBrackets) {
+                    if (line[i] == ',') {
+                        int temp;
+                        m->mothurConvert(num, temp);
+                        nums.push_back(temp);
+                        num = "";
+                    }else { if (!isspace(line[i])) { num += line[i]; }  }
+                }
+            }
+        }
+        
+        //same as above just reading from file.
+        while (!in.eof()) {
+            
+            char c = in.get(); m->gobble(in);
+            
+            if (m->control_pressed) { return lookup; }
+            
+            //look for opening [ to indicate data is starting
+            if ((c == '[') && (!dataStart)) { dataStart = true; c = in.get();  if (in.eof()) { break; } }
+            else if ((c == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
+              
+            if (dataStart) {
+                if ((c == '[') && (!inBrackets)) { inBrackets = true; c = in.get();  if (in.eof()) { break; }  }
+                else if ((c == ']') && (inBrackets)) { 
+                    inBrackets = false; 
+                    int temp;
+                    m->mothurConvert(num, temp);
+                    nums.push_back(temp);
+                    num = "";
+                    
+                    //save info to vectors
+                    if (matrixFormat == "dense") {
+                        
+                        //sanity check - a bad entry is reported and skipped so the indexing below stays in range
+                        if ((nums.size() != lookup.size()) || (otuCount >= numOTUs)) { m->mothurOut("[ERROR]: trouble parsing OTU data.  OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
+                        else {
+                            //set abundances for this otu
+                            //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
+                            for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
+                        }
+                        
+                        otuCount++;
+                    }else {
+                        //sanity check - need exactly three numbers, with the otu and sample numbers inside the table
+                        if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
+                        else if ((nums[0] < 0) || (nums[0] >= numOTUs) || (nums[1] < 0) || (nums[1] >= static_cast<int>(lookup.size()))) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
+                        else {
+                            //nums contains [otuNum, sampleNum, abundance]
+                            lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
+                        }
+                    }
+                    nums.clear();
+                }
+                
+                if (inBrackets) {
+                    if (c == ',') {
+                        int temp;
+                        m->mothurConvert(num, temp);
+                        nums.push_back(temp);
+                        num = "";
+                    }else { if (!isspace(c)) { num += c; }  }
+                }
+            }
+        }
+        
+        SharedUtil util;
+        
+               bool remove = false;
+               for (int i = 0; i < lookup.size(); i++) {
+                       //if this sharedrabund is not from a group the user wants then delete it.
+                       if (util.isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) { 
+                               remove = true;
+                               delete lookup[i]; lookup[i] = NULL;
+                               lookup.erase(lookup.begin()+i); 
+                               i--; 
+                       }
+               }
+               
+               //dropping groups can leave otus that are zero everywhere
+               if (remove) { eliminateZeroOTUS(lookup); }
+
+        
+        return lookup;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SharedCommand", "readData");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//Removes every bin whose abundance is 0 in all of the vectors in thislookup.
+//New vectors carrying the same label and group are built from the surviving bins, the old
+//vectors are deleted and thislookup is pointed at the new ones. m->currentBinLabels is replaced
+//with the labels of the surviving bins; a bin without an existing label gets "Otu" plus its
+//1-based number, zero padded to the width of the original bin count.
+//If m->control_pressed is set part way through, the new vectors are freed and thislookup is
+//left untouched. An empty thislookup is left untouched as well. Returns 0.
+int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
+    try {
+        
+        //nothing to look at, and thislookup[0] below would be out of range
+        if (thislookup.size() == 0) { return 0; }
+        
+        vector<SharedRAbundVector*> newLookup;
+        for (int i = 0; i < thislookup.size(); i++) {
+            SharedRAbundVector* temp = new SharedRAbundVector();
+            temp->setLabel(thislookup[i]->getLabel());
+            temp->setGroup(thislookup[i]->getGroup());
+            newLookup.push_back(temp);
+        }
+        
+        //for each bin
+        vector<string> newBinLabels;
+        string snumBins = toString(thislookup[0]->getNumBins());
+        for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
+            if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+            
+            //look at each sharedRabund and make sure they are not all zero
+            bool allZero = true;
+            for (int j = 0; j < thislookup.size(); j++) {
+                if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
+            }
+            
+            //if they are not all zero add this bin
+            if (!allZero) {
+                for (int j = 0; j < thislookup.size(); j++) {
+                    newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
+                }
+                
+                //if there is a bin label use it otherwise make one
+                string binLabel = "Otu";
+                string sbinNumber = toString(i+1);
+                if (sbinNumber.length() < snumBins.length()) { 
+                    int diff = snumBins.length() - sbinNumber.length();
+                    for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                }
+                binLabel += sbinNumber; 
+                if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                
+                newBinLabels.push_back(binLabel);
+            }
+        }
+        
+        //the old vectors are replaced by the filtered ones
+        for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
+        
+        thislookup = newLookup;
+        m->currentBinLabels = newBinLabels;
+        
+        return 0;
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+//Pulls the two numbers out of a biom shape line, i.e. the [rows, cols] part.
+//Text in front of the first '[' is ignored. The text in front of a ',' is converted into
+//shapeNumRows and the text in front of the closing ']' into shapeNumCols; whitespace is dropped.
+//An argument is left as it was when its delimiter is never found. Returns 0.
+int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
+       try {
+        //the shape starts right after the first '['
+        string::size_type open = line.find('[');
+        if (open == string::npos) { return 0; }
+        
+        string digits = "";
+        for (string::size_type pos = open + 1; pos < line.length(); pos++) {
+            char c = line[pos];
+            
+            //a ']' closes the shape, except directly after the '[' where it is kept as text
+            if ((c == ']') && (pos != open + 1)) {
+                m->mothurConvert(digits, shapeNumCols);
+                break;
+            }
+            
+            if (c == ',') {
+                m->mothurConvert(digits, shapeNumRows);
+                digits = "";
+            }
+            else if (!isspace(c)) { digits += c; }
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SharedCommand", "getDims");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//Collects the "id" of every object in a biom-style "rows":[ {...}, {...} ] array.
+//  line    - text already read from the file; it is scanned first.
+//  in      - stream positioned right after line; read one character at a time only
+//            if the array was not closed within line.
+//  numRows - incremented once per row object found (it is never reset here, so the
+//            caller decides the starting value).
+//Returns the ids in the order they were encountered, with any double quotes removed.
+//If m->control_pressed is set, scanning stops and the names gathered so far are returned.
+vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows) {
+       try {
+        /*"rows":[
+         {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
+         {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
+         ...
+         ],*/
+        vector<string> names;
+        //note the naming: the *Brace counters track '[' and ']', the *Paren counters track '{' and '}'
+        int countOpenBrace = 0;
+        int countClosedBrace = 0;
+        int openParen = 0;
+        int closeParen = 0;
+        string nextRow = ""; //text of the row object currently being read, without any []{} characters
+        bool end = false;    //set once the closing ']' of the rows array has been seen
+        
+        //pass 1: consume whatever part of the rows array is already in line
+        for (int i = 0; i < line.length(); i++) {
+            
+            if (m->control_pressed) { return names; }
+            
+            //bracket and brace characters are only counted, never stored; any other
+            //character is stored only while inside a '{' ... '}' row object
+            if (line[i] == '[')         { countOpenBrace++;     }
+            else if (line[i] == ']')    { countClosedBrace++;   }
+            else if (line[i] == '{')    { openParen++;          }
+            else if (line[i] == '}')    { closeParen++;         }
+            else if (openParen != 0)    { nextRow += line[i];   }  //you are reading the row info
+            
+            //you have reached the end of the rows info
+            if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
+            //every '{' opened for this row (including nested metadata objects) has been closed
+            if ((openParen == closeParen) && (closeParen != 0)) { //process row 
+                numRows++;
+                vector<string> items;
+                m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
+                //NOTE(review): items[0] and items[1] are read without a size check - assumes the
+                //first comma-separated field always looks like "id":"name"; confirm splitAtChar's contract
+                string part = items[0]; items.clear();
+                m->splitAtChar(part, items, ':'); //split part we want containing the ids
+                string name = items[1];
+                
+                //remove "" if needed
+                int pos = name.find("\"");
+                if (pos != string::npos) {
+                    string newName = "";
+                    for (int k = 0; k < name.length(); k++) {
+                        if (name[k] != '\"') { newName += name[k]; }
+                    }
+                    name = newName;
+                }
+                names.push_back(name);
+                //reset the per-row state; the '[' / ']' counters keep running across rows
+                nextRow = "";
+                openParen = 0;
+                closeParen = 0;
+            }
+        }
+        
+        //keep reading
+        //pass 2: same state machine as above, fed one character at a time from the stream
+        if (!end) {
+            while (!in.eof()) {
+                
+                if (m->control_pressed) { break; }
+                
+                //NOTE(review): gobble presumably skips whitespace after each character - confirm;
+                //if so, whitespace is dropped here but kept in pass 1
+                char c = in.get(); m->gobble(in);
+                
+                if (c == '[')               { countOpenBrace++;     }
+                else if (c == ']')          { countClosedBrace++;   }
+                else if (c == '{')          { openParen++;          }
+                else if (c == '}')          { closeParen++;         }
+                else if (openParen != 0)    { nextRow += c;         }  //you are reading the row info
+                
+                
+                //you have reached the end of the rows info
+                if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
+                if ((openParen == closeParen) && (closeParen != 0)) { //process row 
+                    numRows++;
+                    vector<string> items;
+                    m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
+                    //NOTE(review): same unchecked items[0] / items[1] access as in pass 1
+                    string part = items[0]; items.clear();
+                    m->splitAtChar(part, items, ':'); //split part we want containing the ids
+                    string name = items[1];
+                    
+                    //remove "" if needed
+                    int pos = name.find("\"");
+                    if (pos != string::npos) {
+                        string newName = "";
+                        for (int k = 0; k < name.length(); k++) {
+                            if (name[k] != '\"') { newName += name[k]; }
+                        }
+                        name = newName;
+                    }
+                    names.push_back(name);
+                    nextRow = "";
+                    openParen = 0;
+                    closeParen = 0;
+                }  
+            }
+        }
+        
+        return names;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SharedCommand", "readRows");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//designed for things like "type": "OTU table", returns map type -> OTU table
+string SharedCommand::getTag(string& line) {
+       try {
+        bool inQuotes = false;
+        string tag = "";
+        char c = '\"';
+        
+        for (int i = 0; i < line.length(); i++) {
+        
+            //you want to ignore any ; until you reach the next '
+                       if ((line[i] == c) && (!inQuotes)) {  inQuotes = true;  } 
+                       else if ((line[i] == c) && (inQuotes)) {  
+                inQuotes= false;  
+                line = line.substr(i+1);
+                return tag;
+            } 
+            
+                       if (inQuotes) {  if (line[i] != c) { tag += line[i]; }  }
+        }
+        
+        return tag;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SharedCommand", "getInfo");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SharedCommand::createSharedFromListGroup(string filename) {
+       try {
+        ofstream out;
+        m->openOutputFile(filename, out);
+        
+        GroupMap* groupMap = new GroupMap(groupfile);
+        
+        int groupError = groupMap->readMap();
+        if (groupError == 1) { delete groupMap; return 0; }
+        vector<string> allGroups = groupMap->getNamesOfGroups();
+        m->setAllGroups(allGroups);
+        
+        pickedGroups = false;
+        
+        //if hte user has not specified any groups then use them all
+        if (Groups.size() == 0) {
+            Groups = groupMap->getNamesOfGroups(); m->setGroups(Groups);
+        }else { pickedGroups = true; }
+        
+        //fill filehandles with neccessary ofstreams
+        int i;
+        ofstream* temp;
+        for (i=0; i<Groups.size(); i++) {
+            temp = new ofstream;
+            filehandles[Groups[i]] = temp;
+        }
+        
+        //set fileroot
+        fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
+        
+        //clears file before we start to write to it below
+        for (int i=0; i<Groups.size(); i++) {
+            m->mothurRemove((fileroot + Groups[i] + ".rabund"));
+            outputNames.push_back((fileroot + Groups[i] + ".rabund"));
+            outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
+        }
+        
+        string errorOff = "no error";
+        
+        //if user provided an order file containing the order the shared file should be in read it
+        //if (ordergroupfile != "") { readOrderFile(); }
+        
+        InputData input(listfile, "shared");
+        SharedListVector* SharedList = input.getSharedListVector();
+        string lastLabel = SharedList->getLabel();
+        vector<SharedRAbundVector*> lookup; 
+        
+        if (m->control_pressed) { 
+            delete SharedList; delete groupMap; 
+            for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
+            out.close(); m->mothurRemove(filename); 
+            for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));                }
+            return 0; 
+        }
+        
+        //sanity check
+        vector<string> groupMapNamesSeqs = groupMap->getNamesSeqs();
+        int error = ListGroupSameSeqs(groupMapNamesSeqs, SharedList);
+        
+        if ((!pickedGroups) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
+            m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
+            
+            out.close();
+            m->mothurRemove(filename); //remove blank shared file you made
+            
+            createMisMatchFile(SharedList, groupMap);
+            
+            //delete memory
+            for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+                delete it3->second;
+            }
+            
+            delete SharedList; delete groupMap; 
+            
+            return 0; 
+        }
+        
+        if (error == 1) { m->control_pressed = true; }
+        
+        //if user has specified groups make new groupfile for them
+        if (pickedGroups) { //make new group file
+            string groups = "";
+            if (m->getNumGroups() < 4) {
+                for (int i = 0; i < m->getNumGroups(); i++) {
+                    groups += (m->getGroups())[i] + ".";
+                }
+            }else { groups = "merge"; }
+            
+            string newGroupFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + groups + "groups";
+            outputTypes["group"].push_back(newGroupFile); 
+            outputNames.push_back(newGroupFile);
+            ofstream outGroups;
+            m->openOutputFile(newGroupFile, outGroups);
+            
+            vector<string> names = groupMap->getNamesSeqs();
+            string groupName;
+            for (int i = 0; i < names.size(); i++) {
+                groupName = groupMap->getGroup(names[i]);
+                if (isValidGroup(groupName, m->getGroups())) {
+                    outGroups << names[i] << '\t' << groupName << endl;
+                }
+            }
+            outGroups.close();
+        }
+        
+        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        set<string> processedLabels;
+        set<string> userLabels = labels;       
+        
+        while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+            if (m->control_pressed) { 
+                delete SharedList; delete groupMap;
+                for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
+                out.close(); m->mothurRemove(filename); 
+                for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));            }
+                return 0; 
+            }
+            
+            if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){
+                
+                lookup = SharedList->getSharedRAbundVector();
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                if (pickedGroups) { //check for otus with no seqs in them
+                    eliminateZeroOTUS(lookup);
+                }
+                
+                if (m->control_pressed) { 
+                    delete SharedList; delete groupMap; 
+                    for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                    for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
+                    out.close(); m->mothurRemove(filename); 
+                    for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));                }
+                    return 0; 
+                }
+                
+                if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+                printSharedData(lookup, out); //prints info to the .shared file
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                
+                processedLabels.insert(SharedList->getLabel());
+                userLabels.erase(SharedList->getLabel());
+            }
+            
+            if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = SharedList->getLabel();
+                
+                delete SharedList;
+                SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
+                
+                lookup = SharedList->getSharedRAbundVector();
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                if (pickedGroups) { //check for otus with no seqs in them
+                    eliminateZeroOTUS(lookup);
+                }
+                
+                
+                if (m->control_pressed) { 
+                    delete SharedList; delete groupMap; 
+                    for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                    for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
+                    out.close(); m->mothurRemove(filename); 
+                    for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));                }
+                    return 0; 
+                }
+                
+                if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+                printSharedData(lookup, out); //prints info to the .shared file
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                
+                processedLabels.insert(SharedList->getLabel());
+                userLabels.erase(SharedList->getLabel());
+                
+                //restore real lastlabel to save below
+                SharedList->setLabel(saveLabel);
+            }
+            
+            
+            lastLabel = SharedList->getLabel();
+            
+            delete SharedList;
+            SharedList = input.getSharedListVector(); //get new list vector to process
+        }
+        
+        //output error messages about any remaining user labels
+        set<string>::iterator it;
+        bool needToRun = false;
+        for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+            if (processedLabels.count(lastLabel) != 1) {
+                needToRun = true;
+            }
+        }
+        
+        //run last label if you need to
+        if (needToRun == true)  {
+            if (SharedList != NULL) {  delete SharedList;      }
+            SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
+            
+            lookup = SharedList->getSharedRAbundVector();
+            m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+            if (pickedGroups) { //check for otus with no seqs in them
+                eliminateZeroOTUS(lookup);
+            }
+            
+            if (m->control_pressed) { 
+                delete groupMap;
+                for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
+                out.close(); m->mothurRemove(filename); 
+                for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));            }
+                return 0; 
+            }
+            
+            if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+            printSharedData(lookup, out); //prints info to the .shared file
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+            delete SharedList;
+        }
+        
+        out.close();
+        
+        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+            delete it3->second;
+        }
+        
+        delete groupMap;
+               
+        if (m->control_pressed) { 
+            m->mothurRemove(filename); 
+            for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + ".rabund"));                }
+            return 0; 
+        }
+
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup, ofstream& out) {
        try {
                
                if (order.size() == 0) { //user has not specified an order so do aplabetically
@@ -517,50 +1034,7 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
        }
 }
 //**********************************************************************************************************************
-int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
-       try {
-               
-               vector<SharedRAbundVector*> newLookup;
-               for (int i = 0; i < thislookup.size(); i++) {
-                       SharedRAbundVector* temp = new SharedRAbundVector();
-                       temp->setLabel(thislookup[i]->getLabel());
-                       temp->setGroup(thislookup[i]->getGroup());
-                       newLookup.push_back(temp);
-               }
-               
-               //for each bin
-               for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
-                       if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-               
-                       //look at each sharedRabund and make sure they are not all zero
-                       bool allZero = true;
-                       for (int j = 0; j < thislookup.size(); j++) {
-                               if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
-                       }
-                       
-                       //if they are not all zero add this bin
-                       if (!allZero) {
-                               for (int j = 0; j < thislookup.size(); j++) {
-                                       newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
-                               }
-                               //if there is a bin label use it otherwise make one
-                       }
-                       //else{  cout << "bin # " << i << " is all zeros" << endl;  }
-               }
-       
-               for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
-               thislookup = newLookup;
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SharedCommand::createMisMatchFile() {
+int SharedCommand::createMisMatchFile(SharedListVector* SharedList, GroupMap* groupMap) {
        try {
                ofstream outMisMatch;
                string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(listfile));
@@ -658,12 +1132,9 @@ int SharedCommand::createMisMatchFile() {
        }
 }
 //**********************************************************************************************************************
-int SharedCommand::ListGroupSameSeqs() {
+int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
        try {
-               
                int error = 0; 
-               
-               vector<string> groupMapsSeqs = groupMap->getNamesSeqs();
                
                set<string> groupNamesSeqs;
                for(int i = 0; i < groupMapsSeqs.size(); i++) {
index 861632ce7d6a8026c14b463387b9d8dbb320522d..5d0daa2fdc484f55b78613541e1e82ef547a7d09 100644 (file)
@@ -38,20 +38,22 @@ public:
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
-       void printSharedData(vector<SharedRAbundVector*>);
-       int createMisMatchFile();
+       void printSharedData(vector<SharedRAbundVector*>, ofstream&);
+       int createMisMatchFile(SharedListVector*, GroupMap*);
        int readOrderFile();
        bool isValidGroup(string, vector<string>);
        int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
-       int ListGroupSameSeqs();
+       int ListGroupSameSeqs(vector<string>&, SharedListVector*);
+    int createSharedFromListGroup(string);
+    int createSharedFromBiom(string);
+    string getTag(string&);
+    vector<string> readRows(string, ifstream&, int&); 
+    int getDims(string, int&, int&);
+    vector<SharedRAbundVector*> readData(string, string, ifstream&, vector<string>&, int);
        
-       SharedListVector* SharedList;
-       InputData* input;
-       GroupMap* groupMap;
        vector<string> Groups, outputNames, order;
        set<string> labels;
-       ofstream out;
-       string filename, fileroot, outputDir, listfile, groupfile, ordergroupfile;
+       string fileroot, outputDir, listfile, groupfile, biomfile, ordergroupfile;
        bool firsttime, pickedGroups, abort, allLines;
        map<string, ofstream*> filehandles;
        map<string, ofstream*>::iterator it3;
diff --git a/uchime_src/addtargets2.cpp b/uchime_src/addtargets2.cpp
new file mode 100644 (file)
index 0000000..f3f6377
--- /dev/null
@@ -0,0 +1,38 @@
+#if    UCHIMES\r
+\r
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "ultra.h"\r
+#include <set>\r
+\r
+const float MAX_WORD_COUNT_DROP = 1;\r
+\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order);\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path);\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path);\r
+void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts,\r
+  vector<unsigned> &Order);\r
+\r
+void AddTargets(SeqDB &DB, const SeqData &Query, set<unsigned> &TargetIndexes)\r
+       {\r
+       const unsigned SeqCount = DB.GetSeqCount();\r
+       if (SeqCount == 0)\r
+               return;\r
+\r
+       vector<float> WordCounts;\r
+       vector<unsigned> Order;\r
+       USort(Query, DB, WordCounts, Order);\r
+       asserta(SIZE(Order) == SeqCount);\r
+       unsigned TopSeqIndex = Order[0];\r
+       float TopWordCount = WordCounts[TopSeqIndex];\r
+       for (unsigned i = 0; i < SeqCount; ++i)\r
+               {\r
+               unsigned SeqIndex = Order[i];\r
+               float WordCount = WordCounts[SeqIndex];\r
+               if (TopWordCount - WordCount > MAX_WORD_COUNT_DROP)\r
+                       return;\r
+               TargetIndexes.insert(SeqIndex);\r
+               }\r
+       }\r
+\r
+#endif\r
diff --git a/uchime_src/alignchime.cpp b/uchime_src/alignchime.cpp
new file mode 100644 (file)
index 0000000..d7b05a8
--- /dev/null
@@ -0,0 +1,649 @@
+#include "myutils.h"\r
+#include "seq.h"\r
+#include "chime.h"\r
+#include "dp.h"\r
+\r
+#define TRACE          0\r
+#define TRACE_BS       0\r
+\r
+void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
+  const string &PathQA, const string &PathQB,\r
+  string &Q3, string &A3, string &B3);\r
+\r
+void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit);\r
+\r
+// Returns Y divided by a weighted penalty: opt_xn*(N + opt_dn) + opt_xa*A.\r
+// NOTE(review): callers in this file pass the "for", "for the other parent" and\r
+// "abstain" counts of one segment as Y, N and A - confirm against chime.h.\r
+double GetScore2(double Y, double N, double A)\r
+       {\r
+       const double Penalty = opt_xn*(N + opt_dn) + opt_xa*A;\r
+       return Y/Penalty;\r
+       }\r
+\r
+void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       Hit.Clear();\r
+       Hit.QLabel = QLabel;\r
+\r
+       const byte *Q3Seq = (const byte *) Q3.c_str();\r
+       const byte *A3Seq = (const byte *) A3.c_str();\r
+       const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+       const unsigned ColCount = SIZE(Q3);\r
+       asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+#if    TRACE\r
+       Log("Q %5u %*.*s\n", ColCount, ColCount, ColCount, Q3Seq);\r
+       Log("A %5u %*.*s\n", ColCount, ColCount, ColCount, A3Seq);\r
+       Log("B %5u %*.*s\n", ColCount, ColCount, ColCount, B3Seq);\r
+#endif\r
+\r
+// Discard terminal gaps\r
+       unsigned ColLo = UINT_MAX;\r
+       unsigned ColHi = UINT_MAX;\r
+       for (unsigned Col = 2; Col + 2 < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (isacgt(q) && isacgt(a) && isacgt(b))\r
+                       {\r
+                       if (ColLo == UINT_MAX)\r
+                               ColLo = Col;\r
+                       ColHi = Col;\r
+                       }\r
+               }\r
+\r
+       if (ColLo == UINT_MAX)\r
+               return;\r
+\r
+       unsigned QPos = 0;\r
+       unsigned APos = 0;\r
+       unsigned BPos = 0;\r
+       unsigned DiffCount = 0;\r
+\r
+       vector<unsigned> ColToQPos(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumCount(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumSameA(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumSameB(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumForA(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumForB(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumAbstain(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumAgainst(ColLo, UINT_MAX);\r
+\r
+       unsigned SumSameA = 0;\r
+       unsigned SumSameB = 0;\r
+       unsigned SumSameAB = 0;\r
+       unsigned Sum = 0;\r
+       unsigned SumForA = 0;\r
+       unsigned SumForB = 0;\r
+       unsigned SumAbstain = 0;\r
+       unsigned SumAgainst = 0;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (isacgt(q) && isacgt(a) && isacgt(b))\r
+                       {\r
+                       if (q == a)\r
+                               ++SumSameA;\r
+                       if (q == b)\r
+                               ++SumSameB;\r
+                       if (a == b)\r
+                               ++SumSameAB;\r
+                       if (q == a && q != b)\r
+                               ++SumForA;\r
+                       if (q == b && q != a)\r
+                               ++SumForB;\r
+                       if (a == b && q != a)\r
+                               ++SumAgainst;\r
+                       if (q != a && q != b)\r
+                               ++SumAbstain;\r
+                       ++Sum;\r
+                       }\r
+\r
+               ColToQPos.push_back(QPos);\r
+               AccumSameA.push_back(SumSameA);\r
+               AccumSameB.push_back(SumSameB);\r
+               AccumCount.push_back(Sum);\r
+               AccumForA.push_back(SumForA);\r
+               AccumForB.push_back(SumForB);\r
+               AccumAbstain.push_back(SumAbstain);\r
+               AccumAgainst.push_back(SumAgainst);\r
+\r
+               if (q != '-')\r
+                       ++QPos;\r
+               if (a != '-')\r
+                       ++APos;\r
+               if (b != '-')\r
+                       ++BPos;\r
+               }\r
+\r
+       asserta(SIZE(ColToQPos) == ColHi+1);\r
+       asserta(SIZE(AccumSameA) == ColHi+1);\r
+       asserta(SIZE(AccumSameB) == ColHi+1);\r
+       asserta(SIZE(AccumAbstain) == ColHi+1);\r
+       asserta(SIZE(AccumAgainst) == ColHi+1);\r
+\r
+       double IdQA = double(SumSameA)/Sum;\r
+       double IdQB = double(SumSameB)/Sum;\r
+       double IdAB = double(SumSameAB)/Sum;\r
+       double MaxId = max(IdQA, IdQB);\r
+\r
+#if    TRACE\r
+       Log("IdQA=%.1f%% IdQB=%.1f%% IdAB=%.1f\n", IdQA*100.0, IdQB*100.0, IdAB*100.0);\r
+       Log("\n");\r
+       Log("    x  AQB   IdAL   IdBL   IdAR   IdBR   DivAB   DivBA    YAL    YBL    YAR    YBR    AbL    AbR  ScoreAB  ScoreAB    XLo    Xhi\n");\r
+       Log("-----  ---  -----  -----  -----  -----  ------  ------  -----  -----  -----  -----  -----  -----  -------  -------  -----  -----\n");\r
+#endif\r
+       unsigned BestXLo = UINT_MAX;\r
+       unsigned BestXHi = UINT_MAX;\r
+       double BestDiv = 0.0;\r
+       double BestIdQM = 0.0;\r
+       double BestScore = 0.0;\r
+\r
+// Find range of cols BestXLo..BestXHi that maximizes score\r
+       bool FirstA = false;\r
+\r
+// NOTE: Must be < ColHi not <= because use Col+1 below\r
+       for (unsigned Col = ColLo; Col < ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               unsigned SameAL = AccumSameA[Col];\r
+               unsigned SameBL = AccumSameB[Col];\r
+               unsigned SameAR = SumSameA - AccumSameA[Col];\r
+               unsigned SameBR = SumSameB - AccumSameB[Col];\r
+\r
+               double IdAB = double(SameAL + SameBR)/Sum;\r
+               double IdBA = double(SameBL + SameAR)/Sum;\r
+\r
+               unsigned ForAL = AccumForA[Col];\r
+               unsigned ForBL = AccumForB[Col];\r
+               unsigned ForAR = SumForA - AccumForA[Col+1];\r
+               unsigned ForBR = SumForB - AccumForB[Col+1];\r
+               unsigned AbL = AccumAbstain[Col];\r
+               unsigned AbR = SumAbstain - AccumAbstain[Col+1];\r
+\r
+               double ScoreAB = GetScore2(ForAL, ForBL, AbL)*GetScore2(ForBR, ForAR, AbR);\r
+               double ScoreBA = GetScore2(ForBL, ForAL, AbL)*GetScore2(ForAR, ForBR, AbR);\r
+       \r
+               double DivAB = IdAB/MaxId;\r
+               double DivBA = IdBA/MaxId;\r
+               double MaxDiv = max(DivAB, DivBA);\r
+\r
+               //if (MaxDiv > BestDiv)\r
+               //      {\r
+               //      BestDiv = MaxDiv;\r
+               //      BestXLo = Col;\r
+               //      BestXHi = Col;\r
+               //      FirstA = (DivAB > DivBA);\r
+               //      if (FirstA)\r
+               //              BestIdQM = IdAB;\r
+               //      else\r
+               //              BestIdQM = IdBA;\r
+               //      }\r
+               //else if (MaxDiv == BestDiv)\r
+               //      BestXHi = Col;\r
+\r
+               double MaxScore = max(ScoreAB, ScoreBA);\r
+               if (MaxScore > BestScore)\r
+                       {\r
+                       BestScore = MaxScore;\r
+                       BestXLo = Col;\r
+                       BestXHi = Col;\r
+                       FirstA = (ScoreAB > ScoreBA);\r
+                       if (FirstA)\r
+                               BestIdQM = IdAB;\r
+                       else\r
+                               BestIdQM = IdBA;\r
+                       if (MaxDiv > BestDiv)\r
+                               BestDiv = MaxDiv;\r
+                       }\r
+               else if (MaxScore == BestScore)\r
+                       {\r
+                       BestXHi = Col;\r
+                       if (MaxDiv > BestDiv)\r
+                               BestDiv = MaxDiv;\r
+                       }\r
+\r
+#if    TRACE\r
+               {\r
+               Log("%5u", Col);\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               Log("  %c%c%c", a, q, b);\r
+               Log("  %5u", SameAL);\r
+               Log("  %5u", SameBL);\r
+               Log("  %5u", SameAR);\r
+               Log("  %5u", SameBR);\r
+               Log("  %5.4f", DivAB);\r
+               Log("  %5.4f", DivBA);\r
+               Log("  %5u", ForAL);\r
+               Log("  %5u", ForBL);\r
+               Log("  %5u", ForAR);\r
+               Log("  %5u", ForBR);\r
+               Log("  %5u", AbL);\r
+               Log("  %5u", AbR);\r
+               Log("  %7.4f", ScoreAB);\r
+               Log("  %7.4f", ScoreBA);\r
+               if (BestXLo != UINT_MAX)\r
+                       Log("  %5u", BestXLo);\r
+               if (BestXHi != UINT_MAX)\r
+                       Log("  %5u", BestXHi);\r
+               Log("\n");\r
+               }\r
+#endif\r
+               }\r
+\r
+       if (BestXLo == UINT_MAX)\r
+               {\r
+#if    TRACE\r
+               Log("\n");\r
+               Log("No crossover found.\n");\r
+#endif\r
+               return;\r
+               }\r
+#if    TRACE\r
+       Log("BestX col %u - %u\n", BestXLo, BestXHi);\r
+#endif\r
+\r
+// Find maximum region of identity within BestXLo..BestXHi\r
+       unsigned ColXLo = (BestXLo + BestXHi)/2;\r
+       unsigned ColXHi = ColXLo;\r
+       unsigned SegLo = UINT_MAX;\r
+       unsigned SegHi = UINT_MAX;\r
+       for (unsigned Col = BestXLo; Col <= BestXHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (q == a && q == b)\r
+                       {\r
+                       if (SegLo == UINT_MAX)\r
+                               SegLo = Col;\r
+                       SegHi = Col;\r
+                       }\r
+               else\r
+                       {\r
+                       unsigned SegLength = SegHi - SegLo + 1;\r
+                       unsigned BestSegLength = ColXHi - ColXLo + 1;\r
+                       if (SegLength > BestSegLength)\r
+                               {\r
+                               ColXLo = SegLo;\r
+                               ColXHi = SegHi;\r
+                               }\r
+                       SegLo = UINT_MAX;\r
+                       SegHi = UINT_MAX;\r
+                       }\r
+               }\r
+       unsigned SegLength = SegHi - SegLo + 1;\r
+       unsigned BestSegLength = ColXHi - ColXLo + 1;\r
+       if (SegLength > BestSegLength)\r
+               {\r
+               ColXLo = SegLo;\r
+               ColXHi = SegHi;\r
+               }\r
+\r
+       QPos = 0;\r
+       for (unsigned x = 0; x < ColCount; ++x)\r
+               {\r
+               if (x == ColXLo)\r
+                       Hit.QXLo = QPos;\r
+               else if (x == ColXHi)\r
+                       {\r
+                       Hit.QXHi = QPos;\r
+                       break;\r
+                       }\r
+               char q = Q3Seq[x];\r
+               if (q != '-')\r
+                       ++QPos;\r
+               }\r
+\r
+       Hit.ColXLo = ColXLo;\r
+       Hit.ColXHi = ColXHi;\r
+\r
+       //if (FirstA)\r
+       //      {\r
+       //      Hit.LY = AccumForA[ColXLo];\r
+       //      Hit.LN = AccumForB[ColXLo];\r
+\r
+       //      Hit.RY = SumForB - AccumForB[ColXHi];\r
+       //      Hit.RN = SumForA - AccumForA[ColXHi];\r
+       //      }\r
+       //else\r
+       //      {\r
+       //      Hit.LY = AccumForB[ColXLo];\r
+       //      Hit.LN = AccumForA[ColXLo];\r
+       //      Hit.RY = SumForA - AccumForA[ColXHi];\r
+       //      Hit.RN = SumForB - AccumForB[ColXHi];\r
+       //      }\r
+\r
+       //Hit.LA = AccumAgainst[ColXLo];\r
+       //Hit.LD = AccumAbstain[ColXLo];\r
+\r
+       //Hit.RA = SumAgainst - AccumAgainst[ColXHi];\r
+       //Hit.RD = SumAbstain - AccumAbstain[ColXHi];\r
+\r
+       Hit.PctIdAB = IdAB*100.0;\r
+       Hit.PctIdQM = BestIdQM*100.0;\r
+\r
+       Hit.Div = (BestDiv - 1.0)*100.0;\r
+\r
+       //Hit.QSD = QSD;\r
+       Hit.Q3 = Q3;\r
+       Hit.QLabel = QLabel;\r
+       if (FirstA)\r
+               {\r
+               //Hit.ASD = ASD;\r
+               //Hit.BSD = BSD;\r
+               //Hit.PathQA = PathQA;\r
+               //Hit.PathQB = PathQB;\r
+               Hit.A3 = A3;\r
+               Hit.B3 = B3;\r
+               Hit.ALabel = ALabel;\r
+               Hit.BLabel = BLabel;\r
+               Hit.PctIdQA = IdQA*100.0;\r
+               Hit.PctIdQB = IdQB*100.0;\r
+               }\r
+       else\r
+               {\r
+               Hit.A3 = B3;\r
+               Hit.B3 = A3;\r
+               Hit.ALabel = BLabel;\r
+               Hit.BLabel = ALabel;\r
+               Hit.PctIdQA = IdQB*100.0;\r
+               Hit.PctIdQB = IdQA*100.0;\r
+               }\r
+\r
+// CS SNPs\r
+       Hit.CS_LY = 0;\r
+       Hit.CS_LN = 0;\r
+       Hit.CS_RY = 0;\r
+       Hit.CS_RN = 0;\r
+       Hit.CS_LA = 0;\r
+       Hit.CS_RA = 0;\r
+\r
+       //vector<float> Cons;\r
+       //for (unsigned Col = 0; Col < ColCount; ++Col)\r
+       //      {\r
+       //      char q = Q3Seq[Col];\r
+       //      char a = A3Seq[Col];\r
+       //      char b = B3Seq[Col];\r
+       //      if (q == a && q == b && a == b)\r
+       //              {\r
+       //              Cons.push_back(1.0f);\r
+       //              continue;\r
+       //              }\r
+\r
+       //      bool gapq = isgap(q);\r
+       //      bool gapa = isgap(a);\r
+       //      bool gapb = isgap(b);\r
+\r
+       //      if (!gapq && !gapa && !gapb)\r
+       //              {\r
+       //              if (q == a || q == b || a == b)\r
+       //                      Cons.push_back(0.75);\r
+       //              else\r
+       //                      Cons.push_back(0.5);\r
+       //              }\r
+       //      else\r
+       //              {\r
+       //              if (!gapa && (a == b || a == q))\r
+       //                      Cons.push_back(0.5f);\r
+       //              else if (!gapb && b == q)\r
+       //                      Cons.push_back(0.5f);\r
+       //              else\r
+       //                      Cons.push_back(0.0f);\r
+       //              }\r
+       //      }\r
+\r
+       //float fLY = 0.0f;\r
+       //float fLN = 0.0f;\r
+       //float fLA = 0.0f;\r
+       //float fRY = 0.0f;\r
+       //float fRN = 0.0f;\r
+       //float fRA = 0.0f;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               if (q == a && q == b && a == b)\r
+                       continue;\r
+\r
+               unsigned ngaps = 0;\r
+               if (isgap(q))\r
+                       ++ngaps;\r
+               if (isgap(a))\r
+                       ++ngaps;\r
+               if (isgap(b))\r
+                       ++ngaps;\r
+\r
+               if (opt_skipgaps)\r
+                       {\r
+                       if (ngaps == 3)\r
+                               continue;\r
+                       }\r
+               else\r
+                       {\r
+                       if (ngaps == 2)\r
+                               continue;\r
+                       }\r
+\r
+               if (!FirstA)\r
+                       swap(a, b);\r
+\r
+               //float AvgCons = (Cons[Col-2] + Cons[Col-1] + Cons[Col+1] + Cons[Col+2])/4;\r
+               //if (Col < ColXLo)\r
+               //      {\r
+               //      if (q == a && q != b)\r
+               //              fLY += AvgCons;\r
+               //      else if (q == b && q != a)\r
+               //              fLN += AvgCons;\r
+               //      else\r
+               //              fLA += AvgCons;\r
+               //      }\r
+               //else if (Col > ColXHi)\r
+               //      {\r
+               //      if (q == b && q != a)\r
+               //              fRY += AvgCons;\r
+               //      else if (q == a && q != b)\r
+               //              fRN += AvgCons;\r
+               //      else\r
+               //              fRA += AvgCons;\r
+               //      }\r
+\r
+               if (opt_skipgaps2)\r
+                       {\r
+                       if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+                               continue;\r
+                       if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+                               continue;\r
+                       }\r
+\r
+               //if (Col > 0 && isgap(Q3Seq[Col-1]))\r
+                       //continue;\r
+               //if (Col + 1 < ColCount && isgap(Q3Seq[Col+1]))\r
+               //      continue;\r
+\r
+               if (Col < ColXLo)\r
+                       {\r
+                       if (q == a && q != b)\r
+                               ++Hit.CS_LY;\r
+                       else if (q == b && q != a)\r
+                               ++Hit.CS_LN;\r
+                       else\r
+                               ++Hit.CS_LA;\r
+                       }\r
+               else if (Col > ColXHi)\r
+                       {\r
+                       if (q == b && q != a)\r
+                               ++Hit.CS_RY;\r
+                       else if (q == a && q != b)\r
+                               ++Hit.CS_RN;\r
+                       else\r
+                               ++Hit.CS_RA;\r
+                       }\r
+               }\r
+\r
+       double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
+       double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
+       Hit.Score = ScoreL*ScoreR;\r
+\r
+       extern bool g_UchimeDeNovo;\r
+\r
+       //if (0)//g_UchimeDeNovo)\r
+       //      {\r
+       //      double AbQ = GetAbFromLabel(QLabel.c_str());\r
+       //      double AbA = GetAbFromLabel(ALabel.c_str());\r
+       //      double AbB = GetAbFromLabel(BLabel.c_str());\r
+       //      if (AbQ > 0.0 && AbA > 0.0 && AbB > 0.0)\r
+       //              {\r
+       //              double MinAb = min(AbA, AbB);\r
+       //              double Ratio = MinAb/AbQ;\r
+       //              double t = Ratio - opt_abx;\r
+       //      //      double Factor = 2.0/(1.0 + exp(-t));\r
+       //              double Factor = min(Ratio, opt_abx)/opt_abx;\r
+       //              if (opt_verbose)\r
+       //                      Log("Score %.4f Ab factor %.4f >%s\n", Hit.Score, Factor, QLabel.c_str());\r
+       //              Hit.Score *= Factor;\r
+       //              }\r
+       //      }\r
+\r
+       extern FILE *g_fUChimeAlns;\r
+       if (g_fUChimeAlns != 0 && Hit.Div > 0.0)\r
+               {\r
+               void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit);\r
+               WriteChimeHitX(g_fUChimeAlns, Hit);\r
+               }\r
+       }\r
+\r
+void AlignChime3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       if (opt_ucl)\r
+               AlignChimeLocal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);\r
+       else\r
+               AlignChimeGlobal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);\r
+       }\r
+\r
+static void StripGaps(const byte *Seq, unsigned L, string &s)\r
+       {\r
+       s.clear();\r
+       for (unsigned i = 0; i < L; ++i)\r
+               {\r
+               char c = Seq[i];\r
+               if (!isgap(c))\r
+                       s.push_back(c);\r
+               }\r
+       }\r
+\r
+static void StripGapsAlloc(const SeqData &SDIn, SeqData &SDOut)\r
+       {\r
+       SDOut = SDIn;\r
+       byte *s = myalloc(byte, SDIn.L);\r
+       unsigned k = 0;\r
+       for (unsigned i = 0; i < SDIn.L; ++i)\r
+               {\r
+               char c = SDIn.Seq[i];\r
+               if (!isgap(c))\r
+                       s[k++] = toupper(c);\r
+               }\r
+       SDOut.Seq = s;\r
+       SDOut.L = k;\r
+       }\r
+\r
+void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
+  const string &PathQA, const string &PathQB, ChimeHit2 &Hit)\r
+       {\r
+       //if (opt_ucl)\r
+       //      {\r
+       //      AlignChimeLocal(QSD, ASD, BSD, PathQA, PathQB, Hit);\r
+       //      return;\r
+       //      }\r
+\r
+       string Q3;\r
+       string A3;\r
+       string B3;\r
+       Make3Way(QSD, ASD, BSD, PathQA, PathQB, Q3, A3, B3);\r
+\r
+       AlignChime3(Q3, A3, B3, QSD.Label, ASD.Label, BSD.Label, Hit);\r
+       }\r
+\r
+void AlignChime3SDRealign(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       SeqData QSD;\r
+       SeqData ASD;\r
+       SeqData BSD;\r
+       StripGapsAlloc(QSD3, QSD);\r
+       StripGapsAlloc(ASD3, ASD);\r
+       StripGapsAlloc(BSD3, BSD);\r
+\r
+       string PathQA;\r
+       string PathQB;\r
+       bool FoundQA = GlobalAlign(QSD, ASD, PathQA);\r
+       bool FoundQB = GlobalAlign(QSD, BSD, PathQB);\r
+       if (!FoundQA || !FoundQB)\r
+               {\r
+               Hit.Clear();\r
+               Hit.QLabel = QSD3.Label;\r
+               return;\r
+               }\r
+\r
+       AlignChime(QSD, ASD, BSD, PathQA, PathQB, Hit);\r
+\r
+       myfree((void *) QSD.Seq);\r
+       myfree((void *) ASD.Seq);\r
+       myfree((void *) BSD.Seq);\r
+       }\r
+\r
+void AlignChime3SD(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       if (opt_realign)\r
+               {\r
+               AlignChime3SDRealign(QSD3, ASD3, BSD3, Hit);\r
+               return;\r
+               }\r
+\r
+       string Q3;\r
+       string A3;\r
+       string B3;\r
+\r
+       const unsigned ColCount = QSD3.L;\r
+       asserta(ASD3.L == ColCount && BSD3.L == ColCount);\r
+\r
+       Q3.reserve(ColCount);\r
+       A3.reserve(ColCount);\r
+       B3.reserve(ColCount);\r
+\r
+       const byte *QS = QSD3.Seq;\r
+       const byte *AS = ASD3.Seq;\r
+       const byte *BS = BSD3.Seq;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               byte q = toupper(QS[Col]);\r
+               byte a = toupper(AS[Col]);\r
+               byte b = toupper(BS[Col]);\r
+\r
+               if (isgap(q) && isgap(a) && isgap(b))\r
+                       continue;\r
+\r
+               Q3.push_back(q);\r
+               A3.push_back(a);\r
+               B3.push_back(b);\r
+               }\r
+\r
+       AlignChime3(Q3, A3, B3, QSD3.Label, ASD3.Label, BSD3.Label, Hit);\r
+       }\r
diff --git a/uchime_src/alignchimel.cpp b/uchime_src/alignchimel.cpp
new file mode 100644 (file)
index 0000000..ae152af
--- /dev/null
@@ -0,0 +1,417 @@
+#include "myutils.h"\r
+#include "seq.h"\r
+#include "chime.h"\r
+\r
+#define        TRACE   0\r
+\r
+/***\r
+Let:\r
+       S[i] =  Score of col i: 0=no SNP, +1 = Y, -xn (opt_xn) = N or A.\r
+\r
+       V[k] =  Best segment score from j, j+1 .. k for all possible j\r
+                       max(j) Sum i=j..k S[i]\r
+\r
+Recursion relation:\r
+       V[k] =  S[k] + max (V[k-1], 0)\r
+***/\r
+\r
+void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit);\r
+\r
+void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
+  const string &PathQA, const string &PathQB,\r
+  string &Q3, string &A3, string &B3);\r
+\r
+double GetScore2(double Y, double N, double A);\r
+\r
+void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       Hit.Clear();\r
+\r
+       const byte *Q3Seq = (const byte *) Q3.c_str();\r
+       const byte *A3Seq = (const byte *) A3.c_str();\r
+       const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+       const unsigned ColCount = SIZE(Q3);\r
+       asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+       vector<float> ColScoresA(ColCount, 0.0f);\r
+       vector<float> ColScoresB(ColCount, 0.0f);\r
+\r
+       float ScoreN = -(float) opt_xn;\r
+       unsigned QL = 0;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q))\r
+                       ++QL;\r
+\r
+               if (q == a && q == b && a == b)\r
+                       continue;\r
+\r
+               if (isgap(q) || isgap(a) || isgap(b))\r
+                       continue;\r
+\r
+               if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+                       continue;\r
+\r
+               if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+                       continue;\r
+\r
+               if (q == a && q != b)\r
+                       ColScoresA[Col] = 1;\r
+               else\r
+                       ColScoresA[Col] = ScoreN;\r
+\r
+               if (q == b && q != a)\r
+                       ColScoresB[Col] = 1;\r
+               else\r
+                       ColScoresB[Col] = ScoreN;\r
+               }\r
+\r
+       vector<float> LVA(ColCount, 0.0f);\r
+       vector<float> LVB(ColCount, 0.0f);\r
+\r
+       LVA[0] = ColScoresA[0];\r
+       LVB[0] = ColScoresB[0];\r
+       for (unsigned Col = 1; Col < ColCount; ++Col)\r
+               {\r
+               LVA[Col] = max(LVA[Col-1], 0.0f) + ColScoresA[Col];\r
+               LVB[Col] = max(LVB[Col-1], 0.0f) + ColScoresB[Col];\r
+               }\r
+\r
+       vector<float> RVA(ColCount, 0.0f);\r
+       vector<float> RVB(ColCount, 0.0f);\r
+\r
+       RVA[ColCount-1] = ColScoresA[ColCount-1];\r
+       RVB[ColCount-1] = ColScoresB[ColCount-1];\r
+       for (int Col = ColCount-2; Col >= 0; --Col)\r
+               {\r
+               RVA[Col] = max(RVA[Col+1], 0.0f) + ColScoresA[Col];\r
+               RVB[Col] = max(RVB[Col+1], 0.0f) + ColScoresB[Col];\r
+               }\r
+\r
+       bool FirstA = true;\r
+       float MaxSum = 0.0;\r
+       unsigned ColX = UINT_MAX;\r
+       for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+               {\r
+               float Sum = LVA[Col] + RVB[Col+1];\r
+               if (Sum > MaxSum)\r
+                       {\r
+                       FirstA = true;\r
+                       MaxSum = Sum;\r
+                       ColX = Col;\r
+                       }\r
+               }\r
+\r
+       for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+               {\r
+               float Sum = LVB[Col] + RVA[Col+1];\r
+               if (Sum > MaxSum)\r
+                       {\r
+                       FirstA = false;\r
+                       MaxSum = Sum;\r
+                       ColX = Col;\r
+                       }\r
+               }\r
+       if (ColX == UINT_MAX)\r
+               return;\r
+\r
+       unsigned ColLo = UINT_MAX;\r
+       unsigned ColHi = UINT_MAX;\r
+       if (FirstA)\r
+               {\r
+               float Sum = 0.0f;\r
+               for (int Col = ColX; Col >= 0; --Col)\r
+                       {\r
+                       Sum += ColScoresA[Col];\r
+                       if (Sum >= LVA[ColX])\r
+                               {\r
+                               ColLo = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= LVA[ColX]);\r
+               Sum = 0.0f;\r
+               for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+                       {\r
+                       Sum += ColScoresB[Col];\r
+                       if (Sum >= RVB[ColX])\r
+                               {\r
+                               ColHi = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= RVB[ColX]);\r
+               }\r
+       else\r
+               {\r
+               float Sum = 0.0f;\r
+               for (int Col = ColX; Col >= 0; --Col)\r
+                       {\r
+                       Sum += ColScoresB[Col];\r
+                       if (Sum >= LVB[ColX])\r
+                               {\r
+                               ColLo = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= LVB[ColX]);\r
+               Sum = 0.0f;\r
+               for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+                       {\r
+                       Sum += ColScoresA[Col];\r
+                       if (Sum >= RVA[ColX])\r
+                               {\r
+                               ColHi = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= RVA[ColX]);\r
+               }\r
+\r
+       unsigned ColXHi = ColX;\r
+       for (unsigned Col = ColX + 1; Col < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               \r
+               if (q == a && q == b && !isgap(q))\r
+                       ColXHi = Col;\r
+               else\r
+                       break;\r
+               }\r
+\r
+       unsigned ColXLo = ColX;\r
+       for (int Col = (int) ColX - 1; Col >= 0; --Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               \r
+               if (q == a && q == b && !isgap(q))\r
+                       ColXLo = Col;\r
+               else\r
+                       break;\r
+               }\r
+\r
+       unsigned IdQA = 0;\r
+       unsigned IdQB = 0;\r
+       unsigned IdAB = 0;\r
+       unsigned NQA = 0;\r
+       unsigned NQB = 0;\r
+       unsigned NAB = 0;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q) && !isgap(a))\r
+                       {\r
+                       ++NQA;\r
+                       if (q == a)\r
+                               ++IdQA;\r
+                       }\r
+\r
+               if (!isgap(q) && !isgap(b))\r
+                       {\r
+                       ++NQB;\r
+                       if (q == b)\r
+                               ++IdQB;\r
+                       }\r
+\r
+               if (!isgap(a) && !isgap(b))\r
+                       {\r
+                       ++NAB;\r
+                       if (a == b)\r
+                               ++IdAB;\r
+                       }\r
+               }\r
+\r
+       Hit.PctIdQA = Pct(IdQA, NQA);\r
+       Hit.PctIdQB = Pct(IdQB, NQB);\r
+       Hit.PctIdAB = Pct(IdAB, NAB);\r
+\r
+       unsigned LIdQA = 0;\r
+       unsigned LIdQB = 0;\r
+       for (unsigned Col = ColLo; Col < ColXLo; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q) && !isgap(a))\r
+                       {\r
+                       if (q == a)\r
+                               ++LIdQA;\r
+                       }\r
+\r
+               if (!isgap(q) && !isgap(b))\r
+                       {\r
+                       if (q == b)\r
+                               ++LIdQB;\r
+                       }\r
+               }\r
+\r
+       unsigned RIdQA = 0;\r
+       unsigned RIdQB = 0;\r
+       for (unsigned Col = ColXHi+1; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q) && !isgap(a))\r
+                       {\r
+                       if (q == a)\r
+                               ++RIdQA;\r
+                       }\r
+\r
+               if (!isgap(q) && !isgap(b))\r
+                       {\r
+                       if (q == b)\r
+                               ++RIdQB;\r
+                       }\r
+               }\r
+\r
+       unsigned IdDiffL = max(LIdQA, LIdQB) - min(LIdQA, LIdQB);\r
+       unsigned IdDiffR = max(RIdQA, RIdQB) - min(RIdQA, RIdQB);\r
+       unsigned MinIdDiff = min(IdDiffL, IdDiffR);\r
+       unsigned ColRange = ColHi - ColLo + 1;\r
+       if (opt_queryfract > 0.0f && float(ColRange)/float(QL) < opt_queryfract)\r
+               return;\r
+\r
+//     double Div = Pct(MinIdDiff, QSD.L);\r
+\r
+#if    TRACE\r
+       {\r
+       Log("  Col  A Q B   ScoreA   ScoreB      LVA      LVB      RVA      RVB\n");\r
+       Log("-----  - - -  -------  -------  -------  -------  -------  -------\n");\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               if (ColScoresA[Col] == 0.0 && ColScoresB[Col] == 0.0)\r
+                       continue;\r
+\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               Log("%5u  %c %c %c", Col, a, q, b);\r
+\r
+               if (ColScoresA[Col] == 0.0)\r
+                       Log("  %7.7s", "");\r
+               else\r
+                       Log("  %7.1f", ColScoresA[Col]);\r
+\r
+               if (ColScoresB[Col] == 0.0)\r
+                       Log("  %7.7s", "");\r
+               else\r
+                       Log("  %7.1f", ColScoresB[Col]);\r
+\r
+               Log("  %7.1f  %7.1f  %7.1f  %7.1f", LVA[Col], LVB[Col], RVA[Col], RVB[Col]);\r
+\r
+               Log("\n");\r
+               }\r
+       Log("\n");\r
+       Log("MaxSum %.1f, ColLo %u, ColXLo %u, ColX %u, ColXHi %u, ColHi %u, AF %c\n",\r
+         MaxSum, ColLo, ColXLo, ColX, ColXHi, ColHi, tof(FirstA));\r
+       Log("  LIdQA %u, LIdQB %u, RIdQA %u, RIdQB %u\n", LIdQA, LIdQB, RIdQA, RIdQB);\r
+       }\r
+#endif\r
+\r
+       string Q3L;\r
+       string A3L;\r
+       string B3L;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3[Col];\r
+               char a = A3[Col];\r
+               char b = B3[Col];\r
+\r
+               Q3L += q;\r
+               A3L += a;\r
+               B3L += b;\r
+               }\r
+\r
+       AlignChimeGlobal3(Q3L, A3L, B3L, QLabel, ALabel, BLabel, Hit);\r
+\r
+#if    0\r
+// CS SNPs\r
+       Hit.CS_LY = 0;\r
+       Hit.CS_LN = 0;\r
+       Hit.CS_RY = 0;\r
+       Hit.CS_RN = 0;\r
+       Hit.CS_LA = 0;\r
+       Hit.CS_RA = 0;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               if (q == a && q == b && a == b)\r
+                       continue;\r
+               if (isgap(q) || isgap(a) || isgap(b))\r
+                       continue;\r
+               if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+                       continue;\r
+               if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+                       continue;\r
+\r
+               if (!FirstA)\r
+                       swap(a, b);\r
+\r
+               if (Col < ColXLo)\r
+                       {\r
+                       if (q == a && q != b)\r
+                               ++Hit.CS_LY;\r
+                       else if (q == b && q != a)\r
+                               ++Hit.CS_LN;\r
+                       else\r
+                               ++Hit.CS_LA;\r
+                       }\r
+               else if (Col > ColXHi)\r
+                       {\r
+                       if (q == b && q != a)\r
+                               ++Hit.CS_RY;\r
+                       else if (q == a && q != b)\r
+                               ++Hit.CS_RN;\r
+                       else\r
+                               ++Hit.CS_RA;\r
+                       }\r
+               }\r
+\r
+       double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
+       double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
+       Hit.Score = ScoreL*ScoreR;\r
+\r
+       //Hit.QSD = QSD;\r
+       //if (FirstA)\r
+       //      {\r
+       //      Hit.ASD = ASD;\r
+       //      Hit.BSD = BSD;\r
+       //      Hit.PathQA = PathQA;\r
+       //      Hit.PathQB = PathQB;\r
+       //      }\r
+       //else\r
+       //      {\r
+       //      Hit.ASD = BSD;\r
+       //      Hit.BSD = ASD;\r
+       //      }\r
+\r
+       //Hit.ColLo = ColLo;\r
+       //Hit.ColXLo = ColXLo;\r
+       //Hit.ColXHi = ColXHi;\r
+       //Hit.ColHi = ColHi;\r
+       //Hit.Div = Div;\r
+\r
+//     Hit.LogMe();\r
+#endif\r
+       }\r
diff --git a/uchime_src/allocs.h b/uchime_src/allocs.h
new file mode 100644 (file)
index 0000000..157d03e
--- /dev/null
@@ -0,0 +1,24 @@
+// NOTE(review): this file is only a list of A(<name>) invocations; it looks\r
+// like an X-macro list whose meaning depends on how the including file\r
+// defines A() -- confirm against the include site before adding entries.\r
+A(Alpha)\r
+A(Mx)\r
+A(ChainBrute)\r
+A(Chainer)\r
+A(Test)\r
+A(CompressPath)\r
+A(HSPFinder)\r
+A(Main)\r
+A(Clumps)\r
+A(Path)\r
+A(SeqDB)\r
+A(SFasta)\r
+A(SWUngapped)\r
+A(AllocBit)\r
+A(Ultra)\r
+A(UPGMA)\r
+A(Windex)\r
+A(XDropBwd)\r
+A(Xlat)\r
+A(MPath)\r
+A(ScoreCache)\r
+A(TargetHits)\r
+A(Out)\r
+A(Hashdex)\r
diff --git a/uchime_src/alnheuristics.h b/uchime_src/alnheuristics.h
new file mode 100644 (file)
index 0000000..9a8d283
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef alnheuristics_h\r
+#define alnheuristics_h\r
+\r
+struct AlnParams;\r
+\r
+// Bundle of numeric settings for alignment heuristics: a band radius, an HSP\r
+// finder word length, a seed threshold, three X-drop values and a minimum\r
+// global HSP length.\r
+// NOTE(review): the constructor and the Init* members are defined elsewhere;\r
+// the defaults they assign are not visible from this header.\r
+struct AlnHeuristics\r
+       {\r
+       unsigned BandRadius;\r
+       unsigned HSPFinderWordLength;\r
+       float SeedT;\r
+\r
+       float XDropG;                   //  GappedBlast default\r
+       float XDropU;                   //  UngappedBlast default\r
+       float XDropUG;                  //  UngappedBlast called by GappedBlast\r
+\r
+       unsigned MinGlobalHSPLength;\r
+\r
+       AlnHeuristics();\r
+       // Initialises this object using the given alignment parameters\r
+       // (presumably together with command-line options -- confirm in the\r
+       // implementation).\r
+       void InitFromCmdLine(const AlnParams &AP);\r
+       void InitGlobalFull();\r
+\r
+       // True when both MinGlobalHSPLength and BandRadius are zero.\r
+       bool IsGlobalFull() const\r
+               {\r
+               return MinGlobalHSPLength == 0 && BandRadius == 0;\r
+               }\r
+\r
+       };\r
+\r
+#endif // alnheuristics_h\r
diff --git a/uchime_src/alnparams.cpp b/uchime_src/alnparams.cpp
new file mode 100644 (file)
index 0000000..d1b9036
--- /dev/null
@@ -0,0 +1,414 @@
+#include "myutils.h"\r
+#include <float.h>     // for FLT_MAX\r
+#include "mx.h"\r
+#include "alnparams.h"\r
+#include "hsp.h"\r
+\r
+#define TEST   0\r
+\r
+void SetBLOSUM62();
+void SetNucSubstMx(double Match, double Mismatch);\r
+void ReadSubstMx(const string &FileName, Mx<float> &Mxf);\r
+
+extern Mx<float> g_SubstMxf;
+extern float **g_SubstMx;
+\r
+void AlnParams::Clear()\r
+       {\r
+// Reset to the "nothing configured yet" state.\r
+// Every gap score becomes the OBVIOUSLY_WRONG_PENALTY sentinel, the matrix\r
+// name is nulled and the alphabet flags are cleared.\r
+// Note that the SubstMx pointer itself is not reset here.\r
+       SubstMxName = 0;\r
+       NucleoSet = false;\r
+       Nucleo = false;\r
+\r
+// Local gaps\r
+       LocalOpen = LocalExt = OBVIOUSLY_WRONG_PENALTY;\r
+\r
+// Global internal gaps\r
+       OpenA = OpenB = ExtA = ExtB = OBVIOUSLY_WRONG_PENALTY;\r
+\r
+// Global terminal gaps (left and right ends)\r
+       LOpenA = LOpenB = ROpenA = ROpenB = OBVIOUSLY_WRONG_PENALTY;\r
+       LExtA = LExtB = RExtA = RExtB = OBVIOUSLY_WRONG_PENALTY;\r
+       }\r
+\r
+bool AlnParams::Is2() const\r
+       {\r
+// "2" scheme: one open score and one extend score are shared by every gap\r
+// position (internal, left end, right end) of both sequences.\r
+       const float Open = OpenA;\r
+       const float Ext = ExtA;\r
+       const bool OneOpen = (OpenB == Open && LOpenA == Open && LOpenB == Open && ROpenA == Open && ROpenB == Open);\r
+       const bool OneExt = (ExtB == Ext && LExtA == Ext && LExtB == Ext && RExtA == Ext && RExtB == Ext);\r
+       return OneOpen && OneExt;\r
+       }\r
+\r
+bool AlnParams::Is4() const\r
+       {\r
+// "4" scheme: one open/extend pair for internal gaps plus a second\r
+// open/extend pair shared by all four terminal positions.\r
+       const float Open = OpenA;\r
+       const float Ext = ExtA;\r
+       const float TermOpen = LOpenA;\r
+       const float TermExt = LExtA;\r
+       const bool OpensOk = (OpenB == Open && LOpenA == TermOpen && LOpenB == TermOpen && ROpenA == TermOpen && ROpenB == TermOpen);\r
+       const bool ExtsOk = (ExtB == Ext && LExtA == TermExt && LExtB == TermExt && RExtA == TermExt && RExtB == TermExt);\r
+       return OpensOk && ExtsOk;\r
+       }\r
+\r
+const char *AlnParams::GetType() const\r
+       {\r
+// Names the gap scheme: "2" if Is2(), else "4" if Is4(), else "12".\r
+// Is2() is tested first; Is4() is only consulted when Is2() fails.\r
+       if (!Is2())\r
+               return Is4() ? "4" : "12";\r
+       return "2";\r
+       }\r
+\r
+void AlnParams::Init2(const float * const *Mx, float Open, float Ext)\r
+       {\r
+// Two-score scheme: terminal gaps get the same open/extend scores as internal\r
+// gaps, which is exactly Init4 with the terminal pair equal to the internal pair.\r
+       Init4(Mx, Open, Ext, Open, Ext);\r
+       }\r
+\r
+void AlnParams::SetLocal(float Open, float Ext)  // store the two local-alignment gap scores as given\r
+       {\r
+       this->LocalExt = Ext;\r
+       this->LocalOpen = Open;\r
+       }\r
+\r
+void AlnParams::Init4(const float * const *Mx, float Open, float Ext,\r
+  float TermOpen, float TermExt)  // internal gaps take (Open, Ext); all terminal gaps take (TermOpen, TermExt)\r
+       {\r
+       SubstMx = Mx;\r
+       ExtB = ExtA = Ext;\r
+       OpenB = OpenA = Open;\r
+       RExtB = RExtA = LExtB = LExtA = TermExt;\r
+       ROpenB = ROpenA = LOpenB = LOpenA = TermOpen;\r
+       }\r
+\r
+void AlnParams::Init(const AlnParams &AP, const HSPData &HSP,\r
+  unsigned LA, unsigned LB)  // derive scores from AP for one HSP; LA and LB are only passed to HSP.RightA / HSP.RightB\r
+       {\r
+       SubstMx = AP.SubstMx;  // matrix and internal-gap scores are always copied unchanged\r
+       OpenA = AP.OpenA;\r
+       OpenB = AP.OpenB;\r
+       ExtA = AP.ExtA;\r
+       ExtB = AP.ExtB;\r
+\r
+       if (HSP.LeftA())  // AP's left-terminal scores for A are kept only when HSP.LeftA() is true\r
+               {\r
+               LOpenA = AP.LOpenA;\r
+               LExtA = AP.LExtA;\r
+               }\r
+       else\r
+               {\r
+               LOpenA = AP.OpenA;  // otherwise the left end of A is scored with the internal-gap values\r
+               LExtA = AP.ExtA;\r
+               }\r
+\r
+       if (HSP.LeftB())  // same rule for the left end of B\r
+               {\r
+               LOpenB = AP.LOpenB;\r
+               LExtB = AP.LExtB;\r
+               }\r
+       else\r
+               {\r
+               LOpenB = AP.OpenB;\r
+               LExtB = AP.ExtB;\r
+               }\r
+\r
+       if (HSP.RightA(LA))  // same rule for the right end of A\r
+               {\r
+               ROpenA = AP.ROpenA;\r
+               RExtA = AP.RExtA;\r
+               }\r
+       else\r
+               {\r
+               ROpenA = AP.OpenA;\r
+               RExtA = AP.ExtA;\r
+               }\r
+\r
+       if (HSP.RightB(LB))  // same rule for the right end of B\r
+               {\r
+               ROpenB = AP.ROpenB;\r
+               RExtB = AP.RExtB;\r
+               }\r
+       else\r
+               {\r
+               ROpenB = AP.OpenB;\r
+               RExtB = AP.ExtB;\r
+               }\r
+       }  // note: LocalOpen, LocalExt, SubstMxName and the Nucleo flags are not assigned by this function\r
+\r
+void AlnParams::LogMe() const  // write a one-line summary of the gap scheme to the log\r
+       {\r
+       Log("AlnParams(%s)", GetType());  // scheme name: "2", "4" or "12"\r
+       if (Is2())\r
+               Log(" g=%.1f e=%.1f", -OpenA, -ExtA);  // stored scores are negated for display in this branch\r
+       else if (Is4())\r
+               Log(" g=%.1f tg=%.1f e=%.1f te=%.1f", -OpenA, -LOpenA, -ExtA, -LExtA);  // argument order follows the g/tg/e/te labels\r
+       else\r
+               Log(  // 12-score case: the stored values are printed as-is, without negation\r
+" gA=%.1f gB=%.1f gAL=%.1f gBL=%.1f gAR=%.1f gBR=%.1f eA=%.1f eB=%.1f eAL=%.1f eBL=%.1f eAR=%.1f eBR=%.1f",\r
+                 OpenA, OpenB, LOpenA, LOpenB, ROpenA, ROpenB, ExtA, ExtB, LExtA, LExtB, RExtA, RExtB);\r
+       Log("\n");\r
+       }\r
+\r
+/***\r
+Open/Ext format string is one or more of the following, separated by '/':\r
+       [<flag><flag>...]<value>\r
+\r
+Value is (positive) penalty or * (disabled).\r
+Flag is:\r
+       Q               Query.\r
+       T               Target sequence.\r
+       I               Internal gaps (default internal and terminal).\r
+       E               End gaps (default internal and terminal).\r
+       L               Left end.\r
+       R               Right end.\r
+***/\r
+\r
+static void ParseGapStr(const string &s,  // parse one gap string (format described in the comment above)\r
+  float &QI, float &QL, float &QR,  // outputs selected by Q: internal, left end, right end\r
+  float &TI, float &TL, float &TR)  // outputs selected by T: internal, left end, right end\r
+       {\r
+       if (s.empty())  // empty string: every output is left untouched\r
+               return;\r
+\r
+       bool Q = false;\r
+       bool T = false;\r
+       bool I = false;\r
+       bool E = false;\r
+       bool L = false;\r
+       bool R = false;\r
+\r
+       const unsigned K = SIZE(s);\r
+       unsigned Dec = 0;  // 0 until a '.' is seen; then 1, growing tenfold with each fractional digit\r
+       float Value = FLT_MAX;  // FLT_MAX means "no value seen in the current field yet"\r
+       for (unsigned i = 0; i <= K; ++i)  // i == K reads the terminating NUL, which closes the last field\r
+               {\r
+               char c = s.c_str()[i];\r
+               if (c == 0 || c == '/')\r
+                       {\r
+                       if (Value == FLT_MAX)\r
+                               Die("Invalid gap penalty string, missing penalty '%s'", s.c_str());\r
+                       if (!Q && !T && !I && !E && !L && !R)  // no flags at all: the value applies everywhere\r
+                               {\r
+                               Q = true;\r
+                               T = true;\r
+                               L = true;\r
+                               R = true;\r
+                               I = true;\r
+                               }\r
+\r
+                       if (!E && !I && !L && !R)  // no position flag: internal and both ends\r
+                               {\r
+                               E = false;\r
+                               I = true;\r
+                               L = true;\r
+                               R = true;\r
+                               }\r
+\r
+                       if (E)  // E stands for both ends and may not be combined with L or R\r
+                               {\r
+                               if (L || R)\r
+                                       Die("Invalid gap penalty string (E and L or R) '%s'", s.c_str());\r
+                               L = true;\r
+                               R = true;\r
+                               }\r
+\r
+                       if (!Q && !T)  // no sequence flag: both Q and T\r
+                               {\r
+                               Q = true;\r
+                               T = true;\r
+                               }\r
+\r
+                       if (Q && L)  // outputs receive the negated value\r
+                               QL = -Value;\r
+                       if (Q && R)\r
+                               QR = -Value;\r
+                       if (Q && I)\r
+                               QI = -Value;\r
+                       if (T && L)\r
+                               TL = -Value;\r
+                       if (T && R)\r
+                               TR = -Value;\r
+                       if (T && I)\r
+                               TI = -Value;\r
+                       \r
+                       Value = FLT_MAX;  // reset all per-field state before the next '/'-separated field\r
+                       Dec = 0;\r
+                       Q = false;\r
+                       T = false;\r
+                       I = false;\r
+                       E = false;\r
+                       L = false;\r
+                       R = false;\r
+                       }\r
+               else if (c == '*')\r
+                       {\r
+                       if (Value != FLT_MAX)\r
+                               Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
+                       Value = -MINUS_INFINITY;  // after negation above, the stored output is MINUS_INFINITY\r
+                       }\r
+               else if (isdigit((unsigned char) c))  // cast: passing a negative char to isdigit is undefined behaviour\r
+                       {\r
+                       if (Value == -MINUS_INFINITY)\r
+                               Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
+                       if (Value == FLT_MAX)\r
+                               Value = 0.0;\r
+                       if (Dec > 0)\r
+                               {\r
+                               Dec *= 10;\r
+                               Value += float(c - '0')/Dec;\r
+                               }\r
+                       else\r
+                               Value = Value*10 + (c - '0');\r
+                       }\r
+               else if (c == '.')\r
+                       {\r
+                       if (Dec > 0)\r
+                               Die("Invalid gap penalty (two decimal points) '%s'", s.c_str());\r
+                       Dec = 1;\r
+                       }\r
+               else\r
+                       {\r
+                       switch (c)  // flag letters are accepted anywhere within a field\r
+                               {\r
+                       case 'Q':\r
+                               Q = true;\r
+                               break;\r
+                       case 'T':\r
+                               T = true;\r
+                               break;\r
+                       case 'I':\r
+                               I = true;\r
+                               break;\r
+                       case 'L':\r
+                               L = true;\r
+                               break;\r
+                       case 'R':\r
+                               R = true;\r
+                               break;\r
+                       case 'E':\r
+                               E = true;\r
+                               break;\r
+                       default:\r
+                               Die("Invalid char '%c' in gap penalty string '%s'", c, s.c_str());\r
+                               }\r
+                       }\r
+               }\r
+       }\r
+\r
+void AlnParams::SetPenalties(const string &OpenStr, const string &ExtStr)  // apply gap-open and gap-extend strings to the global gap scores\r
+       {\r
+       ParseGapStr(OpenStr, OpenA, LOpenA, ROpenA, OpenB, LOpenB, ROpenB);  // Q flags write the A fields, T flags write the B fields\r
+       ParseGapStr(ExtStr, ExtA, LExtA, RExtA, ExtB, LExtB, RExtB);  // an empty string leaves the corresponding fields unchanged\r
+       }\r
+\r
+void AlnParams::SetMxFromCmdLine(bool IsNucleo)  // select the substitution matrix from command-line options\r
+       {\r
+       if (IsNucleo)  // nucleotide case: SubstMxName is not assigned in this branch\r
+               SetNucSubstMx(opt_match, opt_mismatch);
+       else\r
+               {\r
+               if (opt_matrix == "")  // no matrix option given: BLOSUM62 is used\r
+                       {\r
+                       SubstMxName = "BLOSUM62";\r
+                       SetBLOSUM62();
+                       }
+               else\r
+                       {\r
+                       ReadSubstMx(opt_matrix, g_SubstMxf);\r
+                       g_SubstMx = g_SubstMxf.GetData();\r
+                       g_SubstMxf.LogMe();\r
+                       SubstMxName = opt_matrix.c_str();  // points into opt_matrix's buffer, so it is valid only while opt_matrix is unmodified\r
+                       }\r
+               }\r
+       SubstMx = g_SubstMx;  // presumably SetNucSubstMx / SetBLOSUM62 set g_SubstMx as a side effect -- confirm\r
+       asserta(SubstMx != 0);\r
+       }\r
+\r
+void AlnParams::InitFromCmdLine(bool IsNucleo)  // full initialisation from command-line options\r
+       {\r
+       Clear();\r
+       Nucleo = IsNucleo;\r
+       NucleoSet = true;\r
+\r
+       SetMxFromCmdLine(IsNucleo);  // runs before Init4 below, which is handed g_SubstMx\r
+\r
+// Local\r
+       if (optset_lopen || optset_lext)\r
+               {\r
+               if (!optset_lopen || !optset_lext)\r
+                       Die("Must set both --lopen and --lext");\r
+               if (opt_lopen < 0.0 || opt_lext < 0.0)\r
+                       Die("Invalid --lopen/--lext, gap penalties must be >= 0");\r
+               SetLocal(float(-opt_lopen), float(-opt_lext));  // options are non-negative penalties; the stored scores are their negation\r
+               }\r
+       else\r
+               {\r
+       // Same penalties, if-statement to note could differ.\r
+               if (IsNucleo)\r
+                       SetLocal(-10.0f, -1.0f);\r
+               else\r
+                       SetLocal(-10.0f, -1.0f);\r
+               }\r
+\r
+// Global\r
+       if (IsNucleo)\r
+               Init4(g_SubstMx, -10.0, -1.0, -0.5, -0.5);
+       else\r
+               Init4(g_SubstMx, -17.0, -1.0, -0.5, -0.5);
+       SetPenalties(opt_gapopen, opt_gapext);  // user strings override the Init4 defaults; an empty string changes nothing\r
+       }\r
+\r
+float AlnParams::GetLocalOpen() const  // accessor for the local-alignment gap-open score\r
+       {\r
+       return this->LocalOpen;\r
+       }\r
+\r
+float AlnParams::GetLocalExt() const  // accessor for the local-alignment gap-extend score\r
+       {\r
+       return this->LocalExt;\r
+       }\r
+\r
+bool AlnParams::GetIsNucleo() const  // alphabet flag; asserts that the flag has been set before reading it\r
+       {\r
+       asserta(NucleoSet);\r
+       return this->Nucleo;\r
+       }\r
+\r
+unsigned GetWindexWordLength(bool Nucleo)\r
+       {\r
+// Word length for the index.\r
+// When the w option was given (optset_w) its value opt_w is returned;\r
+// otherwise the built-in default is 8 for nucleotide input and 5 if not.\r
+       if (!optset_w)\r
+               return Nucleo ? 8 : 5;\r
+\r
+       return opt_w;\r
+       }\r
+\r
+#if    TEST\r
+static void Test1(const string &os, const string &es)  // debug helper (compiled only when TEST is non-zero): parse one open/extend pair and log the result\r
+       {\r
+       AlnParams AP;  // AlnParams declares no constructor and Clear() is not called, so fields SetPenalties does not write are uninitialised\r
+       Log("\n");\r
+       Log("OpenStr %s\n", os.c_str());\r
+       Log(" ExtStr %s\n", es.c_str());\r
+       AP.SetPenalties(os, es);\r
+       AP.LogMe();\r
+       }\r
+\r
+void TestGapStr()  // debug driver (compiled only when TEST is non-zero): runs Test1 on three sample gap strings\r
+       {\r
+       Test1("17I/0.5E", "1I/0.5E");  // internal vs. end gaps\r
+       Test1("17I/0.5L/0.4R", "1Q/2T");  // separate left/right ends; separate query/target\r
+       Test1("1QL/2QR/3QI/4TL/5TR/6TI", ".1QL/.2QR/.3QI/.4TL/.5TR/.6TI");  // all twelve scores set individually\r
+       }\r
+#endif // TEST\r
diff --git a/uchime_src/alnparams.h b/uchime_src/alnparams.h
new file mode 100644 (file)
index 0000000..4037912
--- /dev/null
@@ -0,0 +1,59 @@
+#ifndef alnparams_h\r
+#define alnparams_h\r
+\r
+struct HSPData;\r
+\r
+// Gap penalty scores are negative\r
+// (i.e., are scores, not penalties).\r
+struct AlnParams  // substitution matrix plus gap scores for local and global alignment\r
+       {\r
+       const char *SubstMxName;  // not owned; may be null (Clear() sets it to 0)\r
+       const float * const *SubstMx;  // not owned; pointer to a matrix held elsewhere\r
+\r
+       bool Nucleo;\r
+       bool NucleoSet;  // GetIsNucleo() asserts this before returning Nucleo\r
+\r
+// Local gaps\r
+       float LocalOpen;\r
+       float LocalExt;\r
+\r
+// Global internal gaps\r
+       float OpenA;  // in SetPenalties, A fields receive the Q (query) values and B fields the T (target) values\r
+       float OpenB;\r
+\r
+       float ExtA;\r
+       float ExtB;\r
+\r
+// Global terminal gaps\r
+       float LOpenA;  // L = left end, R = right end\r
+       float LOpenB;\r
+       float ROpenA;\r
+       float ROpenB;\r
+\r
+       float LExtA;\r
+       float LExtB;\r
+       float RExtA;\r
+       float RExtB;\r
+\r
+       void Clear();  // no constructor is declared, so members are unset until Clear() or an Init* call\r
+       void SetLocal(float Open, float Ext);\r
+       void Init2(const float * const *Mx, float Open, float Ext);  // one open and one extend score for all positions\r
+       void Init4(const float * const *Mx, float Open, float Ext, float TermOpen, float TermExt);  // separate internal and terminal pairs\r
+       void Init(const AlnParams &AP, const HSPData &HSP, unsigned LA, unsigned LB);\r
+       void InitFromCmdLine(bool Nucleo);\r
+       void SetMxFromCmdLine(bool Nucleo);\r
+       void SetPenalties(const string &OpenStr, const string &ExtStr);\r
+       float GetLocalOpen() const;\r
+       float GetLocalExt() const;\r
+       bool GetIsNucleo() const;\r
+\r
+       bool Is2() const;  // all open scores equal and all extend scores equal\r
+       bool Is4() const;  // internal scores equal across A/B and terminal scores equal across all four ends\r
+       const char *GetType() const;  // "2", "4" or "12"\r
+\r
+       void LogMe() const;\r
+       };\r
+\r
+const float OBVIOUSLY_WRONG_PENALTY = 1000.0;\r
+\r
+#endif // alnparams_h\r
diff --git a/uchime_src/alpha.cpp b/uchime_src/alpha.cpp
new file mode 100644 (file)
index 0000000..0efca3b
--- /dev/null
@@ -0,0 +1,2761 @@
+// Generated by /p/py/alphac.py
+#include "alpha.h"
+
+unsigned g_CharToLetterAminoStop[256] =  // byte value -> amino-acid letter index 0..19, plus '*' -> 20 (stop); upper and lower case map alike; every other byte is INVALID_LETTER
+       {
+       INVALID_LETTER, // [  0] 0x00
+       INVALID_LETTER, // [  1] 0x01
+       INVALID_LETTER, // [  2] 0x02
+       INVALID_LETTER, // [  3] 0x03
+       INVALID_LETTER, // [  4] 0x04
+       INVALID_LETTER, // [  5] 0x05
+       INVALID_LETTER, // [  6] 0x06
+       INVALID_LETTER, // [  7] 0x07
+       INVALID_LETTER, // [  8] 0x08
+       INVALID_LETTER, // [  9] 0x09
+       INVALID_LETTER, // [ 10] 0x0a
+       INVALID_LETTER, // [ 11] 0x0b
+       INVALID_LETTER, // [ 12] 0x0c
+       INVALID_LETTER, // [ 13] 0x0d
+       INVALID_LETTER, // [ 14] 0x0e
+       INVALID_LETTER, // [ 15] 0x0f
+       INVALID_LETTER, // [ 16] 0x10
+       INVALID_LETTER, // [ 17] 0x11
+       INVALID_LETTER, // [ 18] 0x12
+       INVALID_LETTER, // [ 19] 0x13
+       INVALID_LETTER, // [ 20] 0x14
+       INVALID_LETTER, // [ 21] 0x15
+       INVALID_LETTER, // [ 22] 0x16
+       INVALID_LETTER, // [ 23] 0x17
+       INVALID_LETTER, // [ 24] 0x18
+       INVALID_LETTER, // [ 25] 0x19
+       INVALID_LETTER, // [ 26] 0x1a
+       INVALID_LETTER, // [ 27] 0x1b
+       INVALID_LETTER, // [ 28] 0x1c
+       INVALID_LETTER, // [ 29] 0x1d
+       INVALID_LETTER, // [ 30] 0x1e
+       INVALID_LETTER, // [ 31] 0x1f
+       INVALID_LETTER, // [ 32] ' '
+       INVALID_LETTER, // [ 33] '!'
+       INVALID_LETTER, // [ 34] '"'
+       INVALID_LETTER, // [ 35] '#'
+       INVALID_LETTER, // [ 36] '$'
+       INVALID_LETTER, // [ 37] '%'
+       INVALID_LETTER, // [ 38] '&'
+       INVALID_LETTER, // [ 39] '''
+       INVALID_LETTER, // [ 40] '('
+       INVALID_LETTER, // [ 41] ')'
+       20 ,            // [ 42] '*' = STP
+       INVALID_LETTER, // [ 43] '+'
+       INVALID_LETTER, // [ 44] ','
+       INVALID_LETTER, // [ 45] '-'
+       INVALID_LETTER, // [ 46] '.'
+       INVALID_LETTER, // [ 47] '/'
+       INVALID_LETTER, // [ 48] '0'
+       INVALID_LETTER, // [ 49] '1'
+       INVALID_LETTER, // [ 50] '2'
+       INVALID_LETTER, // [ 51] '3'
+       INVALID_LETTER, // [ 52] '4'
+       INVALID_LETTER, // [ 53] '5'
+       INVALID_LETTER, // [ 54] '6'
+       INVALID_LETTER, // [ 55] '7'
+       INVALID_LETTER, // [ 56] '8'
+       INVALID_LETTER, // [ 57] '9'
+       INVALID_LETTER, // [ 58] ':'
+       INVALID_LETTER, // [ 59] ';'
+       INVALID_LETTER, // [ 60] '<'
+       INVALID_LETTER, // [ 61] '='
+       INVALID_LETTER, // [ 62] '>'
+       INVALID_LETTER, // [ 63] '?'
+       INVALID_LETTER, // [ 64] '@'
+       0  ,            // [ 65] 'A' = Ala
+       INVALID_LETTER, // [ 66] 'B'
+       1  ,            // [ 67] 'C' = Cys
+       2  ,            // [ 68] 'D' = Asp
+       3  ,            // [ 69] 'E' = Glu
+       4  ,            // [ 70] 'F' = Phe
+       5  ,            // [ 71] 'G' = Gly
+       6  ,            // [ 72] 'H' = His
+       7  ,            // [ 73] 'I' = Ile
+       INVALID_LETTER, // [ 74] 'J'
+       8  ,            // [ 75] 'K' = Lys
+       9  ,            // [ 76] 'L' = Leu
+       10 ,            // [ 77] 'M' = Met
+       11 ,            // [ 78] 'N' = Asn
+       INVALID_LETTER, // [ 79] 'O'
+       12 ,            // [ 80] 'P' = Pro
+       13 ,            // [ 81] 'Q' = Gln
+       14 ,            // [ 82] 'R' = Arg
+       15 ,            // [ 83] 'S' = Ser
+       16 ,            // [ 84] 'T' = Thr
+       INVALID_LETTER, // [ 85] 'U'
+       17 ,            // [ 86] 'V' = Val
+       18 ,            // [ 87] 'W' = Trp
+       INVALID_LETTER, // [ 88] 'X'
+       19 ,            // [ 89] 'Y' = Tyr
+       INVALID_LETTER, // [ 90] 'Z'
+       INVALID_LETTER, // [ 91] '['
+       INVALID_LETTER, // [ 92] '\'
+       INVALID_LETTER, // [ 93] ']'
+       INVALID_LETTER, // [ 94] '^'
+       INVALID_LETTER, // [ 95] '_'
+       INVALID_LETTER, // [ 96] '`'
+       0  ,            // [ 97] 'a' = Ala
+       INVALID_LETTER, // [ 98] 'b'
+       1  ,            // [ 99] 'c' = Cys
+       2  ,            // [100] 'd' = Asp
+       3  ,            // [101] 'e' = Glu
+       4  ,            // [102] 'f' = Phe
+       5  ,            // [103] 'g' = Gly
+       6  ,            // [104] 'h' = His
+       7  ,            // [105] 'i' = Ile
+       INVALID_LETTER, // [106] 'j'
+       8  ,            // [107] 'k' = Lys
+       9  ,            // [108] 'l' = Leu
+       10 ,            // [109] 'm' = Met
+       11 ,            // [110] 'n' = Asn
+       INVALID_LETTER, // [111] 'o'
+       12 ,            // [112] 'p' = Pro
+       13 ,            // [113] 'q' = Gln
+       14 ,            // [114] 'r' = Arg
+       15 ,            // [115] 's' = Ser
+       16 ,            // [116] 't' = Thr
+       INVALID_LETTER, // [117] 'u'
+       17 ,            // [118] 'v' = Val
+       18 ,            // [119] 'w' = Trp
+       INVALID_LETTER, // [120] 'x'
+       19 ,            // [121] 'y' = Tyr
+       INVALID_LETTER, // [122] 'z'
+       INVALID_LETTER, // [123] '{'
+       INVALID_LETTER, // [124] '|'
+       INVALID_LETTER, // [125] '}'
+       INVALID_LETTER, // [126] '~'
+       INVALID_LETTER, // [127] 0x7f
+       INVALID_LETTER, // [128] 0x80
+       INVALID_LETTER, // [129] 0x81
+       INVALID_LETTER, // [130] 0x82
+       INVALID_LETTER, // [131] 0x83
+       INVALID_LETTER, // [132] 0x84
+       INVALID_LETTER, // [133] 0x85
+       INVALID_LETTER, // [134] 0x86
+       INVALID_LETTER, // [135] 0x87
+       INVALID_LETTER, // [136] 0x88
+       INVALID_LETTER, // [137] 0x89
+       INVALID_LETTER, // [138] 0x8a
+       INVALID_LETTER, // [139] 0x8b
+       INVALID_LETTER, // [140] 0x8c
+       INVALID_LETTER, // [141] 0x8d
+       INVALID_LETTER, // [142] 0x8e
+       INVALID_LETTER, // [143] 0x8f
+       INVALID_LETTER, // [144] 0x90
+       INVALID_LETTER, // [145] 0x91
+       INVALID_LETTER, // [146] 0x92
+       INVALID_LETTER, // [147] 0x93
+       INVALID_LETTER, // [148] 0x94
+       INVALID_LETTER, // [149] 0x95
+       INVALID_LETTER, // [150] 0x96
+       INVALID_LETTER, // [151] 0x97
+       INVALID_LETTER, // [152] 0x98
+       INVALID_LETTER, // [153] 0x99
+       INVALID_LETTER, // [154] 0x9a
+       INVALID_LETTER, // [155] 0x9b
+       INVALID_LETTER, // [156] 0x9c
+       INVALID_LETTER, // [157] 0x9d
+       INVALID_LETTER, // [158] 0x9e
+       INVALID_LETTER, // [159] 0x9f
+       INVALID_LETTER, // [160] 0xa0
+       INVALID_LETTER, // [161] 0xa1
+       INVALID_LETTER, // [162] 0xa2
+       INVALID_LETTER, // [163] 0xa3
+       INVALID_LETTER, // [164] 0xa4
+       INVALID_LETTER, // [165] 0xa5
+       INVALID_LETTER, // [166] 0xa6
+       INVALID_LETTER, // [167] 0xa7
+       INVALID_LETTER, // [168] 0xa8
+       INVALID_LETTER, // [169] 0xa9
+       INVALID_LETTER, // [170] 0xaa
+       INVALID_LETTER, // [171] 0xab
+       INVALID_LETTER, // [172] 0xac
+       INVALID_LETTER, // [173] 0xad
+       INVALID_LETTER, // [174] 0xae
+       INVALID_LETTER, // [175] 0xaf
+       INVALID_LETTER, // [176] 0xb0
+       INVALID_LETTER, // [177] 0xb1
+       INVALID_LETTER, // [178] 0xb2
+       INVALID_LETTER, // [179] 0xb3
+       INVALID_LETTER, // [180] 0xb4
+       INVALID_LETTER, // [181] 0xb5
+       INVALID_LETTER, // [182] 0xb6
+       INVALID_LETTER, // [183] 0xb7
+       INVALID_LETTER, // [184] 0xb8
+       INVALID_LETTER, // [185] 0xb9
+       INVALID_LETTER, // [186] 0xba
+       INVALID_LETTER, // [187] 0xbb
+       INVALID_LETTER, // [188] 0xbc
+       INVALID_LETTER, // [189] 0xbd
+       INVALID_LETTER, // [190] 0xbe
+       INVALID_LETTER, // [191] 0xbf
+       INVALID_LETTER, // [192] 0xc0
+       INVALID_LETTER, // [193] 0xc1
+       INVALID_LETTER, // [194] 0xc2
+       INVALID_LETTER, // [195] 0xc3
+       INVALID_LETTER, // [196] 0xc4
+       INVALID_LETTER, // [197] 0xc5
+       INVALID_LETTER, // [198] 0xc6
+       INVALID_LETTER, // [199] 0xc7
+       INVALID_LETTER, // [200] 0xc8
+       INVALID_LETTER, // [201] 0xc9
+       INVALID_LETTER, // [202] 0xca
+       INVALID_LETTER, // [203] 0xcb
+       INVALID_LETTER, // [204] 0xcc
+       INVALID_LETTER, // [205] 0xcd
+       INVALID_LETTER, // [206] 0xce
+       INVALID_LETTER, // [207] 0xcf
+       INVALID_LETTER, // [208] 0xd0
+       INVALID_LETTER, // [209] 0xd1
+       INVALID_LETTER, // [210] 0xd2
+       INVALID_LETTER, // [211] 0xd3
+       INVALID_LETTER, // [212] 0xd4
+       INVALID_LETTER, // [213] 0xd5
+       INVALID_LETTER, // [214] 0xd6
+       INVALID_LETTER, // [215] 0xd7
+       INVALID_LETTER, // [216] 0xd8
+       INVALID_LETTER, // [217] 0xd9
+       INVALID_LETTER, // [218] 0xda
+       INVALID_LETTER, // [219] 0xdb
+       INVALID_LETTER, // [220] 0xdc
+       INVALID_LETTER, // [221] 0xdd
+       INVALID_LETTER, // [222] 0xde
+       INVALID_LETTER, // [223] 0xdf
+       INVALID_LETTER, // [224] 0xe0
+       INVALID_LETTER, // [225] 0xe1
+       INVALID_LETTER, // [226] 0xe2
+       INVALID_LETTER, // [227] 0xe3
+       INVALID_LETTER, // [228] 0xe4
+       INVALID_LETTER, // [229] 0xe5
+       INVALID_LETTER, // [230] 0xe6
+       INVALID_LETTER, // [231] 0xe7
+       INVALID_LETTER, // [232] 0xe8
+       INVALID_LETTER, // [233] 0xe9
+       INVALID_LETTER, // [234] 0xea
+       INVALID_LETTER, // [235] 0xeb
+       INVALID_LETTER, // [236] 0xec
+       INVALID_LETTER, // [237] 0xed
+       INVALID_LETTER, // [238] 0xee
+       INVALID_LETTER, // [239] 0xef
+       INVALID_LETTER, // [240] 0xf0
+       INVALID_LETTER, // [241] 0xf1
+       INVALID_LETTER, // [242] 0xf2
+       INVALID_LETTER, // [243] 0xf3
+       INVALID_LETTER, // [244] 0xf4
+       INVALID_LETTER, // [245] 0xf5
+       INVALID_LETTER, // [246] 0xf6
+       INVALID_LETTER, // [247] 0xf7
+       INVALID_LETTER, // [248] 0xf8
+       INVALID_LETTER, // [249] 0xf9
+       INVALID_LETTER, // [250] 0xfa
+       INVALID_LETTER, // [251] 0xfb
+       INVALID_LETTER, // [252] 0xfc
+       INVALID_LETTER, // [253] 0xfd
+       INVALID_LETTER, // [254] 0xfe
+       INVALID_LETTER, // [255] 0xff
+       };
+unsigned g_CharToLetterAmino[256] =
+       {
+       INVALID_LETTER, // [  0] 0x00
+       INVALID_LETTER, // [  1] 0x01
+       INVALID_LETTER, // [  2] 0x02
+       INVALID_LETTER, // [  3] 0x03
+       INVALID_LETTER, // [  4] 0x04
+       INVALID_LETTER, // [  5] 0x05
+       INVALID_LETTER, // [  6] 0x06
+       INVALID_LETTER, // [  7] 0x07
+       INVALID_LETTER, // [  8] 0x08
+       INVALID_LETTER, // [  9] 0x09
+       INVALID_LETTER, // [ 10] 0x0a
+       INVALID_LETTER, // [ 11] 0x0b
+       INVALID_LETTER, // [ 12] 0x0c
+       INVALID_LETTER, // [ 13] 0x0d
+       INVALID_LETTER, // [ 14] 0x0e
+       INVALID_LETTER, // [ 15] 0x0f
+       INVALID_LETTER, // [ 16] 0x10
+       INVALID_LETTER, // [ 17] 0x11
+       INVALID_LETTER, // [ 18] 0x12
+       INVALID_LETTER, // [ 19] 0x13
+       INVALID_LETTER, // [ 20] 0x14
+       INVALID_LETTER, // [ 21] 0x15
+       INVALID_LETTER, // [ 22] 0x16
+       INVALID_LETTER, // [ 23] 0x17
+       INVALID_LETTER, // [ 24] 0x18
+       INVALID_LETTER, // [ 25] 0x19
+       INVALID_LETTER, // [ 26] 0x1a
+       INVALID_LETTER, // [ 27] 0x1b
+       INVALID_LETTER, // [ 28] 0x1c
+       INVALID_LETTER, // [ 29] 0x1d
+       INVALID_LETTER, // [ 30] 0x1e
+       INVALID_LETTER, // [ 31] 0x1f
+       INVALID_LETTER, // [ 32] ' '
+       INVALID_LETTER, // [ 33] '!'
+       INVALID_LETTER, // [ 34] '"'
+       INVALID_LETTER, // [ 35] '#'
+       INVALID_LETTER, // [ 36] '$'
+       INVALID_LETTER, // [ 37] '%'
+       INVALID_LETTER, // [ 38] '&'
+       INVALID_LETTER, // [ 39] '''
+       INVALID_LETTER, // [ 40] '('
+       INVALID_LETTER, // [ 41] ')'
+       INVALID_LETTER, // [ 42] '*'
+       INVALID_LETTER, // [ 43] '+'
+       INVALID_LETTER, // [ 44] ','
+       INVALID_LETTER, // [ 45] '-'
+       INVALID_LETTER, // [ 46] '.'
+       INVALID_LETTER, // [ 47] '/'
+       INVALID_LETTER, // [ 48] '0'
+       INVALID_LETTER, // [ 49] '1'
+       INVALID_LETTER, // [ 50] '2'
+       INVALID_LETTER, // [ 51] '3'
+       INVALID_LETTER, // [ 52] '4'
+       INVALID_LETTER, // [ 53] '5'
+       INVALID_LETTER, // [ 54] '6'
+       INVALID_LETTER, // [ 55] '7'
+       INVALID_LETTER, // [ 56] '8'
+       INVALID_LETTER, // [ 57] '9'
+       INVALID_LETTER, // [ 58] ':'
+       INVALID_LETTER, // [ 59] ';'
+       INVALID_LETTER, // [ 60] '<'
+       INVALID_LETTER, // [ 61] '='
+       INVALID_LETTER, // [ 62] '>'
+       INVALID_LETTER, // [ 63] '?'
+       INVALID_LETTER, // [ 64] '@'
+       0  ,            // [ 65] 'A' = Ala
+       INVALID_LETTER, // [ 66] 'B'
+       1  ,            // [ 67] 'C' = Cys
+       2  ,            // [ 68] 'D' = Asp
+       3  ,            // [ 69] 'E' = Glu
+       4  ,            // [ 70] 'F' = Phe
+       5  ,            // [ 71] 'G' = Gly
+       6  ,            // [ 72] 'H' = His
+       7  ,            // [ 73] 'I' = Ile
+       INVALID_LETTER, // [ 74] 'J'
+       8  ,            // [ 75] 'K' = Lys
+       9  ,            // [ 76] 'L' = Leu
+       10 ,            // [ 77] 'M' = Met
+       11 ,            // [ 78] 'N' = Asn
+       INVALID_LETTER, // [ 79] 'O'
+       12 ,            // [ 80] 'P' = Pro
+       13 ,            // [ 81] 'Q' = Gln
+       14 ,            // [ 82] 'R' = Arg
+       15 ,            // [ 83] 'S' = Ser
+       16 ,            // [ 84] 'T' = Thr
+       INVALID_LETTER, // [ 85] 'U'
+       17 ,            // [ 86] 'V' = Val
+       18 ,            // [ 87] 'W' = Trp
+       INVALID_LETTER, // [ 88] 'X'
+       19 ,            // [ 89] 'Y' = Tyr
+       INVALID_LETTER, // [ 90] 'Z'
+       INVALID_LETTER, // [ 91] '['
+       INVALID_LETTER, // [ 92] '\'
+       INVALID_LETTER, // [ 93] ']'
+       INVALID_LETTER, // [ 94] '^'
+       INVALID_LETTER, // [ 95] '_'
+       INVALID_LETTER, // [ 96] '`'
+       0  ,            // [ 97] 'a' = Ala
+       INVALID_LETTER, // [ 98] 'b'
+       1  ,            // [ 99] 'c' = Cys
+       2  ,            // [100] 'd' = Asp
+       3  ,            // [101] 'e' = Glu
+       4  ,            // [102] 'f' = Phe
+       5  ,            // [103] 'g' = Gly
+       6  ,            // [104] 'h' = His
+       7  ,            // [105] 'i' = Ile
+       INVALID_LETTER, // [106] 'j'
+       8  ,            // [107] 'k' = Lys
+       9  ,            // [108] 'l' = Leu
+       10 ,            // [109] 'm' = Met
+       11 ,            // [110] 'n' = Asn
+       INVALID_LETTER, // [111] 'o'
+       12 ,            // [112] 'p' = Pro
+       13 ,            // [113] 'q' = Gln
+       14 ,            // [114] 'r' = Arg
+       15 ,            // [115] 's' = Ser
+       16 ,            // [116] 't' = Thr
+       INVALID_LETTER, // [117] 'u'
+       17 ,            // [118] 'v' = Val
+       18 ,            // [119] 'w' = Trp
+       INVALID_LETTER, // [120] 'x'
+       19 ,            // [121] 'y' = Tyr
+       INVALID_LETTER, // [122] 'z'
+       INVALID_LETTER, // [123] '{'
+       INVALID_LETTER, // [124] '|'
+       INVALID_LETTER, // [125] '}'
+       INVALID_LETTER, // [126] '~'
+       INVALID_LETTER, // [127] 0x7f
+       INVALID_LETTER, // [128] 0x80
+       INVALID_LETTER, // [129] 0x81
+       INVALID_LETTER, // [130] 0x82
+       INVALID_LETTER, // [131] 0x83
+       INVALID_LETTER, // [132] 0x84
+       INVALID_LETTER, // [133] 0x85
+       INVALID_LETTER, // [134] 0x86
+       INVALID_LETTER, // [135] 0x87
+       INVALID_LETTER, // [136] 0x88
+       INVALID_LETTER, // [137] 0x89
+       INVALID_LETTER, // [138] 0x8a
+       INVALID_LETTER, // [139] 0x8b
+       INVALID_LETTER, // [140] 0x8c
+       INVALID_LETTER, // [141] 0x8d
+       INVALID_LETTER, // [142] 0x8e
+       INVALID_LETTER, // [143] 0x8f
+       INVALID_LETTER, // [144] 0x90
+       INVALID_LETTER, // [145] 0x91
+       INVALID_LETTER, // [146] 0x92
+       INVALID_LETTER, // [147] 0x93
+       INVALID_LETTER, // [148] 0x94
+       INVALID_LETTER, // [149] 0x95
+       INVALID_LETTER, // [150] 0x96
+       INVALID_LETTER, // [151] 0x97
+       INVALID_LETTER, // [152] 0x98
+       INVALID_LETTER, // [153] 0x99
+       INVALID_LETTER, // [154] 0x9a
+       INVALID_LETTER, // [155] 0x9b
+       INVALID_LETTER, // [156] 0x9c
+       INVALID_LETTER, // [157] 0x9d
+       INVALID_LETTER, // [158] 0x9e
+       INVALID_LETTER, // [159] 0x9f
+       INVALID_LETTER, // [160] 0xa0
+       INVALID_LETTER, // [161] 0xa1
+       INVALID_LETTER, // [162] 0xa2
+       INVALID_LETTER, // [163] 0xa3
+       INVALID_LETTER, // [164] 0xa4
+       INVALID_LETTER, // [165] 0xa5
+       INVALID_LETTER, // [166] 0xa6
+       INVALID_LETTER, // [167] 0xa7
+       INVALID_LETTER, // [168] 0xa8
+       INVALID_LETTER, // [169] 0xa9
+       INVALID_LETTER, // [170] 0xaa
+       INVALID_LETTER, // [171] 0xab
+       INVALID_LETTER, // [172] 0xac
+       INVALID_LETTER, // [173] 0xad
+       INVALID_LETTER, // [174] 0xae
+       INVALID_LETTER, // [175] 0xaf
+       INVALID_LETTER, // [176] 0xb0
+       INVALID_LETTER, // [177] 0xb1
+       INVALID_LETTER, // [178] 0xb2
+       INVALID_LETTER, // [179] 0xb3
+       INVALID_LETTER, // [180] 0xb4
+       INVALID_LETTER, // [181] 0xb5
+       INVALID_LETTER, // [182] 0xb6
+       INVALID_LETTER, // [183] 0xb7
+       INVALID_LETTER, // [184] 0xb8
+       INVALID_LETTER, // [185] 0xb9
+       INVALID_LETTER, // [186] 0xba
+       INVALID_LETTER, // [187] 0xbb
+       INVALID_LETTER, // [188] 0xbc
+       INVALID_LETTER, // [189] 0xbd
+       INVALID_LETTER, // [190] 0xbe
+       INVALID_LETTER, // [191] 0xbf
+       INVALID_LETTER, // [192] 0xc0
+       INVALID_LETTER, // [193] 0xc1
+       INVALID_LETTER, // [194] 0xc2
+       INVALID_LETTER, // [195] 0xc3
+       INVALID_LETTER, // [196] 0xc4
+       INVALID_LETTER, // [197] 0xc5
+       INVALID_LETTER, // [198] 0xc6
+       INVALID_LETTER, // [199] 0xc7
+       INVALID_LETTER, // [200] 0xc8
+       INVALID_LETTER, // [201] 0xc9
+       INVALID_LETTER, // [202] 0xca
+       INVALID_LETTER, // [203] 0xcb
+       INVALID_LETTER, // [204] 0xcc
+       INVALID_LETTER, // [205] 0xcd
+       INVALID_LETTER, // [206] 0xce
+       INVALID_LETTER, // [207] 0xcf
+       INVALID_LETTER, // [208] 0xd0
+       INVALID_LETTER, // [209] 0xd1
+       INVALID_LETTER, // [210] 0xd2
+       INVALID_LETTER, // [211] 0xd3
+       INVALID_LETTER, // [212] 0xd4
+       INVALID_LETTER, // [213] 0xd5
+       INVALID_LETTER, // [214] 0xd6
+       INVALID_LETTER, // [215] 0xd7
+       INVALID_LETTER, // [216] 0xd8
+       INVALID_LETTER, // [217] 0xd9
+       INVALID_LETTER, // [218] 0xda
+       INVALID_LETTER, // [219] 0xdb
+       INVALID_LETTER, // [220] 0xdc
+       INVALID_LETTER, // [221] 0xdd
+       INVALID_LETTER, // [222] 0xde
+       INVALID_LETTER, // [223] 0xdf
+       INVALID_LETTER, // [224] 0xe0
+       INVALID_LETTER, // [225] 0xe1
+       INVALID_LETTER, // [226] 0xe2
+       INVALID_LETTER, // [227] 0xe3
+       INVALID_LETTER, // [228] 0xe4
+       INVALID_LETTER, // [229] 0xe5
+       INVALID_LETTER, // [230] 0xe6
+       INVALID_LETTER, // [231] 0xe7
+       INVALID_LETTER, // [232] 0xe8
+       INVALID_LETTER, // [233] 0xe9
+       INVALID_LETTER, // [234] 0xea
+       INVALID_LETTER, // [235] 0xeb
+       INVALID_LETTER, // [236] 0xec
+       INVALID_LETTER, // [237] 0xed
+       INVALID_LETTER, // [238] 0xee
+       INVALID_LETTER, // [239] 0xef
+       INVALID_LETTER, // [240] 0xf0
+       INVALID_LETTER, // [241] 0xf1
+       INVALID_LETTER, // [242] 0xf2
+       INVALID_LETTER, // [243] 0xf3
+       INVALID_LETTER, // [244] 0xf4
+       INVALID_LETTER, // [245] 0xf5
+       INVALID_LETTER, // [246] 0xf6
+       INVALID_LETTER, // [247] 0xf7
+       INVALID_LETTER, // [248] 0xf8
+       INVALID_LETTER, // [249] 0xf9
+       INVALID_LETTER, // [250] 0xfa
+       INVALID_LETTER, // [251] 0xfb
+       INVALID_LETTER, // [252] 0xfc
+       INVALID_LETTER, // [253] 0xfd
+       INVALID_LETTER, // [254] 0xfe
+       INVALID_LETTER, // [255] 0xff
+       };
+
+unsigned char g_LetterToCharAmino[256] = // Reverse lookup: amino letter index -> one-letter residue character.
+       { // Slots 0..19 hold the 20 residues ordered by one-letter code, slot 20 is '*' (stop); every other slot is INVALID_CHAR.
+       'A', // [0] Ala
+       'C', // [1] Cys
+       'D', // [2] Asp
+       'E', // [3] Glu
+       'F', // [4] Phe
+       'G', // [5] Gly
+       'H', // [6] His
+       'I', // [7] Ile
+       'K', // [8] Lys
+       'L', // [9] Leu
+       'M', // [10] Met
+       'N', // [11] Asn
+       'P', // [12] Pro
+       'Q', // [13] Gln
+       'R', // [14] Arg
+       'S', // [15] Ser
+       'T', // [16] Thr
+       'V', // [17] Val
+       'W', // [18] Trp
+       'Y', // [19] Tyr
+       '*', // [20] stop (translation terminator)
+       INVALID_CHAR, // [21]
+       INVALID_CHAR, // [22]
+       INVALID_CHAR, // [23]
+       INVALID_CHAR, // [24]
+       INVALID_CHAR, // [25]
+       INVALID_CHAR, // [26]
+       INVALID_CHAR, // [27]
+       INVALID_CHAR, // [28]
+       INVALID_CHAR, // [29]
+       INVALID_CHAR, // [30]
+       INVALID_CHAR, // [31]
+       INVALID_CHAR, // [32]
+       INVALID_CHAR, // [33]
+       INVALID_CHAR, // [34]
+       INVALID_CHAR, // [35]
+       INVALID_CHAR, // [36]
+       INVALID_CHAR, // [37]
+       INVALID_CHAR, // [38]
+       INVALID_CHAR, // [39]
+       INVALID_CHAR, // [40]
+       INVALID_CHAR, // [41]
+       INVALID_CHAR, // [42]
+       INVALID_CHAR, // [43]
+       INVALID_CHAR, // [44]
+       INVALID_CHAR, // [45]
+       INVALID_CHAR, // [46]
+       INVALID_CHAR, // [47]
+       INVALID_CHAR, // [48]
+       INVALID_CHAR, // [49]
+       INVALID_CHAR, // [50]
+       INVALID_CHAR, // [51]
+       INVALID_CHAR, // [52]
+       INVALID_CHAR, // [53]
+       INVALID_CHAR, // [54]
+       INVALID_CHAR, // [55]
+       INVALID_CHAR, // [56]
+       INVALID_CHAR, // [57]
+       INVALID_CHAR, // [58]
+       INVALID_CHAR, // [59]
+       INVALID_CHAR, // [60]
+       INVALID_CHAR, // [61]
+       INVALID_CHAR, // [62]
+       INVALID_CHAR, // [63]
+       INVALID_CHAR, // [64]
+       INVALID_CHAR, // [65]
+       INVALID_CHAR, // [66]
+       INVALID_CHAR, // [67]
+       INVALID_CHAR, // [68]
+       INVALID_CHAR, // [69]
+       INVALID_CHAR, // [70]
+       INVALID_CHAR, // [71]
+       INVALID_CHAR, // [72]
+       INVALID_CHAR, // [73]
+       INVALID_CHAR, // [74]
+       INVALID_CHAR, // [75]
+       INVALID_CHAR, // [76]
+       INVALID_CHAR, // [77]
+       INVALID_CHAR, // [78]
+       INVALID_CHAR, // [79]
+       INVALID_CHAR, // [80]
+       INVALID_CHAR, // [81]
+       INVALID_CHAR, // [82]
+       INVALID_CHAR, // [83]
+       INVALID_CHAR, // [84]
+       INVALID_CHAR, // [85]
+       INVALID_CHAR, // [86]
+       INVALID_CHAR, // [87]
+       INVALID_CHAR, // [88]
+       INVALID_CHAR, // [89]
+       INVALID_CHAR, // [90]
+       INVALID_CHAR, // [91]
+       INVALID_CHAR, // [92]
+       INVALID_CHAR, // [93]
+       INVALID_CHAR, // [94]
+       INVALID_CHAR, // [95]
+       INVALID_CHAR, // [96]
+       INVALID_CHAR, // [97]
+       INVALID_CHAR, // [98]
+       INVALID_CHAR, // [99]
+       INVALID_CHAR, // [100]
+       INVALID_CHAR, // [101]
+       INVALID_CHAR, // [102]
+       INVALID_CHAR, // [103]
+       INVALID_CHAR, // [104]
+       INVALID_CHAR, // [105]
+       INVALID_CHAR, // [106]
+       INVALID_CHAR, // [107]
+       INVALID_CHAR, // [108]
+       INVALID_CHAR, // [109]
+       INVALID_CHAR, // [110]
+       INVALID_CHAR, // [111]
+       INVALID_CHAR, // [112]
+       INVALID_CHAR, // [113]
+       INVALID_CHAR, // [114]
+       INVALID_CHAR, // [115]
+       INVALID_CHAR, // [116]
+       INVALID_CHAR, // [117]
+       INVALID_CHAR, // [118]
+       INVALID_CHAR, // [119]
+       INVALID_CHAR, // [120]
+       INVALID_CHAR, // [121]
+       INVALID_CHAR, // [122]
+       INVALID_CHAR, // [123]
+       INVALID_CHAR, // [124]
+       INVALID_CHAR, // [125]
+       INVALID_CHAR, // [126]
+       INVALID_CHAR, // [127]
+       INVALID_CHAR, // [128]
+       INVALID_CHAR, // [129]
+       INVALID_CHAR, // [130]
+       INVALID_CHAR, // [131]
+       INVALID_CHAR, // [132]
+       INVALID_CHAR, // [133]
+       INVALID_CHAR, // [134]
+       INVALID_CHAR, // [135]
+       INVALID_CHAR, // [136]
+       INVALID_CHAR, // [137]
+       INVALID_CHAR, // [138]
+       INVALID_CHAR, // [139]
+       INVALID_CHAR, // [140]
+       INVALID_CHAR, // [141]
+       INVALID_CHAR, // [142]
+       INVALID_CHAR, // [143]
+       INVALID_CHAR, // [144]
+       INVALID_CHAR, // [145]
+       INVALID_CHAR, // [146]
+       INVALID_CHAR, // [147]
+       INVALID_CHAR, // [148]
+       INVALID_CHAR, // [149]
+       INVALID_CHAR, // [150]
+       INVALID_CHAR, // [151]
+       INVALID_CHAR, // [152]
+       INVALID_CHAR, // [153]
+       INVALID_CHAR, // [154]
+       INVALID_CHAR, // [155]
+       INVALID_CHAR, // [156]
+       INVALID_CHAR, // [157]
+       INVALID_CHAR, // [158]
+       INVALID_CHAR, // [159]
+       INVALID_CHAR, // [160]
+       INVALID_CHAR, // [161]
+       INVALID_CHAR, // [162]
+       INVALID_CHAR, // [163]
+       INVALID_CHAR, // [164]
+       INVALID_CHAR, // [165]
+       INVALID_CHAR, // [166]
+       INVALID_CHAR, // [167]
+       INVALID_CHAR, // [168]
+       INVALID_CHAR, // [169]
+       INVALID_CHAR, // [170]
+       INVALID_CHAR, // [171]
+       INVALID_CHAR, // [172]
+       INVALID_CHAR, // [173]
+       INVALID_CHAR, // [174]
+       INVALID_CHAR, // [175]
+       INVALID_CHAR, // [176]
+       INVALID_CHAR, // [177]
+       INVALID_CHAR, // [178]
+       INVALID_CHAR, // [179]
+       INVALID_CHAR, // [180]
+       INVALID_CHAR, // [181]
+       INVALID_CHAR, // [182]
+       INVALID_CHAR, // [183]
+       INVALID_CHAR, // [184]
+       INVALID_CHAR, // [185]
+       INVALID_CHAR, // [186]
+       INVALID_CHAR, // [187]
+       INVALID_CHAR, // [188]
+       INVALID_CHAR, // [189]
+       INVALID_CHAR, // [190]
+       INVALID_CHAR, // [191]
+       INVALID_CHAR, // [192]
+       INVALID_CHAR, // [193]
+       INVALID_CHAR, // [194]
+       INVALID_CHAR, // [195]
+       INVALID_CHAR, // [196]
+       INVALID_CHAR, // [197]
+       INVALID_CHAR, // [198]
+       INVALID_CHAR, // [199]
+       INVALID_CHAR, // [200]
+       INVALID_CHAR, // [201]
+       INVALID_CHAR, // [202]
+       INVALID_CHAR, // [203]
+       INVALID_CHAR, // [204]
+       INVALID_CHAR, // [205]
+       INVALID_CHAR, // [206]
+       INVALID_CHAR, // [207]
+       INVALID_CHAR, // [208]
+       INVALID_CHAR, // [209]
+       INVALID_CHAR, // [210]
+       INVALID_CHAR, // [211]
+       INVALID_CHAR, // [212]
+       INVALID_CHAR, // [213]
+       INVALID_CHAR, // [214]
+       INVALID_CHAR, // [215]
+       INVALID_CHAR, // [216]
+       INVALID_CHAR, // [217]
+       INVALID_CHAR, // [218]
+       INVALID_CHAR, // [219]
+       INVALID_CHAR, // [220]
+       INVALID_CHAR, // [221]
+       INVALID_CHAR, // [222]
+       INVALID_CHAR, // [223]
+       INVALID_CHAR, // [224]
+       INVALID_CHAR, // [225]
+       INVALID_CHAR, // [226]
+       INVALID_CHAR, // [227]
+       INVALID_CHAR, // [228]
+       INVALID_CHAR, // [229]
+       INVALID_CHAR, // [230]
+       INVALID_CHAR, // [231]
+       INVALID_CHAR, // [232]
+       INVALID_CHAR, // [233]
+       INVALID_CHAR, // [234]
+       INVALID_CHAR, // [235]
+       INVALID_CHAR, // [236]
+       INVALID_CHAR, // [237]
+       INVALID_CHAR, // [238]
+       INVALID_CHAR, // [239]
+       INVALID_CHAR, // [240]
+       INVALID_CHAR, // [241]
+       INVALID_CHAR, // [242]
+       INVALID_CHAR, // [243]
+       INVALID_CHAR, // [244]
+       INVALID_CHAR, // [245]
+       INVALID_CHAR, // [246]
+       INVALID_CHAR, // [247]
+       INVALID_CHAR, // [248]
+       INVALID_CHAR, // [249]
+       INVALID_CHAR, // [250]
+       INVALID_CHAR, // [251]
+       INVALID_CHAR, // [252]
+       INVALID_CHAR, // [253]
+       INVALID_CHAR, // [254]
+       INVALID_CHAR, // [255]
+       };
+
+unsigned g_CharToLetterNucleo[256] = // Lookup: character code -> nucleotide letter index.
+       { // A/a=0, C/c=1, G/g=2, T/t and U/u=3; every other character code maps to INVALID_LETTER.
+       INVALID_LETTER, // [  0] = 0x00
+       INVALID_LETTER, // [  1] = 0x01
+       INVALID_LETTER, // [  2] = 0x02
+       INVALID_LETTER, // [  3] = 0x03
+       INVALID_LETTER, // [  4] = 0x04
+       INVALID_LETTER, // [  5] = 0x05
+       INVALID_LETTER, // [  6] = 0x06
+       INVALID_LETTER, // [  7] = 0x07
+       INVALID_LETTER, // [  8] = 0x08
+       INVALID_LETTER, // [  9] = 0x09
+       INVALID_LETTER, // [ 10] = 0x0a
+       INVALID_LETTER, // [ 11] = 0x0b
+       INVALID_LETTER, // [ 12] = 0x0c
+       INVALID_LETTER, // [ 13] = 0x0d
+       INVALID_LETTER, // [ 14] = 0x0e
+       INVALID_LETTER, // [ 15] = 0x0f
+       INVALID_LETTER, // [ 16] = 0x10
+       INVALID_LETTER, // [ 17] = 0x11
+       INVALID_LETTER, // [ 18] = 0x12
+       INVALID_LETTER, // [ 19] = 0x13
+       INVALID_LETTER, // [ 20] = 0x14
+       INVALID_LETTER, // [ 21] = 0x15
+       INVALID_LETTER, // [ 22] = 0x16
+       INVALID_LETTER, // [ 23] = 0x17
+       INVALID_LETTER, // [ 24] = 0x18
+       INVALID_LETTER, // [ 25] = 0x19
+       INVALID_LETTER, // [ 26] = 0x1a
+       INVALID_LETTER, // [ 27] = 0x1b
+       INVALID_LETTER, // [ 28] = 0x1c
+       INVALID_LETTER, // [ 29] = 0x1d
+       INVALID_LETTER, // [ 30] = 0x1e
+       INVALID_LETTER, // [ 31] = 0x1f
+       INVALID_LETTER, // [ 32] = ' ' (space)
+       INVALID_LETTER, // [ 33] = '!'
+       INVALID_LETTER, // [ 34] = '"'
+       INVALID_LETTER, // [ 35] = '#'
+       INVALID_LETTER, // [ 36] = '$'
+       INVALID_LETTER, // [ 37] = '%'
+       INVALID_LETTER, // [ 38] = '&'
+       INVALID_LETTER, // [ 39] = '\''
+       INVALID_LETTER, // [ 40] = '('
+       INVALID_LETTER, // [ 41] = ')'
+       INVALID_LETTER, // [ 42] = '*'
+       INVALID_LETTER, // [ 43] = '+'
+       INVALID_LETTER, // [ 44] = ','
+       INVALID_LETTER, // [ 45] = '-'
+       INVALID_LETTER, // [ 46] = '.'
+       INVALID_LETTER, // [ 47] = '/'
+       INVALID_LETTER, // [ 48] = '0'
+       INVALID_LETTER, // [ 49] = '1'
+       INVALID_LETTER, // [ 50] = '2'
+       INVALID_LETTER, // [ 51] = '3'
+       INVALID_LETTER, // [ 52] = '4'
+       INVALID_LETTER, // [ 53] = '5'
+       INVALID_LETTER, // [ 54] = '6'
+       INVALID_LETTER, // [ 55] = '7'
+       INVALID_LETTER, // [ 56] = '8'
+       INVALID_LETTER, // [ 57] = '9'
+       INVALID_LETTER, // [ 58] = ':'
+       INVALID_LETTER, // [ 59] = ';'
+       INVALID_LETTER, // [ 60] = '<'
+       INVALID_LETTER, // [ 61] = '='
+       INVALID_LETTER, // [ 62] = '>'
+       INVALID_LETTER, // [ 63] = '?'
+       INVALID_LETTER, // [ 64] = '@'
+       0  ,            // [ 65] = A (Nucleotide)
+       INVALID_LETTER, // [ 66] = 'B'
+       1  ,            // [ 67] = C (Nucleotide)
+       INVALID_LETTER, // [ 68] = 'D'
+       INVALID_LETTER, // [ 69] = 'E'
+       INVALID_LETTER, // [ 70] = 'F'
+       2  ,            // [ 71] = G (Nucleotide)
+       INVALID_LETTER, // [ 72] = 'H'
+       INVALID_LETTER, // [ 73] = 'I'
+       INVALID_LETTER, // [ 74] = 'J'
+       INVALID_LETTER, // [ 75] = 'K'
+       INVALID_LETTER, // [ 76] = 'L'
+       INVALID_LETTER, // [ 77] = 'M'
+       INVALID_LETTER, // [ 78] = 'N'
+       INVALID_LETTER, // [ 79] = 'O'
+       INVALID_LETTER, // [ 80] = 'P'
+       INVALID_LETTER, // [ 81] = 'Q'
+       INVALID_LETTER, // [ 82] = 'R'
+       INVALID_LETTER, // [ 83] = 'S'
+       3  ,            // [ 84] = T (Nucleotide)
+       3  ,            // [ 85] = U (Nucleotide)
+       INVALID_LETTER, // [ 86] = 'V'
+       INVALID_LETTER, // [ 87] = 'W'
+       INVALID_LETTER, // [ 88] = 'X'
+       INVALID_LETTER, // [ 89] = 'Y'
+       INVALID_LETTER, // [ 90] = 'Z'
+       INVALID_LETTER, // [ 91] = '['
+       INVALID_LETTER, // [ 92] = '\\'
+       INVALID_LETTER, // [ 93] = ']'
+       INVALID_LETTER, // [ 94] = '^'
+       INVALID_LETTER, // [ 95] = '_'
+       INVALID_LETTER, // [ 96] = '`'
+       0  ,            // [ 97] = a (Nucleotide)
+       INVALID_LETTER, // [ 98] = 'b'
+       1  ,            // [ 99] = c (Nucleotide)
+       INVALID_LETTER, // [100] = 'd'
+       INVALID_LETTER, // [101] = 'e'
+       INVALID_LETTER, // [102] = 'f'
+       2  ,            // [103] = g (Nucleotide)
+       INVALID_LETTER, // [104] = 'h'
+       INVALID_LETTER, // [105] = 'i'
+       INVALID_LETTER, // [106] = 'j'
+       INVALID_LETTER, // [107] = 'k'
+       INVALID_LETTER, // [108] = 'l'
+       INVALID_LETTER, // [109] = 'm'
+       INVALID_LETTER, // [110] = 'n'
+       INVALID_LETTER, // [111] = 'o'
+       INVALID_LETTER, // [112] = 'p'
+       INVALID_LETTER, // [113] = 'q'
+       INVALID_LETTER, // [114] = 'r'
+       INVALID_LETTER, // [115] = 's'
+       3  ,            // [116] = t (Nucleotide)
+       3  ,            // [117] = u (Nucleotide)
+       INVALID_LETTER, // [118] = 'v'
+       INVALID_LETTER, // [119] = 'w'
+       INVALID_LETTER, // [120] = 'x'
+       INVALID_LETTER, // [121] = 'y'
+       INVALID_LETTER, // [122] = 'z'
+       INVALID_LETTER, // [123] = '{'
+       INVALID_LETTER, // [124] = '|'
+       INVALID_LETTER, // [125] = '}'
+       INVALID_LETTER, // [126] = '~'
+       INVALID_LETTER, // [127] = 0x7f
+       INVALID_LETTER, // [128] = 0x80
+       INVALID_LETTER, // [129] = 0x81
+       INVALID_LETTER, // [130] = 0x82
+       INVALID_LETTER, // [131] = 0x83
+       INVALID_LETTER, // [132] = 0x84
+       INVALID_LETTER, // [133] = 0x85
+       INVALID_LETTER, // [134] = 0x86
+       INVALID_LETTER, // [135] = 0x87
+       INVALID_LETTER, // [136] = 0x88
+       INVALID_LETTER, // [137] = 0x89
+       INVALID_LETTER, // [138] = 0x8a
+       INVALID_LETTER, // [139] = 0x8b
+       INVALID_LETTER, // [140] = 0x8c
+       INVALID_LETTER, // [141] = 0x8d
+       INVALID_LETTER, // [142] = 0x8e
+       INVALID_LETTER, // [143] = 0x8f
+       INVALID_LETTER, // [144] = 0x90
+       INVALID_LETTER, // [145] = 0x91
+       INVALID_LETTER, // [146] = 0x92
+       INVALID_LETTER, // [147] = 0x93
+       INVALID_LETTER, // [148] = 0x94
+       INVALID_LETTER, // [149] = 0x95
+       INVALID_LETTER, // [150] = 0x96
+       INVALID_LETTER, // [151] = 0x97
+       INVALID_LETTER, // [152] = 0x98
+       INVALID_LETTER, // [153] = 0x99
+       INVALID_LETTER, // [154] = 0x9a
+       INVALID_LETTER, // [155] = 0x9b
+       INVALID_LETTER, // [156] = 0x9c
+       INVALID_LETTER, // [157] = 0x9d
+       INVALID_LETTER, // [158] = 0x9e
+       INVALID_LETTER, // [159] = 0x9f
+       INVALID_LETTER, // [160] = 0xa0
+       INVALID_LETTER, // [161] = 0xa1
+       INVALID_LETTER, // [162] = 0xa2
+       INVALID_LETTER, // [163] = 0xa3
+       INVALID_LETTER, // [164] = 0xa4
+       INVALID_LETTER, // [165] = 0xa5
+       INVALID_LETTER, // [166] = 0xa6
+       INVALID_LETTER, // [167] = 0xa7
+       INVALID_LETTER, // [168] = 0xa8
+       INVALID_LETTER, // [169] = 0xa9
+       INVALID_LETTER, // [170] = 0xaa
+       INVALID_LETTER, // [171] = 0xab
+       INVALID_LETTER, // [172] = 0xac
+       INVALID_LETTER, // [173] = 0xad
+       INVALID_LETTER, // [174] = 0xae
+       INVALID_LETTER, // [175] = 0xaf
+       INVALID_LETTER, // [176] = 0xb0
+       INVALID_LETTER, // [177] = 0xb1
+       INVALID_LETTER, // [178] = 0xb2
+       INVALID_LETTER, // [179] = 0xb3
+       INVALID_LETTER, // [180] = 0xb4
+       INVALID_LETTER, // [181] = 0xb5
+       INVALID_LETTER, // [182] = 0xb6
+       INVALID_LETTER, // [183] = 0xb7
+       INVALID_LETTER, // [184] = 0xb8
+       INVALID_LETTER, // [185] = 0xb9
+       INVALID_LETTER, // [186] = 0xba
+       INVALID_LETTER, // [187] = 0xbb
+       INVALID_LETTER, // [188] = 0xbc
+       INVALID_LETTER, // [189] = 0xbd
+       INVALID_LETTER, // [190] = 0xbe
+       INVALID_LETTER, // [191] = 0xbf
+       INVALID_LETTER, // [192] = 0xc0
+       INVALID_LETTER, // [193] = 0xc1
+       INVALID_LETTER, // [194] = 0xc2
+       INVALID_LETTER, // [195] = 0xc3
+       INVALID_LETTER, // [196] = 0xc4
+       INVALID_LETTER, // [197] = 0xc5
+       INVALID_LETTER, // [198] = 0xc6
+       INVALID_LETTER, // [199] = 0xc7
+       INVALID_LETTER, // [200] = 0xc8
+       INVALID_LETTER, // [201] = 0xc9
+       INVALID_LETTER, // [202] = 0xca
+       INVALID_LETTER, // [203] = 0xcb
+       INVALID_LETTER, // [204] = 0xcc
+       INVALID_LETTER, // [205] = 0xcd
+       INVALID_LETTER, // [206] = 0xce
+       INVALID_LETTER, // [207] = 0xcf
+       INVALID_LETTER, // [208] = 0xd0
+       INVALID_LETTER, // [209] = 0xd1
+       INVALID_LETTER, // [210] = 0xd2
+       INVALID_LETTER, // [211] = 0xd3
+       INVALID_LETTER, // [212] = 0xd4
+       INVALID_LETTER, // [213] = 0xd5
+       INVALID_LETTER, // [214] = 0xd6
+       INVALID_LETTER, // [215] = 0xd7
+       INVALID_LETTER, // [216] = 0xd8
+       INVALID_LETTER, // [217] = 0xd9
+       INVALID_LETTER, // [218] = 0xda
+       INVALID_LETTER, // [219] = 0xdb
+       INVALID_LETTER, // [220] = 0xdc
+       INVALID_LETTER, // [221] = 0xdd
+       INVALID_LETTER, // [222] = 0xde
+       INVALID_LETTER, // [223] = 0xdf
+       INVALID_LETTER, // [224] = 0xe0
+       INVALID_LETTER, // [225] = 0xe1
+       INVALID_LETTER, // [226] = 0xe2
+       INVALID_LETTER, // [227] = 0xe3
+       INVALID_LETTER, // [228] = 0xe4
+       INVALID_LETTER, // [229] = 0xe5
+       INVALID_LETTER, // [230] = 0xe6
+       INVALID_LETTER, // [231] = 0xe7
+       INVALID_LETTER, // [232] = 0xe8
+       INVALID_LETTER, // [233] = 0xe9
+       INVALID_LETTER, // [234] = 0xea
+       INVALID_LETTER, // [235] = 0xeb
+       INVALID_LETTER, // [236] = 0xec
+       INVALID_LETTER, // [237] = 0xed
+       INVALID_LETTER, // [238] = 0xee
+       INVALID_LETTER, // [239] = 0xef
+       INVALID_LETTER, // [240] = 0xf0
+       INVALID_LETTER, // [241] = 0xf1
+       INVALID_LETTER, // [242] = 0xf2
+       INVALID_LETTER, // [243] = 0xf3
+       INVALID_LETTER, // [244] = 0xf4
+       INVALID_LETTER, // [245] = 0xf5
+       INVALID_LETTER, // [246] = 0xf6
+       INVALID_LETTER, // [247] = 0xf7
+       INVALID_LETTER, // [248] = 0xf8
+       INVALID_LETTER, // [249] = 0xf9
+       INVALID_LETTER, // [250] = 0xfa
+       INVALID_LETTER, // [251] = 0xfb
+       INVALID_LETTER, // [252] = 0xfc
+       INVALID_LETTER, // [253] = 0xfd
+       INVALID_LETTER, // [254] = 0xfe
+       INVALID_LETTER, // [255] = 0xff
+       };
+
+unsigned char g_LetterToCharNucleo[256] = // Reverse lookup: nucleotide letter index -> upper-case base character.
+       { // Only slots 0..3 (A, C, G, T) are populated; slots 4..255 are INVALID_CHAR.
+       'A', // [0]
+       'C', // [1]
+       'G', // [2]
+       'T', // [3]
+       INVALID_CHAR, // [4]
+       INVALID_CHAR, // [5]
+       INVALID_CHAR, // [6]
+       INVALID_CHAR, // [7]
+       INVALID_CHAR, // [8]
+       INVALID_CHAR, // [9]
+       INVALID_CHAR, // [10]
+       INVALID_CHAR, // [11]
+       INVALID_CHAR, // [12]
+       INVALID_CHAR, // [13]
+       INVALID_CHAR, // [14]
+       INVALID_CHAR, // [15]
+       INVALID_CHAR, // [16]
+       INVALID_CHAR, // [17]
+       INVALID_CHAR, // [18]
+       INVALID_CHAR, // [19]
+       INVALID_CHAR, // [20]
+       INVALID_CHAR, // [21]
+       INVALID_CHAR, // [22]
+       INVALID_CHAR, // [23]
+       INVALID_CHAR, // [24]
+       INVALID_CHAR, // [25]
+       INVALID_CHAR, // [26]
+       INVALID_CHAR, // [27]
+       INVALID_CHAR, // [28]
+       INVALID_CHAR, // [29]
+       INVALID_CHAR, // [30]
+       INVALID_CHAR, // [31]
+       INVALID_CHAR, // [32]
+       INVALID_CHAR, // [33]
+       INVALID_CHAR, // [34]
+       INVALID_CHAR, // [35]
+       INVALID_CHAR, // [36]
+       INVALID_CHAR, // [37]
+       INVALID_CHAR, // [38]
+       INVALID_CHAR, // [39]
+       INVALID_CHAR, // [40]
+       INVALID_CHAR, // [41]
+       INVALID_CHAR, // [42]
+       INVALID_CHAR, // [43]
+       INVALID_CHAR, // [44]
+       INVALID_CHAR, // [45]
+       INVALID_CHAR, // [46]
+       INVALID_CHAR, // [47]
+       INVALID_CHAR, // [48]
+       INVALID_CHAR, // [49]
+       INVALID_CHAR, // [50]
+       INVALID_CHAR, // [51]
+       INVALID_CHAR, // [52]
+       INVALID_CHAR, // [53]
+       INVALID_CHAR, // [54]
+       INVALID_CHAR, // [55]
+       INVALID_CHAR, // [56]
+       INVALID_CHAR, // [57]
+       INVALID_CHAR, // [58]
+       INVALID_CHAR, // [59]
+       INVALID_CHAR, // [60]
+       INVALID_CHAR, // [61]
+       INVALID_CHAR, // [62]
+       INVALID_CHAR, // [63]
+       INVALID_CHAR, // [64]
+       INVALID_CHAR, // [65]
+       INVALID_CHAR, // [66]
+       INVALID_CHAR, // [67]
+       INVALID_CHAR, // [68]
+       INVALID_CHAR, // [69]
+       INVALID_CHAR, // [70]
+       INVALID_CHAR, // [71]
+       INVALID_CHAR, // [72]
+       INVALID_CHAR, // [73]
+       INVALID_CHAR, // [74]
+       INVALID_CHAR, // [75]
+       INVALID_CHAR, // [76]
+       INVALID_CHAR, // [77]
+       INVALID_CHAR, // [78]
+       INVALID_CHAR, // [79]
+       INVALID_CHAR, // [80]
+       INVALID_CHAR, // [81]
+       INVALID_CHAR, // [82]
+       INVALID_CHAR, // [83]
+       INVALID_CHAR, // [84]
+       INVALID_CHAR, // [85]
+       INVALID_CHAR, // [86]
+       INVALID_CHAR, // [87]
+       INVALID_CHAR, // [88]
+       INVALID_CHAR, // [89]
+       INVALID_CHAR, // [90]
+       INVALID_CHAR, // [91]
+       INVALID_CHAR, // [92]
+       INVALID_CHAR, // [93]
+       INVALID_CHAR, // [94]
+       INVALID_CHAR, // [95]
+       INVALID_CHAR, // [96]
+       INVALID_CHAR, // [97]
+       INVALID_CHAR, // [98]
+       INVALID_CHAR, // [99]
+       INVALID_CHAR, // [100]
+       INVALID_CHAR, // [101]
+       INVALID_CHAR, // [102]
+       INVALID_CHAR, // [103]
+       INVALID_CHAR, // [104]
+       INVALID_CHAR, // [105]
+       INVALID_CHAR, // [106]
+       INVALID_CHAR, // [107]
+       INVALID_CHAR, // [108]
+       INVALID_CHAR, // [109]
+       INVALID_CHAR, // [110]
+       INVALID_CHAR, // [111]
+       INVALID_CHAR, // [112]
+       INVALID_CHAR, // [113]
+       INVALID_CHAR, // [114]
+       INVALID_CHAR, // [115]
+       INVALID_CHAR, // [116]
+       INVALID_CHAR, // [117]
+       INVALID_CHAR, // [118]
+       INVALID_CHAR, // [119]
+       INVALID_CHAR, // [120]
+       INVALID_CHAR, // [121]
+       INVALID_CHAR, // [122]
+       INVALID_CHAR, // [123]
+       INVALID_CHAR, // [124]
+       INVALID_CHAR, // [125]
+       INVALID_CHAR, // [126]
+       INVALID_CHAR, // [127]
+       INVALID_CHAR, // [128]
+       INVALID_CHAR, // [129]
+       INVALID_CHAR, // [130]
+       INVALID_CHAR, // [131]
+       INVALID_CHAR, // [132]
+       INVALID_CHAR, // [133]
+       INVALID_CHAR, // [134]
+       INVALID_CHAR, // [135]
+       INVALID_CHAR, // [136]
+       INVALID_CHAR, // [137]
+       INVALID_CHAR, // [138]
+       INVALID_CHAR, // [139]
+       INVALID_CHAR, // [140]
+       INVALID_CHAR, // [141]
+       INVALID_CHAR, // [142]
+       INVALID_CHAR, // [143]
+       INVALID_CHAR, // [144]
+       INVALID_CHAR, // [145]
+       INVALID_CHAR, // [146]
+       INVALID_CHAR, // [147]
+       INVALID_CHAR, // [148]
+       INVALID_CHAR, // [149]
+       INVALID_CHAR, // [150]
+       INVALID_CHAR, // [151]
+       INVALID_CHAR, // [152]
+       INVALID_CHAR, // [153]
+       INVALID_CHAR, // [154]
+       INVALID_CHAR, // [155]
+       INVALID_CHAR, // [156]
+       INVALID_CHAR, // [157]
+       INVALID_CHAR, // [158]
+       INVALID_CHAR, // [159]
+       INVALID_CHAR, // [160]
+       INVALID_CHAR, // [161]
+       INVALID_CHAR, // [162]
+       INVALID_CHAR, // [163]
+       INVALID_CHAR, // [164]
+       INVALID_CHAR, // [165]
+       INVALID_CHAR, // [166]
+       INVALID_CHAR, // [167]
+       INVALID_CHAR, // [168]
+       INVALID_CHAR, // [169]
+       INVALID_CHAR, // [170]
+       INVALID_CHAR, // [171]
+       INVALID_CHAR, // [172]
+       INVALID_CHAR, // [173]
+       INVALID_CHAR, // [174]
+       INVALID_CHAR, // [175]
+       INVALID_CHAR, // [176]
+       INVALID_CHAR, // [177]
+       INVALID_CHAR, // [178]
+       INVALID_CHAR, // [179]
+       INVALID_CHAR, // [180]
+       INVALID_CHAR, // [181]
+       INVALID_CHAR, // [182]
+       INVALID_CHAR, // [183]
+       INVALID_CHAR, // [184]
+       INVALID_CHAR, // [185]
+       INVALID_CHAR, // [186]
+       INVALID_CHAR, // [187]
+       INVALID_CHAR, // [188]
+       INVALID_CHAR, // [189]
+       INVALID_CHAR, // [190]
+       INVALID_CHAR, // [191]
+       INVALID_CHAR, // [192]
+       INVALID_CHAR, // [193]
+       INVALID_CHAR, // [194]
+       INVALID_CHAR, // [195]
+       INVALID_CHAR, // [196]
+       INVALID_CHAR, // [197]
+       INVALID_CHAR, // [198]
+       INVALID_CHAR, // [199]
+       INVALID_CHAR, // [200]
+       INVALID_CHAR, // [201]
+       INVALID_CHAR, // [202]
+       INVALID_CHAR, // [203]
+       INVALID_CHAR, // [204]
+       INVALID_CHAR, // [205]
+       INVALID_CHAR, // [206]
+       INVALID_CHAR, // [207]
+       INVALID_CHAR, // [208]
+       INVALID_CHAR, // [209]
+       INVALID_CHAR, // [210]
+       INVALID_CHAR, // [211]
+       INVALID_CHAR, // [212]
+       INVALID_CHAR, // [213]
+       INVALID_CHAR, // [214]
+       INVALID_CHAR, // [215]
+       INVALID_CHAR, // [216]
+       INVALID_CHAR, // [217]
+       INVALID_CHAR, // [218]
+       INVALID_CHAR, // [219]
+       INVALID_CHAR, // [220]
+       INVALID_CHAR, // [221]
+       INVALID_CHAR, // [222]
+       INVALID_CHAR, // [223]
+       INVALID_CHAR, // [224]
+       INVALID_CHAR, // [225]
+       INVALID_CHAR, // [226]
+       INVALID_CHAR, // [227]
+       INVALID_CHAR, // [228]
+       INVALID_CHAR, // [229]
+       INVALID_CHAR, // [230]
+       INVALID_CHAR, // [231]
+       INVALID_CHAR, // [232]
+       INVALID_CHAR, // [233]
+       INVALID_CHAR, // [234]
+       INVALID_CHAR, // [235]
+       INVALID_CHAR, // [236]
+       INVALID_CHAR, // [237]
+       INVALID_CHAR, // [238]
+       INVALID_CHAR, // [239]
+       INVALID_CHAR, // [240]
+       INVALID_CHAR, // [241]
+       INVALID_CHAR, // [242]
+       INVALID_CHAR, // [243]
+       INVALID_CHAR, // [244]
+       INVALID_CHAR, // [245]
+       INVALID_CHAR, // [246]
+       INVALID_CHAR, // [247]
+       INVALID_CHAR, // [248]
+       INVALID_CHAR, // [249]
+       INVALID_CHAR, // [250]
+       INVALID_CHAR, // [251]
+       INVALID_CHAR, // [252]
+       INVALID_CHAR, // [253]
+       INVALID_CHAR, // [254]
+       INVALID_CHAR, // [255]
+       };
+
+unsigned g_CodonWordToAminoLetter[4*4*4] = // Lookup: codon word (0..63) -> amino letter index.
+       { // Entries run AAA, AAC, AAG, AAT, ACA, ... TTT, i.e. word = 16*n1 + 4*n2 + n3 with A=0, C=1, G=2, T=3; assignments match the standard genetic code, and the three stop codons hold 20.
+       8 , // [ 0] = AAA K (Lys)
+       11, // [ 1] = AAC N (Asn)
+       8 , // [ 2] = AAG K (Lys)
+       11, // [ 3] = AAT N (Asn)
+       16, // [ 4] = ACA T (Thr)
+       16, // [ 5] = ACC T (Thr)
+       16, // [ 6] = ACG T (Thr)
+       16, // [ 7] = ACT T (Thr)
+       14, // [ 8] = AGA R (Arg)
+       15, // [ 9] = AGC S (Ser)
+       14, // [10] = AGG R (Arg)
+       15, // [11] = AGT S (Ser)
+       7 , // [12] = ATA I (Ile)
+       7 , // [13] = ATC I (Ile)
+       10, // [14] = ATG M (Met)
+       7 , // [15] = ATT I (Ile)
+       13, // [16] = CAA Q (Gln)
+       6 , // [17] = CAC H (His)
+       13, // [18] = CAG Q (Gln)
+       6 , // [19] = CAT H (His)
+       12, // [20] = CCA P (Pro)
+       12, // [21] = CCC P (Pro)
+       12, // [22] = CCG P (Pro)
+       12, // [23] = CCT P (Pro)
+       14, // [24] = CGA R (Arg)
+       14, // [25] = CGC R (Arg)
+       14, // [26] = CGG R (Arg)
+       14, // [27] = CGT R (Arg)
+       9 , // [28] = CTA L (Leu)
+       9 , // [29] = CTC L (Leu)
+       9 , // [30] = CTG L (Leu)
+       9 , // [31] = CTT L (Leu)
+       3 , // [32] = GAA E (Glu)
+       2 , // [33] = GAC D (Asp)
+       3 , // [34] = GAG E (Glu)
+       2 , // [35] = GAT D (Asp)
+       0 , // [36] = GCA A (Ala)
+       0 , // [37] = GCC A (Ala)
+       0 , // [38] = GCG A (Ala)
+       0 , // [39] = GCT A (Ala)
+       5 , // [40] = GGA G (Gly)
+       5 , // [41] = GGC G (Gly)
+       5 , // [42] = GGG G (Gly)
+       5 , // [43] = GGT G (Gly)
+       17, // [44] = GTA V (Val)
+       17, // [45] = GTC V (Val)
+       17, // [46] = GTG V (Val)
+       17, // [47] = GTT V (Val)
+       20, // [48] = TAA * (STP)
+       19, // [49] = TAC Y (Tyr)
+       20, // [50] = TAG * (STP)
+       19, // [51] = TAT Y (Tyr)
+       15, // [52] = TCA S (Ser)
+       15, // [53] = TCC S (Ser)
+       15, // [54] = TCG S (Ser)
+       15, // [55] = TCT S (Ser)
+       20, // [56] = TGA * (STP)
+       1 , // [57] = TGC C (Cys)
+       18, // [58] = TGG W (Trp)
+       1 , // [59] = TGT C (Cys)
+       9 , // [60] = TTA L (Leu)
+       4 , // [61] = TTC F (Phe)
+       9 , // [62] = TTG L (Leu)
+       4 , // [63] = TTT F (Phe)
+       };
+
+char g_CodonWordToAminoChar[4*4*4] = // Lookup: codon word (0..63) -> one-letter amino acid character.
+       { // Entries run AAA, AAC, AAG, AAT, ACA, ... TTT, i.e. word = 16*n1 + 4*n2 + n3 with A=0, C=1, G=2, T=3; stop codons (TAA, TAG, TGA) are '*'.
+       'K', // [ 0] = AAA (Lys)
+       'N', // [ 1] = AAC (Asn)
+       'K', // [ 2] = AAG (Lys)
+       'N', // [ 3] = AAT (Asn)
+       'T', // [ 4] = ACA (Thr)
+       'T', // [ 5] = ACC (Thr)
+       'T', // [ 6] = ACG (Thr)
+       'T', // [ 7] = ACT (Thr)
+       'R', // [ 8] = AGA (Arg)
+       'S', // [ 9] = AGC (Ser)
+       'R', // [10] = AGG (Arg)
+       'S', // [11] = AGT (Ser)
+       'I', // [12] = ATA (Ile)
+       'I', // [13] = ATC (Ile)
+       'M', // [14] = ATG (Met)
+       'I', // [15] = ATT (Ile)
+       'Q', // [16] = CAA (Gln)
+       'H', // [17] = CAC (His)
+       'Q', // [18] = CAG (Gln)
+       'H', // [19] = CAT (His)
+       'P', // [20] = CCA (Pro)
+       'P', // [21] = CCC (Pro)
+       'P', // [22] = CCG (Pro)
+       'P', // [23] = CCT (Pro)
+       'R', // [24] = CGA (Arg)
+       'R', // [25] = CGC (Arg)
+       'R', // [26] = CGG (Arg)
+       'R', // [27] = CGT (Arg)
+       'L', // [28] = CTA (Leu)
+       'L', // [29] = CTC (Leu)
+       'L', // [30] = CTG (Leu)
+       'L', // [31] = CTT (Leu)
+       'E', // [32] = GAA (Glu)
+       'D', // [33] = GAC (Asp)
+       'E', // [34] = GAG (Glu)
+       'D', // [35] = GAT (Asp)
+       'A', // [36] = GCA (Ala)
+       'A', // [37] = GCC (Ala)
+       'A', // [38] = GCG (Ala)
+       'A', // [39] = GCT (Ala)
+       'G', // [40] = GGA (Gly)
+       'G', // [41] = GGC (Gly)
+       'G', // [42] = GGG (Gly)
+       'G', // [43] = GGT (Gly)
+       'V', // [44] = GTA (Val)
+       'V', // [45] = GTC (Val)
+       'V', // [46] = GTG (Val)
+       'V', // [47] = GTT (Val)
+       '*', // [48] = TAA (STP)
+       'Y', // [49] = TAC (Tyr)
+       '*', // [50] = TAG (STP)
+       'Y', // [51] = TAT (Tyr)
+       'S', // [52] = TCA (Ser)
+       'S', // [53] = TCC (Ser)
+       'S', // [54] = TCG (Ser)
+       'S', // [55] = TCT (Ser)
+       '*', // [56] = TGA (STP)
+       'C', // [57] = TGC (Cys)
+       'W', // [58] = TGG (Trp)
+       'C', // [59] = TGT (Cys)
+       'L', // [60] = TTA (Leu)
+       'F', // [61] = TTC (Phe)
+       'L', // [62] = TTG (Leu)
+       'F', // [63] = TTT (Phe)
+       };
+
+unsigned char g_CharToCompChar[256] = // Lookup indexed by character code: complementary nucleotide character, or INVALID_CHAR (defined outside this table).
+       { // Only A, C, G, T and U (either case) have a complement entry; the stored complement is always upper case.
+       INVALID_CHAR, // [  0]
+       INVALID_CHAR, // [  1]
+       INVALID_CHAR, // [  2]
+       INVALID_CHAR, // [  3]
+       INVALID_CHAR, // [  4]
+       INVALID_CHAR, // [  5]
+       INVALID_CHAR, // [  6]
+       INVALID_CHAR, // [  7]
+       INVALID_CHAR, // [  8]
+       INVALID_CHAR, // [  9]
+       INVALID_CHAR, // [ 10]
+       INVALID_CHAR, // [ 11]
+       INVALID_CHAR, // [ 12]
+       INVALID_CHAR, // [ 13]
+       INVALID_CHAR, // [ 14]
+       INVALID_CHAR, // [ 15]
+       INVALID_CHAR, // [ 16]
+       INVALID_CHAR, // [ 17]
+       INVALID_CHAR, // [ 18]
+       INVALID_CHAR, // [ 19]
+       INVALID_CHAR, // [ 20]
+       INVALID_CHAR, // [ 21]
+       INVALID_CHAR, // [ 22]
+       INVALID_CHAR, // [ 23]
+       INVALID_CHAR, // [ 24]
+       INVALID_CHAR, // [ 25]
+       INVALID_CHAR, // [ 26]
+       INVALID_CHAR, // [ 27]
+       INVALID_CHAR, // [ 28]
+       INVALID_CHAR, // [ 29]
+       INVALID_CHAR, // [ 30]
+       INVALID_CHAR, // [ 31]
+       INVALID_CHAR, // [ 32]
+       INVALID_CHAR, // [ 33]
+       INVALID_CHAR, // [ 34]
+       INVALID_CHAR, // [ 35]
+       INVALID_CHAR, // [ 36]
+       INVALID_CHAR, // [ 37]
+       INVALID_CHAR, // [ 38]
+       INVALID_CHAR, // [ 39]
+       INVALID_CHAR, // [ 40]
+       INVALID_CHAR, // [ 41]
+       INVALID_CHAR, // [ 42]
+       INVALID_CHAR, // [ 43]
+       INVALID_CHAR, // [ 44]
+       INVALID_CHAR, // [ 45]
+       INVALID_CHAR, // [ 46]
+       INVALID_CHAR, // [ 47]
+       INVALID_CHAR, // [ 48]
+       INVALID_CHAR, // [ 49]
+       INVALID_CHAR, // [ 50]
+       INVALID_CHAR, // [ 51]
+       INVALID_CHAR, // [ 52]
+       INVALID_CHAR, // [ 53]
+       INVALID_CHAR, // [ 54]
+       INVALID_CHAR, // [ 55]
+       INVALID_CHAR, // [ 56]
+       INVALID_CHAR, // [ 57]
+       INVALID_CHAR, // [ 58]
+       INVALID_CHAR, // [ 59]
+       INVALID_CHAR, // [ 60]
+       INVALID_CHAR, // [ 61]
+       INVALID_CHAR, // [ 62]
+       INVALID_CHAR, // [ 63]
+       INVALID_CHAR, // [ 64]
+       'T',          // [ 65] A -> T
+       INVALID_CHAR, // [ 66]
+       'G',          // [ 67] C -> G
+       INVALID_CHAR, // [ 68]
+       INVALID_CHAR, // [ 69]
+       INVALID_CHAR, // [ 70]
+       'C',          // [ 71] G -> C
+       INVALID_CHAR, // [ 72]
+       INVALID_CHAR, // [ 73]
+       INVALID_CHAR, // [ 74]
+       INVALID_CHAR, // [ 75]
+       INVALID_CHAR, // [ 76]
+       INVALID_CHAR, // [ 77]
+       INVALID_CHAR, // [ 78]
+       INVALID_CHAR, // [ 79]
+       INVALID_CHAR, // [ 80]
+       INVALID_CHAR, // [ 81]
+       INVALID_CHAR, // [ 82]
+       INVALID_CHAR, // [ 83]
+       'A',          // [ 84] T -> A
+       'A',          // [ 85] U -> A
+       INVALID_CHAR, // [ 86]
+       INVALID_CHAR, // [ 87]
+       INVALID_CHAR, // [ 88]
+       INVALID_CHAR, // [ 89]
+       INVALID_CHAR, // [ 90]
+       INVALID_CHAR, // [ 91]
+       INVALID_CHAR, // [ 92]
+       INVALID_CHAR, // [ 93]
+       INVALID_CHAR, // [ 94]
+       INVALID_CHAR, // [ 95]
+       INVALID_CHAR, // [ 96]
+       'T',          // [ 97] a -> T
+       INVALID_CHAR, // [ 98]
+       'G',          // [ 99] c -> G
+       INVALID_CHAR, // [100]
+       INVALID_CHAR, // [101]
+       INVALID_CHAR, // [102]
+       'C',          // [103] g -> C
+       INVALID_CHAR, // [104]
+       INVALID_CHAR, // [105]
+       INVALID_CHAR, // [106]
+       INVALID_CHAR, // [107]
+       INVALID_CHAR, // [108]
+       INVALID_CHAR, // [109]
+       INVALID_CHAR, // [110]
+       INVALID_CHAR, // [111]
+       INVALID_CHAR, // [112]
+       INVALID_CHAR, // [113]
+       INVALID_CHAR, // [114]
+       INVALID_CHAR, // [115]
+       'A',          // [116] t -> A
+       'A',          // [117] u -> A
+       INVALID_CHAR, // [118]
+       INVALID_CHAR, // [119]
+       INVALID_CHAR, // [120]
+       INVALID_CHAR, // [121]
+       INVALID_CHAR, // [122]
+       INVALID_CHAR, // [123]
+       INVALID_CHAR, // [124]
+       INVALID_CHAR, // [125]
+       INVALID_CHAR, // [126]
+       INVALID_CHAR, // [127]
+       INVALID_CHAR, // [128]
+       INVALID_CHAR, // [129]
+       INVALID_CHAR, // [130]
+       INVALID_CHAR, // [131]
+       INVALID_CHAR, // [132]
+       INVALID_CHAR, // [133]
+       INVALID_CHAR, // [134]
+       INVALID_CHAR, // [135]
+       INVALID_CHAR, // [136]
+       INVALID_CHAR, // [137]
+       INVALID_CHAR, // [138]
+       INVALID_CHAR, // [139]
+       INVALID_CHAR, // [140]
+       INVALID_CHAR, // [141]
+       INVALID_CHAR, // [142]
+       INVALID_CHAR, // [143]
+       INVALID_CHAR, // [144]
+       INVALID_CHAR, // [145]
+       INVALID_CHAR, // [146]
+       INVALID_CHAR, // [147]
+       INVALID_CHAR, // [148]
+       INVALID_CHAR, // [149]
+       INVALID_CHAR, // [150]
+       INVALID_CHAR, // [151]
+       INVALID_CHAR, // [152]
+       INVALID_CHAR, // [153]
+       INVALID_CHAR, // [154]
+       INVALID_CHAR, // [155]
+       INVALID_CHAR, // [156]
+       INVALID_CHAR, // [157]
+       INVALID_CHAR, // [158]
+       INVALID_CHAR, // [159]
+       INVALID_CHAR, // [160]
+       INVALID_CHAR, // [161]
+       INVALID_CHAR, // [162]
+       INVALID_CHAR, // [163]
+       INVALID_CHAR, // [164]
+       INVALID_CHAR, // [165]
+       INVALID_CHAR, // [166]
+       INVALID_CHAR, // [167]
+       INVALID_CHAR, // [168]
+       INVALID_CHAR, // [169]
+       INVALID_CHAR, // [170]
+       INVALID_CHAR, // [171]
+       INVALID_CHAR, // [172]
+       INVALID_CHAR, // [173]
+       INVALID_CHAR, // [174]
+       INVALID_CHAR, // [175]
+       INVALID_CHAR, // [176]
+       INVALID_CHAR, // [177]
+       INVALID_CHAR, // [178]
+       INVALID_CHAR, // [179]
+       INVALID_CHAR, // [180]
+       INVALID_CHAR, // [181]
+       INVALID_CHAR, // [182]
+       INVALID_CHAR, // [183]
+       INVALID_CHAR, // [184]
+       INVALID_CHAR, // [185]
+       INVALID_CHAR, // [186]
+       INVALID_CHAR, // [187]
+       INVALID_CHAR, // [188]
+       INVALID_CHAR, // [189]
+       INVALID_CHAR, // [190]
+       INVALID_CHAR, // [191]
+       INVALID_CHAR, // [192]
+       INVALID_CHAR, // [193]
+       INVALID_CHAR, // [194]
+       INVALID_CHAR, // [195]
+       INVALID_CHAR, // [196]
+       INVALID_CHAR, // [197]
+       INVALID_CHAR, // [198]
+       INVALID_CHAR, // [199]
+       INVALID_CHAR, // [200]
+       INVALID_CHAR, // [201]
+       INVALID_CHAR, // [202]
+       INVALID_CHAR, // [203]
+       INVALID_CHAR, // [204]
+       INVALID_CHAR, // [205]
+       INVALID_CHAR, // [206]
+       INVALID_CHAR, // [207]
+       INVALID_CHAR, // [208]
+       INVALID_CHAR, // [209]
+       INVALID_CHAR, // [210]
+       INVALID_CHAR, // [211]
+       INVALID_CHAR, // [212]
+       INVALID_CHAR, // [213]
+       INVALID_CHAR, // [214]
+       INVALID_CHAR, // [215]
+       INVALID_CHAR, // [216]
+       INVALID_CHAR, // [217]
+       INVALID_CHAR, // [218]
+       INVALID_CHAR, // [219]
+       INVALID_CHAR, // [220]
+       INVALID_CHAR, // [221]
+       INVALID_CHAR, // [222]
+       INVALID_CHAR, // [223]
+       INVALID_CHAR, // [224]
+       INVALID_CHAR, // [225]
+       INVALID_CHAR, // [226]
+       INVALID_CHAR, // [227]
+       INVALID_CHAR, // [228]
+       INVALID_CHAR, // [229]
+       INVALID_CHAR, // [230]
+       INVALID_CHAR, // [231]
+       INVALID_CHAR, // [232]
+       INVALID_CHAR, // [233]
+       INVALID_CHAR, // [234]
+       INVALID_CHAR, // [235]
+       INVALID_CHAR, // [236]
+       INVALID_CHAR, // [237]
+       INVALID_CHAR, // [238]
+       INVALID_CHAR, // [239]
+       INVALID_CHAR, // [240]
+       INVALID_CHAR, // [241]
+       INVALID_CHAR, // [242]
+       INVALID_CHAR, // [243]
+       INVALID_CHAR, // [244]
+       INVALID_CHAR, // [245]
+       INVALID_CHAR, // [246]
+       INVALID_CHAR, // [247]
+       INVALID_CHAR, // [248]
+       INVALID_CHAR, // [249]
+       INVALID_CHAR, // [250]
+       INVALID_CHAR, // [251]
+       INVALID_CHAR, // [252]
+       INVALID_CHAR, // [253]
+       INVALID_CHAR, // [254]
+       INVALID_CHAR, // [255]
+};
+
+unsigned g_CharToCompLetter[256] = // Lookup indexed by character code: numeric letter code of the complementary nucleotide, or INVALID_LETTER (defined outside this table).
+       { // Per the "X -> Y" labels below, the stored codes are 0 = A, 1 = C, 2 = G, 3 = T; only A, C, G, T, U (either case) are mapped.
+       INVALID_LETTER, // [  0]
+       INVALID_LETTER, // [  1]
+       INVALID_LETTER, // [  2]
+       INVALID_LETTER, // [  3]
+       INVALID_LETTER, // [  4]
+       INVALID_LETTER, // [  5]
+       INVALID_LETTER, // [  6]
+       INVALID_LETTER, // [  7]
+       INVALID_LETTER, // [  8]
+       INVALID_LETTER, // [  9]
+       INVALID_LETTER, // [ 10]
+       INVALID_LETTER, // [ 11]
+       INVALID_LETTER, // [ 12]
+       INVALID_LETTER, // [ 13]
+       INVALID_LETTER, // [ 14]
+       INVALID_LETTER, // [ 15]
+       INVALID_LETTER, // [ 16]
+       INVALID_LETTER, // [ 17]
+       INVALID_LETTER, // [ 18]
+       INVALID_LETTER, // [ 19]
+       INVALID_LETTER, // [ 20]
+       INVALID_LETTER, // [ 21]
+       INVALID_LETTER, // [ 22]
+       INVALID_LETTER, // [ 23]
+       INVALID_LETTER, // [ 24]
+       INVALID_LETTER, // [ 25]
+       INVALID_LETTER, // [ 26]
+       INVALID_LETTER, // [ 27]
+       INVALID_LETTER, // [ 28]
+       INVALID_LETTER, // [ 29]
+       INVALID_LETTER, // [ 30]
+       INVALID_LETTER, // [ 31]
+       INVALID_LETTER, // [ 32]
+       INVALID_LETTER, // [ 33]
+       INVALID_LETTER, // [ 34]
+       INVALID_LETTER, // [ 35]
+       INVALID_LETTER, // [ 36]
+       INVALID_LETTER, // [ 37]
+       INVALID_LETTER, // [ 38]
+       INVALID_LETTER, // [ 39]
+       INVALID_LETTER, // [ 40]
+       INVALID_LETTER, // [ 41]
+       INVALID_LETTER, // [ 42]
+       INVALID_LETTER, // [ 43]
+       INVALID_LETTER, // [ 44]
+       INVALID_LETTER, // [ 45]
+       INVALID_LETTER, // [ 46]
+       INVALID_LETTER, // [ 47]
+       INVALID_LETTER, // [ 48]
+       INVALID_LETTER, // [ 49]
+       INVALID_LETTER, // [ 50]
+       INVALID_LETTER, // [ 51]
+       INVALID_LETTER, // [ 52]
+       INVALID_LETTER, // [ 53]
+       INVALID_LETTER, // [ 54]
+       INVALID_LETTER, // [ 55]
+       INVALID_LETTER, // [ 56]
+       INVALID_LETTER, // [ 57]
+       INVALID_LETTER, // [ 58]
+       INVALID_LETTER, // [ 59]
+       INVALID_LETTER, // [ 60]
+       INVALID_LETTER, // [ 61]
+       INVALID_LETTER, // [ 62]
+       INVALID_LETTER, // [ 63]
+       INVALID_LETTER, // [ 64]
+       3,              // [ 65] A -> T
+       INVALID_LETTER, // [ 66]
+       2,              // [ 67] C -> G
+       INVALID_LETTER, // [ 68]
+       INVALID_LETTER, // [ 69]
+       INVALID_LETTER, // [ 70]
+       1,              // [ 71] G -> C
+       INVALID_LETTER, // [ 72]
+       INVALID_LETTER, // [ 73]
+       INVALID_LETTER, // [ 74]
+       INVALID_LETTER, // [ 75]
+       INVALID_LETTER, // [ 76]
+       INVALID_LETTER, // [ 77]
+       INVALID_LETTER, // [ 78]
+       INVALID_LETTER, // [ 79]
+       INVALID_LETTER, // [ 80]
+       INVALID_LETTER, // [ 81]
+       INVALID_LETTER, // [ 82]
+       INVALID_LETTER, // [ 83]
+       0,              // [ 84] T -> A
+       0,              // [ 85] U -> A
+       INVALID_LETTER, // [ 86]
+       INVALID_LETTER, // [ 87]
+       INVALID_LETTER, // [ 88]
+       INVALID_LETTER, // [ 89]
+       INVALID_LETTER, // [ 90]
+       INVALID_LETTER, // [ 91]
+       INVALID_LETTER, // [ 92]
+       INVALID_LETTER, // [ 93]
+       INVALID_LETTER, // [ 94]
+       INVALID_LETTER, // [ 95]
+       INVALID_LETTER, // [ 96]
+       3,              // [ 97] a -> T
+       INVALID_LETTER, // [ 98]
+       2,              // [ 99] c -> G
+       INVALID_LETTER, // [100]
+       INVALID_LETTER, // [101]
+       INVALID_LETTER, // [102]
+       1,              // [103] g -> C
+       INVALID_LETTER, // [104]
+       INVALID_LETTER, // [105]
+       INVALID_LETTER, // [106]
+       INVALID_LETTER, // [107]
+       INVALID_LETTER, // [108]
+       INVALID_LETTER, // [109]
+       INVALID_LETTER, // [110]
+       INVALID_LETTER, // [111]
+       INVALID_LETTER, // [112]
+       INVALID_LETTER, // [113]
+       INVALID_LETTER, // [114]
+       INVALID_LETTER, // [115]
+       0,              // [116] t -> A
+       0,              // [117] u -> A
+       INVALID_LETTER, // [118]
+       INVALID_LETTER, // [119]
+       INVALID_LETTER, // [120]
+       INVALID_LETTER, // [121]
+       INVALID_LETTER, // [122]
+       INVALID_LETTER, // [123]
+       INVALID_LETTER, // [124]
+       INVALID_LETTER, // [125]
+       INVALID_LETTER, // [126]
+       INVALID_LETTER, // [127]
+       INVALID_LETTER, // [128]
+       INVALID_LETTER, // [129]
+       INVALID_LETTER, // [130]
+       INVALID_LETTER, // [131]
+       INVALID_LETTER, // [132]
+       INVALID_LETTER, // [133]
+       INVALID_LETTER, // [134]
+       INVALID_LETTER, // [135]
+       INVALID_LETTER, // [136]
+       INVALID_LETTER, // [137]
+       INVALID_LETTER, // [138]
+       INVALID_LETTER, // [139]
+       INVALID_LETTER, // [140]
+       INVALID_LETTER, // [141]
+       INVALID_LETTER, // [142]
+       INVALID_LETTER, // [143]
+       INVALID_LETTER, // [144]
+       INVALID_LETTER, // [145]
+       INVALID_LETTER, // [146]
+       INVALID_LETTER, // [147]
+       INVALID_LETTER, // [148]
+       INVALID_LETTER, // [149]
+       INVALID_LETTER, // [150]
+       INVALID_LETTER, // [151]
+       INVALID_LETTER, // [152]
+       INVALID_LETTER, // [153]
+       INVALID_LETTER, // [154]
+       INVALID_LETTER, // [155]
+       INVALID_LETTER, // [156]
+       INVALID_LETTER, // [157]
+       INVALID_LETTER, // [158]
+       INVALID_LETTER, // [159]
+       INVALID_LETTER, // [160]
+       INVALID_LETTER, // [161]
+       INVALID_LETTER, // [162]
+       INVALID_LETTER, // [163]
+       INVALID_LETTER, // [164]
+       INVALID_LETTER, // [165]
+       INVALID_LETTER, // [166]
+       INVALID_LETTER, // [167]
+       INVALID_LETTER, // [168]
+       INVALID_LETTER, // [169]
+       INVALID_LETTER, // [170]
+       INVALID_LETTER, // [171]
+       INVALID_LETTER, // [172]
+       INVALID_LETTER, // [173]
+       INVALID_LETTER, // [174]
+       INVALID_LETTER, // [175]
+       INVALID_LETTER, // [176]
+       INVALID_LETTER, // [177]
+       INVALID_LETTER, // [178]
+       INVALID_LETTER, // [179]
+       INVALID_LETTER, // [180]
+       INVALID_LETTER, // [181]
+       INVALID_LETTER, // [182]
+       INVALID_LETTER, // [183]
+       INVALID_LETTER, // [184]
+       INVALID_LETTER, // [185]
+       INVALID_LETTER, // [186]
+       INVALID_LETTER, // [187]
+       INVALID_LETTER, // [188]
+       INVALID_LETTER, // [189]
+       INVALID_LETTER, // [190]
+       INVALID_LETTER, // [191]
+       INVALID_LETTER, // [192]
+       INVALID_LETTER, // [193]
+       INVALID_LETTER, // [194]
+       INVALID_LETTER, // [195]
+       INVALID_LETTER, // [196]
+       INVALID_LETTER, // [197]
+       INVALID_LETTER, // [198]
+       INVALID_LETTER, // [199]
+       INVALID_LETTER, // [200]
+       INVALID_LETTER, // [201]
+       INVALID_LETTER, // [202]
+       INVALID_LETTER, // [203]
+       INVALID_LETTER, // [204]
+       INVALID_LETTER, // [205]
+       INVALID_LETTER, // [206]
+       INVALID_LETTER, // [207]
+       INVALID_LETTER, // [208]
+       INVALID_LETTER, // [209]
+       INVALID_LETTER, // [210]
+       INVALID_LETTER, // [211]
+       INVALID_LETTER, // [212]
+       INVALID_LETTER, // [213]
+       INVALID_LETTER, // [214]
+       INVALID_LETTER, // [215]
+       INVALID_LETTER, // [216]
+       INVALID_LETTER, // [217]
+       INVALID_LETTER, // [218]
+       INVALID_LETTER, // [219]
+       INVALID_LETTER, // [220]
+       INVALID_LETTER, // [221]
+       INVALID_LETTER, // [222]
+       INVALID_LETTER, // [223]
+       INVALID_LETTER, // [224]
+       INVALID_LETTER, // [225]
+       INVALID_LETTER, // [226]
+       INVALID_LETTER, // [227]
+       INVALID_LETTER, // [228]
+       INVALID_LETTER, // [229]
+       INVALID_LETTER, // [230]
+       INVALID_LETTER, // [231]
+       INVALID_LETTER, // [232]
+       INVALID_LETTER, // [233]
+       INVALID_LETTER, // [234]
+       INVALID_LETTER, // [235]
+       INVALID_LETTER, // [236]
+       INVALID_LETTER, // [237]
+       INVALID_LETTER, // [238]
+       INVALID_LETTER, // [239]
+       INVALID_LETTER, // [240]
+       INVALID_LETTER, // [241]
+       INVALID_LETTER, // [242]
+       INVALID_LETTER, // [243]
+       INVALID_LETTER, // [244]
+       INVALID_LETTER, // [245]
+       INVALID_LETTER, // [246]
+       INVALID_LETTER, // [247]
+       INVALID_LETTER, // [248]
+       INVALID_LETTER, // [249]
+       INVALID_LETTER, // [250]
+       INVALID_LETTER, // [251]
+       INVALID_LETTER, // [252]
+       INVALID_LETTER, // [253]
+       INVALID_LETTER, // [254]
+       INVALID_LETTER, // [255]
+};
+
+bool g_IsAminoChar[256] = // Lookup indexed by character code: true for the 20 amino-acid letters listed below (either case) and for '*' (stop).
+       { // B, J, O, U, X and Z are false in both cases, as is every other non-letter character.
+       false, // [  0] 0x00
+       false, // [  1] 0x01
+       false, // [  2] 0x02
+       false, // [  3] 0x03
+       false, // [  4] 0x04
+       false, // [  5] 0x05
+       false, // [  6] 0x06
+       false, // [  7] 0x07
+       false, // [  8] 0x08
+       false, // [  9] 0x09
+       false, // [ 10] 0x0a
+       false, // [ 11] 0x0b
+       false, // [ 12] 0x0c
+       false, // [ 13] 0x0d
+       false, // [ 14] 0x0e
+       false, // [ 15] 0x0f
+       false, // [ 16] 0x10
+       false, // [ 17] 0x11
+       false, // [ 18] 0x12
+       false, // [ 19] 0x13
+       false, // [ 20] 0x14
+       false, // [ 21] 0x15
+       false, // [ 22] 0x16
+       false, // [ 23] 0x17
+       false, // [ 24] 0x18
+       false, // [ 25] 0x19
+       false, // [ 26] 0x1a
+       false, // [ 27] 0x1b
+       false, // [ 28] 0x1c
+       false, // [ 29] 0x1d
+       false, // [ 30] 0x1e
+       false, // [ 31] 0x1f
+       false, // [ 32] ' '
+       false, // [ 33] '!'
+       false, // [ 34] '"'
+       false, // [ 35] '#'
+       false, // [ 36] '$'
+       false, // [ 37] '%'
+       false, // [ 38] '&'
+       false, // [ 39] '''
+       false, // [ 40] '('
+       false, // [ 41] ')'
+       true,  // [ 42] '*' = STP
+       false, // [ 43] '+'
+       false, // [ 44] ','
+       false, // [ 45] '-'
+       false, // [ 46] '.'
+       false, // [ 47] '/'
+       false, // [ 48] '0'
+       false, // [ 49] '1'
+       false, // [ 50] '2'
+       false, // [ 51] '3'
+       false, // [ 52] '4'
+       false, // [ 53] '5'
+       false, // [ 54] '6'
+       false, // [ 55] '7'
+       false, // [ 56] '8'
+       false, // [ 57] '9'
+       false, // [ 58] ':'
+       false, // [ 59] ';'
+       false, // [ 60] '<'
+       false, // [ 61] '='
+       false, // [ 62] '>'
+       false, // [ 63] '?'
+       false, // [ 64] '@'
+       true,  // [ 65] 'A' = Ala
+       false, // [ 66] 'B'
+       true,  // [ 67] 'C' = Cys
+       true,  // [ 68] 'D' = Asp
+       true,  // [ 69] 'E' = Glu
+       true,  // [ 70] 'F' = Phe
+       true,  // [ 71] 'G' = Gly
+       true,  // [ 72] 'H' = His
+       true,  // [ 73] 'I' = Ile
+       false, // [ 74] 'J'
+       true,  // [ 75] 'K' = Lys
+       true,  // [ 76] 'L' = Leu
+       true,  // [ 77] 'M' = Met
+       true,  // [ 78] 'N' = Asn
+       false, // [ 79] 'O'
+       true,  // [ 80] 'P' = Pro
+       true,  // [ 81] 'Q' = Gln
+       true,  // [ 82] 'R' = Arg
+       true,  // [ 83] 'S' = Ser
+       true,  // [ 84] 'T' = Thr
+       false, // [ 85] 'U'
+       true,  // [ 86] 'V' = Val
+       true,  // [ 87] 'W' = Trp
+       false, // [ 88] 'X'
+       true,  // [ 89] 'Y' = Tyr
+       false, // [ 90] 'Z'
+       false, // [ 91] '['
+       false, // [ 92] '\'
+       false, // [ 93] ']'
+       false, // [ 94] '^'
+       false, // [ 95] '_'
+       false, // [ 96] '`'
+       true,  // [ 97] 'a' = Ala
+       false, // [ 98] 'b'
+       true,  // [ 99] 'c' = Cys
+       true,  // [100] 'd' = Asp
+       true,  // [101] 'e' = Glu
+       true,  // [102] 'f' = Phe
+       true,  // [103] 'g' = Gly
+       true,  // [104] 'h' = His
+       true,  // [105] 'i' = Ile
+       false, // [106] 'j'
+       true,  // [107] 'k' = Lys
+       true,  // [108] 'l' = Leu
+       true,  // [109] 'm' = Met
+       true,  // [110] 'n' = Asn
+       false, // [111] 'o'
+       true,  // [112] 'p' = Pro
+       true,  // [113] 'q' = Gln
+       true,  // [114] 'r' = Arg
+       true,  // [115] 's' = Ser
+       true,  // [116] 't' = Thr
+       false, // [117] 'u'
+       true,  // [118] 'v' = Val
+       true,  // [119] 'w' = Trp
+       false, // [120] 'x'
+       true,  // [121] 'y' = Tyr
+       false, // [122] 'z'
+       false, // [123] '{'
+       false, // [124] '|'
+       false, // [125] '}'
+       false, // [126] '~'
+       false, // [127] 0x7f
+       false, // [128] 0x80
+       false, // [129] 0x81
+       false, // [130] 0x82
+       false, // [131] 0x83
+       false, // [132] 0x84
+       false, // [133] 0x85
+       false, // [134] 0x86
+       false, // [135] 0x87
+       false, // [136] 0x88
+       false, // [137] 0x89
+       false, // [138] 0x8a
+       false, // [139] 0x8b
+       false, // [140] 0x8c
+       false, // [141] 0x8d
+       false, // [142] 0x8e
+       false, // [143] 0x8f
+       false, // [144] 0x90
+       false, // [145] 0x91
+       false, // [146] 0x92
+       false, // [147] 0x93
+       false, // [148] 0x94
+       false, // [149] 0x95
+       false, // [150] 0x96
+       false, // [151] 0x97
+       false, // [152] 0x98
+       false, // [153] 0x99
+       false, // [154] 0x9a
+       false, // [155] 0x9b
+       false, // [156] 0x9c
+       false, // [157] 0x9d
+       false, // [158] 0x9e
+       false, // [159] 0x9f
+       false, // [160] 0xa0
+       false, // [161] 0xa1
+       false, // [162] 0xa2
+       false, // [163] 0xa3
+       false, // [164] 0xa4
+       false, // [165] 0xa5
+       false, // [166] 0xa6
+       false, // [167] 0xa7
+       false, // [168] 0xa8
+       false, // [169] 0xa9
+       false, // [170] 0xaa
+       false, // [171] 0xab
+       false, // [172] 0xac
+       false, // [173] 0xad
+       false, // [174] 0xae
+       false, // [175] 0xaf
+       false, // [176] 0xb0
+       false, // [177] 0xb1
+       false, // [178] 0xb2
+       false, // [179] 0xb3
+       false, // [180] 0xb4
+       false, // [181] 0xb5
+       false, // [182] 0xb6
+       false, // [183] 0xb7
+       false, // [184] 0xb8
+       false, // [185] 0xb9
+       false, // [186] 0xba
+       false, // [187] 0xbb
+       false, // [188] 0xbc
+       false, // [189] 0xbd
+       false, // [190] 0xbe
+       false, // [191] 0xbf
+       false, // [192] 0xc0
+       false, // [193] 0xc1
+       false, // [194] 0xc2
+       false, // [195] 0xc3
+       false, // [196] 0xc4
+       false, // [197] 0xc5
+       false, // [198] 0xc6
+       false, // [199] 0xc7
+       false, // [200] 0xc8
+       false, // [201] 0xc9
+       false, // [202] 0xca
+       false, // [203] 0xcb
+       false, // [204] 0xcc
+       false, // [205] 0xcd
+       false, // [206] 0xce
+       false, // [207] 0xcf
+       false, // [208] 0xd0
+       false, // [209] 0xd1
+       false, // [210] 0xd2
+       false, // [211] 0xd3
+       false, // [212] 0xd4
+       false, // [213] 0xd5
+       false, // [214] 0xd6
+       false, // [215] 0xd7
+       false, // [216] 0xd8
+       false, // [217] 0xd9
+       false, // [218] 0xda
+       false, // [219] 0xdb
+       false, // [220] 0xdc
+       false, // [221] 0xdd
+       false, // [222] 0xde
+       false, // [223] 0xdf
+       false, // [224] 0xe0
+       false, // [225] 0xe1
+       false, // [226] 0xe2
+       false, // [227] 0xe3
+       false, // [228] 0xe4
+       false, // [229] 0xe5
+       false, // [230] 0xe6
+       false, // [231] 0xe7
+       false, // [232] 0xe8
+       false, // [233] 0xe9
+       false, // [234] 0xea
+       false, // [235] 0xeb
+       false, // [236] 0xec
+       false, // [237] 0xed
+       false, // [238] 0xee
+       false, // [239] 0xef
+       false, // [240] 0xf0
+       false, // [241] 0xf1
+       false, // [242] 0xf2
+       false, // [243] 0xf3
+       false, // [244] 0xf4
+       false, // [245] 0xf5
+       false, // [246] 0xf6
+       false, // [247] 0xf7
+       false, // [248] 0xf8
+       false, // [249] 0xf9
+       false, // [250] 0xfa
+       false, // [251] 0xfb
+       false, // [252] 0xfc
+       false, // [253] 0xfd
+       false, // [254] 0xfe
+       false, // [255] 0xff
+       };
+
+bool g_IsNucleoChar[256] = // Lookup indexed by character code: true for A, C, G, N, T and U in either case, false for everything else.
+       { // Note that N is accepted here in addition to the four bases and U.
+       false, // [  0] 0x00
+       false, // [  1] 0x01
+       false, // [  2] 0x02
+       false, // [  3] 0x03
+       false, // [  4] 0x04
+       false, // [  5] 0x05
+       false, // [  6] 0x06
+       false, // [  7] 0x07
+       false, // [  8] 0x08
+       false, // [  9] 0x09
+       false, // [ 10] 0x0a
+       false, // [ 11] 0x0b
+       false, // [ 12] 0x0c
+       false, // [ 13] 0x0d
+       false, // [ 14] 0x0e
+       false, // [ 15] 0x0f
+       false, // [ 16] 0x10
+       false, // [ 17] 0x11
+       false, // [ 18] 0x12
+       false, // [ 19] 0x13
+       false, // [ 20] 0x14
+       false, // [ 21] 0x15
+       false, // [ 22] 0x16
+       false, // [ 23] 0x17
+       false, // [ 24] 0x18
+       false, // [ 25] 0x19
+       false, // [ 26] 0x1a
+       false, // [ 27] 0x1b
+       false, // [ 28] 0x1c
+       false, // [ 29] 0x1d
+       false, // [ 30] 0x1e
+       false, // [ 31] 0x1f
+       false, // [ 32] ' '
+       false, // [ 33] '!'
+       false, // [ 34] '"'
+       false, // [ 35] '#'
+       false, // [ 36] '$'
+       false, // [ 37] '%'
+       false, // [ 38] '&'
+       false, // [ 39] '''
+       false, // [ 40] '('
+       false, // [ 41] ')'
+       false, // [ 42] '*'
+       false, // [ 43] '+'
+       false, // [ 44] ','
+       false, // [ 45] '-'
+       false, // [ 46] '.'
+       false, // [ 47] '/'
+       false, // [ 48] '0'
+       false, // [ 49] '1'
+       false, // [ 50] '2'
+       false, // [ 51] '3'
+       false, // [ 52] '4'
+       false, // [ 53] '5'
+       false, // [ 54] '6'
+       false, // [ 55] '7'
+       false, // [ 56] '8'
+       false, // [ 57] '9'
+       false, // [ 58] ':'
+       false, // [ 59] ';'
+       false, // [ 60] '<'
+       false, // [ 61] '='
+       false, // [ 62] '>'
+       false, // [ 63] '?'
+       false, // [ 64] '@'
+       true,  // [ 65] 'A' (Nucleotide)
+       false, // [ 66] 'B'
+       true,  // [ 67] 'C' (Nucleotide)
+       false, // [ 68] 'D'
+       false, // [ 69] 'E'
+       false, // [ 70] 'F'
+       true,  // [ 71] 'G' (Nucleotide)
+       false, // [ 72] 'H'
+       false, // [ 73] 'I'
+       false, // [ 74] 'J'
+       false, // [ 75] 'K'
+       false, // [ 76] 'L'
+       false, // [ 77] 'M'
+       true,  // [ 78] 'N' (Nucleotide)
+       false, // [ 79] 'O'
+       false, // [ 80] 'P'
+       false, // [ 81] 'Q'
+       false, // [ 82] 'R'
+       false, // [ 83] 'S'
+       true,  // [ 84] 'T' (Nucleotide)
+       true,  // [ 85] 'U' (Nucleotide)
+       false, // [ 86] 'V'
+       false, // [ 87] 'W'
+       false, // [ 88] 'X'
+       false, // [ 89] 'Y'
+       false, // [ 90] 'Z'
+       false, // [ 91] '['
+       false, // [ 92] '\'
+       false, // [ 93] ']'
+       false, // [ 94] '^'
+       false, // [ 95] '_'
+       false, // [ 96] '`'
+       true,  // [ 97] 'a' (Nucleotide)
+       false, // [ 98] 'b'
+       true,  // [ 99] 'c' (Nucleotide)
+       false, // [100] 'd'
+       false, // [101] 'e'
+       false, // [102] 'f'
+       true,  // [103] 'g' (Nucleotide)
+       false, // [104] 'h'
+       false, // [105] 'i'
+       false, // [106] 'j'
+       false, // [107] 'k'
+       false, // [108] 'l'
+       false, // [109] 'm'
+       true,  // [110] 'n' (Nucleotide)
+       false, // [111] 'o'
+       false, // [112] 'p'
+       false, // [113] 'q'
+       false, // [114] 'r'
+       false, // [115] 's'
+       true,  // [116] 't' (Nucleotide)
+       true,  // [117] 'u' (Nucleotide)
+       false, // [118] 'v'
+       false, // [119] 'w'
+       false, // [120] 'x'
+       false, // [121] 'y'
+       false, // [122] 'z'
+       false, // [123] '{'
+       false, // [124] '|'
+       false, // [125] '}'
+       false, // [126] '~'
+       false, // [127] 0x7f
+       false, // [128] 0x80
+       false, // [129] 0x81
+       false, // [130] 0x82
+       false, // [131] 0x83
+       false, // [132] 0x84
+       false, // [133] 0x85
+       false, // [134] 0x86
+       false, // [135] 0x87
+       false, // [136] 0x88
+       false, // [137] 0x89
+       false, // [138] 0x8a
+       false, // [139] 0x8b
+       false, // [140] 0x8c
+       false, // [141] 0x8d
+       false, // [142] 0x8e
+       false, // [143] 0x8f
+       false, // [144] 0x90
+       false, // [145] 0x91
+       false, // [146] 0x92
+       false, // [147] 0x93
+       false, // [148] 0x94
+       false, // [149] 0x95
+       false, // [150] 0x96
+       false, // [151] 0x97
+       false, // [152] 0x98
+       false, // [153] 0x99
+       false, // [154] 0x9a
+       false, // [155] 0x9b
+       false, // [156] 0x9c
+       false, // [157] 0x9d
+       false, // [158] 0x9e
+       false, // [159] 0x9f
+       false, // [160] 0xa0
+       false, // [161] 0xa1
+       false, // [162] 0xa2
+       false, // [163] 0xa3
+       false, // [164] 0xa4
+       false, // [165] 0xa5
+       false, // [166] 0xa6
+       false, // [167] 0xa7
+       false, // [168] 0xa8
+       false, // [169] 0xa9
+       false, // [170] 0xaa
+       false, // [171] 0xab
+       false, // [172] 0xac
+       false, // [173] 0xad
+       false, // [174] 0xae
+       false, // [175] 0xaf
+       false, // [176] 0xb0
+       false, // [177] 0xb1
+       false, // [178] 0xb2
+       false, // [179] 0xb3
+       false, // [180] 0xb4
+       false, // [181] 0xb5
+       false, // [182] 0xb6
+       false, // [183] 0xb7
+       false, // [184] 0xb8
+       false, // [185] 0xb9
+       false, // [186] 0xba
+       false, // [187] 0xbb
+       false, // [188] 0xbc
+       false, // [189] 0xbd
+       false, // [190] 0xbe
+       false, // [191] 0xbf
+       false, // [192] 0xc0
+       false, // [193] 0xc1
+       false, // [194] 0xc2
+       false, // [195] 0xc3
+       false, // [196] 0xc4
+       false, // [197] 0xc5
+       false, // [198] 0xc6
+       false, // [199] 0xc7
+       false, // [200] 0xc8
+       false, // [201] 0xc9
+       false, // [202] 0xca
+       false, // [203] 0xcb
+       false, // [204] 0xcc
+       false, // [205] 0xcd
+       false, // [206] 0xce
+       false, // [207] 0xcf
+       false, // [208] 0xd0
+       false, // [209] 0xd1
+       false, // [210] 0xd2
+       false, // [211] 0xd3
+       false, // [212] 0xd4
+       false, // [213] 0xd5
+       false, // [214] 0xd6
+       false, // [215] 0xd7
+       false, // [216] 0xd8
+       false, // [217] 0xd9
+       false, // [218] 0xda
+       false, // [219] 0xdb
+       false, // [220] 0xdc
+       false, // [221] 0xdd
+       false, // [222] 0xde
+       false, // [223] 0xdf
+       false, // [224] 0xe0
+       false, // [225] 0xe1
+       false, // [226] 0xe2
+       false, // [227] 0xe3
+       false, // [228] 0xe4
+       false, // [229] 0xe5
+       false, // [230] 0xe6
+       false, // [231] 0xe7
+       false, // [232] 0xe8
+       false, // [233] 0xe9
+       false, // [234] 0xea
+       false, // [235] 0xeb
+       false, // [236] 0xec
+       false, // [237] 0xed
+       false, // [238] 0xee
+       false, // [239] 0xef
+       false, // [240] 0xf0
+       false, // [241] 0xf1
+       false, // [242] 0xf2
+       false, // [243] 0xf3
+       false, // [244] 0xf4
+       false, // [245] 0xf5
+       false, // [246] 0xf6
+       false, // [247] 0xf7
+       false, // [248] 0xf8
+       false, // [249] 0xf9
+       false, // [250] 0xfa
+       false, // [251] 0xfb
+       false, // [252] 0xfc
+       false, // [253] 0xfd
+       false, // [254] 0xfe
+       false, // [255] 0xff
+       };
+
+bool g_IsACGTU[256] = // true only for 'A','C','G','T','U' and their lower-case forms; indexed by byte value
+       {
+       false, // [  0] 0x00
+       false, // [  1] 0x01
+       false, // [  2] 0x02
+       false, // [  3] 0x03
+       false, // [  4] 0x04
+       false, // [  5] 0x05
+       false, // [  6] 0x06
+       false, // [  7] 0x07
+       false, // [  8] 0x08
+       false, // [  9] 0x09
+       false, // [ 10] 0x0a
+       false, // [ 11] 0x0b
+       false, // [ 12] 0x0c
+       false, // [ 13] 0x0d
+       false, // [ 14] 0x0e
+       false, // [ 15] 0x0f
+       false, // [ 16] 0x10
+       false, // [ 17] 0x11
+       false, // [ 18] 0x12
+       false, // [ 19] 0x13
+       false, // [ 20] 0x14
+       false, // [ 21] 0x15
+       false, // [ 22] 0x16
+       false, // [ 23] 0x17
+       false, // [ 24] 0x18
+       false, // [ 25] 0x19
+       false, // [ 26] 0x1a
+       false, // [ 27] 0x1b
+       false, // [ 28] 0x1c
+       false, // [ 29] 0x1d
+       false, // [ 30] 0x1e
+       false, // [ 31] 0x1f
+       false, // [ 32] ' '
+       false, // [ 33] '!'
+       false, // [ 34] '"'
+       false, // [ 35] '#'
+       false, // [ 36] '$'
+       false, // [ 37] '%'
+       false, // [ 38] '&'
+       false, // [ 39] '''
+       false, // [ 40] '('
+       false, // [ 41] ')'
+       false, // [ 42] '*'
+       false, // [ 43] '+'
+       false, // [ 44] ','
+       false, // [ 45] '-'
+       false, // [ 46] '.'
+       false, // [ 47] '/'
+       false, // [ 48] '0'
+       false, // [ 49] '1'
+       false, // [ 50] '2'
+       false, // [ 51] '3'
+       false, // [ 52] '4'
+       false, // [ 53] '5'
+       false, // [ 54] '6'
+       false, // [ 55] '7'
+       false, // [ 56] '8'
+       false, // [ 57] '9'
+       false, // [ 58] ':'
+       false, // [ 59] ';'
+       false, // [ 60] '<'
+       false, // [ 61] '='
+       false, // [ 62] '>'
+       false, // [ 63] '?'
+       false, // [ 64] '@'
+       true,  // [ 65] 'A' (ACGT)
+       false, // [ 66] 'B'
+       true,  // [ 67] 'C' (ACGT)
+       false, // [ 68] 'D'
+       false, // [ 69] 'E'
+       false, // [ 70] 'F'
+       true,  // [ 71] 'G' (ACGT)
+       false, // [ 72] 'H'
+       false, // [ 73] 'I'
+       false, // [ 74] 'J'
+       false, // [ 75] 'K'
+       false, // [ 76] 'L'
+       false, // [ 77] 'M'
+       false, // [ 78] 'N'
+       false, // [ 79] 'O'
+       false, // [ 80] 'P'
+       false, // [ 81] 'Q'
+       false, // [ 82] 'R'
+       false, // [ 83] 'S'
+       true,  // [ 84] 'T' (ACGT)
+       true,  // [ 85] 'U' (ACGT)
+       false, // [ 86] 'V'
+       false, // [ 87] 'W'
+       false, // [ 88] 'X'
+       false, // [ 89] 'Y'
+       false, // [ 90] 'Z'
+       false, // [ 91] '['
+       false, // [ 92] '\'
+       false, // [ 93] ']'
+       false, // [ 94] '^'
+       false, // [ 95] '_'
+       false, // [ 96] '`'
+       true,  // [ 97] 'a' (ACGT)
+       false, // [ 98] 'b'
+       true,  // [ 99] 'c' (ACGT)
+       false, // [100] 'd'
+       false, // [101] 'e'
+       false, // [102] 'f'
+       true,  // [103] 'g' (ACGT)
+       false, // [104] 'h'
+       false, // [105] 'i'
+       false, // [106] 'j'
+       false, // [107] 'k'
+       false, // [108] 'l'
+       false, // [109] 'm'
+       false, // [110] 'n'
+       false, // [111] 'o'
+       false, // [112] 'p'
+       false, // [113] 'q'
+       false, // [114] 'r'
+       false, // [115] 's'
+       true,  // [116] 't' (ACGT)
+       true,  // [117] 'u' (ACGT)
+       false, // [118] 'v'
+       false, // [119] 'w'
+       false, // [120] 'x'
+       false, // [121] 'y'
+       false, // [122] 'z'
+       false, // [123] '{'
+       false, // [124] '|'
+       false, // [125] '}'
+       false, // [126] '~'
+       false, // [127] 0x7f
+       false, // [128] 0x80
+       false, // [129] 0x81
+       false, // [130] 0x82
+       false, // [131] 0x83
+       false, // [132] 0x84
+       false, // [133] 0x85
+       false, // [134] 0x86
+       false, // [135] 0x87
+       false, // [136] 0x88
+       false, // [137] 0x89
+       false, // [138] 0x8a
+       false, // [139] 0x8b
+       false, // [140] 0x8c
+       false, // [141] 0x8d
+       false, // [142] 0x8e
+       false, // [143] 0x8f
+       false, // [144] 0x90
+       false, // [145] 0x91
+       false, // [146] 0x92
+       false, // [147] 0x93
+       false, // [148] 0x94
+       false, // [149] 0x95
+       false, // [150] 0x96
+       false, // [151] 0x97
+       false, // [152] 0x98
+       false, // [153] 0x99
+       false, // [154] 0x9a
+       false, // [155] 0x9b
+       false, // [156] 0x9c
+       false, // [157] 0x9d
+       false, // [158] 0x9e
+       false, // [159] 0x9f
+       false, // [160] 0xa0
+       false, // [161] 0xa1
+       false, // [162] 0xa2
+       false, // [163] 0xa3
+       false, // [164] 0xa4
+       false, // [165] 0xa5
+       false, // [166] 0xa6
+       false, // [167] 0xa7
+       false, // [168] 0xa8
+       false, // [169] 0xa9
+       false, // [170] 0xaa
+       false, // [171] 0xab
+       false, // [172] 0xac
+       false, // [173] 0xad
+       false, // [174] 0xae
+       false, // [175] 0xaf
+       false, // [176] 0xb0
+       false, // [177] 0xb1
+       false, // [178] 0xb2
+       false, // [179] 0xb3
+       false, // [180] 0xb4
+       false, // [181] 0xb5
+       false, // [182] 0xb6
+       false, // [183] 0xb7
+       false, // [184] 0xb8
+       false, // [185] 0xb9
+       false, // [186] 0xba
+       false, // [187] 0xbb
+       false, // [188] 0xbc
+       false, // [189] 0xbd
+       false, // [190] 0xbe
+       false, // [191] 0xbf
+       false, // [192] 0xc0
+       false, // [193] 0xc1
+       false, // [194] 0xc2
+       false, // [195] 0xc3
+       false, // [196] 0xc4
+       false, // [197] 0xc5
+       false, // [198] 0xc6
+       false, // [199] 0xc7
+       false, // [200] 0xc8
+       false, // [201] 0xc9
+       false, // [202] 0xca
+       false, // [203] 0xcb
+       false, // [204] 0xcc
+       false, // [205] 0xcd
+       false, // [206] 0xce
+       false, // [207] 0xcf
+       false, // [208] 0xd0
+       false, // [209] 0xd1
+       false, // [210] 0xd2
+       false, // [211] 0xd3
+       false, // [212] 0xd4
+       false, // [213] 0xd5
+       false, // [214] 0xd6
+       false, // [215] 0xd7
+       false, // [216] 0xd8
+       false, // [217] 0xd9
+       false, // [218] 0xda
+       false, // [219] 0xdb
+       false, // [220] 0xdc
+       false, // [221] 0xdd
+       false, // [222] 0xde
+       false, // [223] 0xdf
+       false, // [224] 0xe0
+       false, // [225] 0xe1
+       false, // [226] 0xe2
+       false, // [227] 0xe3
+       false, // [228] 0xe4
+       false, // [229] 0xe5
+       false, // [230] 0xe6
+       false, // [231] 0xe7
+       false, // [232] 0xe8
+       false, // [233] 0xe9
+       false, // [234] 0xea
+       false, // [235] 0xeb
+       false, // [236] 0xec
+       false, // [237] 0xed
+       false, // [238] 0xee
+       false, // [239] 0xef
+       false, // [240] 0xf0
+       false, // [241] 0xf1
+       false, // [242] 0xf2
+       false, // [243] 0xf3
+       false, // [244] 0xf4
+       false, // [245] 0xf5
+       false, // [246] 0xf6
+       false, // [247] 0xf7
+       false, // [248] 0xf8
+       false, // [249] 0xf9
+       false, // [250] 0xfa
+       false, // [251] 0xfb
+       false, // [252] 0xfc
+       false, // [253] 0xfd
+       false, // [254] 0xfe
+       false, // [255] 0xff
+       };
+
+float g_AminoFreqs[20] = // one frequency per amino acid, in the one-letter alphabetical order labelled below; the 20 values sum to 0.9998
+       {
+       0.0777f, // 'A' = Ala
+       0.0161f, // 'C' = Cys
+       0.0527f, // 'D' = Asp
+       0.0631f, // 'E' = Glu
+       0.0417f, // 'F' = Phe
+       0.0718f, // 'G' = Gly
+       0.0238f, // 'H' = His
+       0.0606f, // 'I' = Ile
+       0.0601f, // 'K' = Lys
+       0.0906f, // 'L' = Leu
+       0.0233f, // 'M' = Met
+       0.0439f, // 'N' = Asn
+       0.0456f, // 'P' = Pro
+       0.0368f, // 'Q' = Gln
+       0.0526f, // 'R' = Arg
+       0.0639f, // 'S' = Ser
+       0.0570f, // 'T' = Thr
+       0.0712f, // 'V' = Val
+       0.0134f, // 'W' = Trp
+       0.0339f, // 'Y' = Tyr
+       };
diff --git a/uchime_src/alpha.h b/uchime_src/alpha.h
new file mode 100644 (file)
index 0000000..e021b7f
--- /dev/null
@@ -0,0 +1,50 @@
+#ifndef alpha_h\r
+#define alpha_h\r
+\r
+#include <limits.h>\r
+#include <string>\r
+\r
+using namespace std;\r
+\r
+const unsigned INVALID_LETTER = 0; /* NOTE(review): 0 is also what WordToStr* treats as a real letter index (Word%N can be 0) - confirm how callers tell an invalid letter apart */\r
+const unsigned char INVALID_CHAR = '?'; /* placeholder character; presumably stored for bytes with no valid mapping - TODO confirm against the tables */\r
+\r
+extern unsigned g_CharToLetterAmino[];\r
+extern unsigned g_CharToLetterAminoStop[];\r
+extern unsigned char g_LetterToCharAmino[];\r
+extern unsigned g_CharToLetterNucleo[];\r
+extern unsigned char g_LetterToCharNucleo[];\r
+extern unsigned g_CodonWordToAminoLetter[];\r
+extern char g_CodonWordToAminoChar[];\r
+extern unsigned char g_CharToCompChar[];\r
+extern unsigned g_CharToCompLetter[];\r
+extern bool g_IsAminoChar[];\r
+extern bool g_IsNucleoChar[];\r
+extern bool g_IsACGTU[];\r
+extern float g_AminoFreqs[];\r
+\r
+extern unsigned g_CharToLetterRed[];\r
+extern unsigned char g_LetterToCharRed[];\r
+extern unsigned g_RedAlphaSize;\r
+\r
+void LogRedAlphaRed();\r
+void ReadRedAlphaFromFile(const string &FileName);\r
+unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2,\r
+  unsigned char c3);\r
+\r
+static inline bool AminoLetterIsStartCodon(unsigned char Letter) /* true only for amino letter 10 */\r
+       { /* 10 is 'M' (Met) if letters follow the alphabetical order used by g_AminoFreqs - presumably the case */\r
+       return 10 == Letter;\r
+       }\r
+\r
+static inline bool AminoLetterIsStopCodon(unsigned char Letter) /* true only for amino letter 20 */\r
+       { /* 20 is the first value past the 20 ordinary amino letters (0..19) */\r
+       return 20 == Letter;\r
+       }\r
+\r
+const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo);\r
+const char *WordToStrNucleo(unsigned Word, unsigned WordLength);\r
+const char *WordToStrAmino(unsigned Word, unsigned WordLength);\r
+const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str);\r
+\r
+#endif // alpha_h\r
diff --git a/uchime_src/alpha2.cpp b/uchime_src/alpha2.cpp
new file mode 100644 (file)
index 0000000..26bc1c6
--- /dev/null
@@ -0,0 +1,100 @@
+#include "myutils.h"\r
+#include "alpha.h"\r
+#include "timing.h"\r
+\r
+bool isgap(byte c) /* byte overload; chime.h carries a separate char overload */\r
+       { /* A gap is written as either '-' or '.'. */\r
+       return !(c != '-' && c != '.');\r
+       }\r
+\r
+const char *WordToStrAmino(unsigned Word, unsigned WordLength)\r
+       {\r
+       static char Str[32];\r
+       for (unsigned i = 0; i < WordLength; ++i)\r
+               {\r
+               unsigned Letter = Word%20;\r
+               Str[WordLength-i-1] = g_LetterToCharAmino[Letter];\r
+               Word /= 20;\r
+               }\r
+       Str[WordLength] = 0;\r
+       return Str;\r
+       }\r
+\r
+const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str)\r
+       {\r
+       for (unsigned i = 0; i < WordLength; ++i)\r
+               {\r
+               unsigned Letter = Word%20;\r
+               Str[WordLength-i-1] = g_LetterToCharAmino[Letter];\r
+               Word /= 20;\r
+               }\r
+       Str[WordLength] = 0;\r
+       return Str;\r
+       }\r
+\r
+const char *WordToStrNucleo(unsigned Word, unsigned WordLength)\r
+       {\r
+       static char Str[32];\r
+       for (unsigned i = 0; i < WordLength; ++i)\r
+               {\r
+               unsigned Letter = Word%4;\r
+               Str[WordLength-i-1] = g_LetterToCharNucleo[Letter];\r
+               Word /= 4;\r
+               }\r
+       Str[WordLength] = 0;\r
+       return Str;\r
+       }\r
+\r
+const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo)\r
+       { /* Dispatches on alphabet; the result lives in the chosen decoder's static buffer. */\r
+       return Nucleo ? WordToStrNucleo(Word, WordLength) : WordToStrAmino(Word, WordLength);\r
+       }\r
+\r
+byte *RevCompAlloc(const byte *Seq, unsigned L)\r
+       {\r
+       byte *RCSeq = MYALLOC(byte, L, Alpha);\r
+\r
+       for (unsigned i = 0; i < L; ++i)\r
+               RCSeq[L-i-1] = g_CharToCompChar[Seq[i]];\r
+\r
+       return RCSeq;\r
+       }\r
+\r
+void RevCompInPlace(byte *Seq, unsigned L)\r
+       {\r
+       unsigned L1 = L - 1;\r
+       unsigned L2 = L/2;\r
+       for (unsigned i = 0; i < L2; ++i)\r
+               {\r
+               unsigned j = L1 - i;\r
+               unsigned ci = Seq[i];\r
+               unsigned cj = Seq[j];\r
+\r
+               unsigned ri = g_CharToCompChar[ci];\r
+               unsigned rj = g_CharToCompChar[cj];\r
+\r
+               Seq[i] = rj;\r
+               Seq[j] = ri;\r
+               }\r
+\r
+       if (L%2 == 1)\r
+               Seq[L2] = g_CharToCompChar[Seq[L2]];\r
+       }\r
+\r
+void RevComp(const byte *Seq, unsigned L, byte *RCSeq)\r
+       { /* Writes the reverse complement of Seq[0..L-1] into RCSeq, which must have room for L bytes. */\r
+       for (unsigned Left = L; Left > 0; --Left)\r
+               RCSeq[Left-1] = g_CharToCompChar[Seq[L-Left]];\r
+       }\r
+\r
+unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2,\r
+  unsigned char c3)\r
+       {\r
+       unsigned Letter1 = g_CharToLetterNucleo[c1];\r
+       unsigned Letter2 = g_CharToLetterNucleo[c2];\r
+       unsigned Letter3 = g_CharToLetterNucleo[c3];\r
+       unsigned Word = Letter1*(4*4) + Letter2*4 + Letter3;\r
+\r
+       unsigned Letter = g_CodonWordToAminoLetter[Word];\r
+       return g_LetterToCharAmino[Letter];\r
+       }\r
diff --git a/uchime_src/chainer.h b/uchime_src/chainer.h
new file mode 100644 (file)
index 0000000..a954dc0
--- /dev/null
@@ -0,0 +1,79 @@
+#ifndef chainer_h\r
+#define chainer_h\r
+\r
+#include "hsp.h"\r
+#include "seq.h"\r
+#include <list>\r
+\r
+const float BAD_SCORE = -9e9f;\r
+\r
+struct TargetHit /* plain record for one hit: target index, lo/hi coordinates, query frame and a score */\r
+       {\r
+       unsigned TargetIndex;\r
+       unsigned TargetLo;\r
+       unsigned TargetHi;\r
+       int QueryFrame;\r
+       float RawScore; // SOMETIMES USED FOR BIT SCORE!!!\r
+//     unsigned TargetLength;\r
+\r
+       void LogMe() const /* logs lo, hi, frame and score on one line; TargetIndex is not printed */\r
+               {\r
+               Log("lo %u, hi %u, frame %d, score %.1f\n",\r
+                 TargetLo, TargetHi, QueryFrame, RawScore);\r
+               }\r
+       };\r
+\r
+struct ChainData /* plain record with no methods; presumably one partial chain ending at LastHSPIndex - TODO confirm against the Chainer implementation */\r
+       {\r
+       unsigned LastHSPIndex;\r
+       unsigned Ahi;\r
+       unsigned Bhi;\r
+       float Score;\r
+       };\r
+\r
+class Chainer /* declaration only: every method body lives outside this header */\r
+       {\r
+public:\r
+       HSPData **m_HSPs; // memory owned elsewhere\r
+       unsigned m_HSPCount;\r
+       unsigned m_MaxHSPCount;\r
+\r
+       BPData *m_BPs;\r
+\r
+       unsigned *m_PrevHSPIndexes;             // Predecessor in chain\r
+       float *m_HSPIndexToChainScore;\r
+\r
+       list<unsigned> m_Chains;                // Live HSP indexes\r
+\r
+public:\r
+       Chainer();\r
+       ~Chainer();\r
+       void Reset();\r
+       void Clear(bool ctor = false); /* ctor defaults to false; presumably true only when called from the constructor - TODO confirm */\r
+       float Chain(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain,\r
+         unsigned &OptChainLength); /* result chain is reported through OptChain/OptChainLength; the float is presumably its score */\r
+       bool ResolveOverlaps(const SeqData &SA, const SeqData &SB, double MinScore,\r
+         const float * const *SubstMx, HSPData **InHSPs, unsigned InHSPCount,\r
+         HSPData **OutHSPs, unsigned &OutHSPCount);\r
+       void ResolveOverlap(HSPData &HSP1, HSPData &HSP2);\r
+\r
+       float ChainBrute(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain,\r
+         unsigned &OptChainLength); /* same signature as Chain; presumably a brute-force reference version */\r
+       void LogMe() const;\r
+       void LogHSPs(HSPData **HSPs, unsigned HSPCount) const;\r
+       void LogBPs() const;\r
+\r
+       static bool IsValidChain(HSPData **HSPs, unsigned HSPCount);\r
+       static void AssertValidChain(HSPData **HSPs, unsigned HSPCount);\r
+       static void LogChain(HSPData **HSPs, unsigned HSPCount);\r
+       static void LogChain2(HSPData **HSPs, unsigned HSPCount);\r
+       static float GetChainScore(HSPData **HSPs, unsigned HSPCount);\r
+\r
+private:\r
+       void AllocHSPCount(unsigned MaxHSPCount);\r
+       void SetBPs();\r
+       void SortBPs();\r
+       unsigned FindBestChainLT(unsigned Ahi, unsigned Bhi);\r
+       };\r
+\r
+#endif // chainer_h\r
diff --git a/uchime_src/chime.h b/uchime_src/chime.h
new file mode 100644 (file)
index 0000000..1b0662a
--- /dev/null
@@ -0,0 +1,104 @@
+#ifndef chime_h\r
+#define chime_h\r
+\r
+#include "seq.h"\r
+\r
+struct ChimeHit2 /* result record for one query (Q) against two candidate parents (A, B) */\r
+       {\r
+       string QLabel;\r
+       string ALabel;\r
+       string BLabel;\r
+       string Q3;\r
+       string A3;\r
+       string B3;\r
+\r
+       // Percent identities between pairs of sequences; -1 until filled in.\r
+       // (The older LY/LN/LA/LD and RY/RN/RA/RD counters are no longer stored.)\r
+       double PctIdQT, PctIdQA, PctIdQB, PctIdQM, PctIdAB;\r
+\r
+       unsigned ColLo;\r
+       unsigned ColXLo;\r
+       unsigned ColXHi;\r
+       unsigned ColHi;\r
+       unsigned QXLo;\r
+       unsigned QXHi;\r
+\r
+       double Div;\r
+       double Score;\r
+       double H;\r
+\r
+       unsigned CS_LY, CS_LN, CS_LA, CS_RY, CS_RN, CS_RA;\r
+\r
+       float AbQ;\r
+       float AbA;\r
+       float AbB;\r
+\r
+       ChimeHit2() /* a new hit starts out in the cleared state */\r
+               {\r
+               Clear();\r
+               }\r
+\r
+       void Clear() /* empties every string and resets every number to its sentinel */\r
+               {\r
+               QLabel.clear();\r
+               ALabel.clear();\r
+               BLabel.clear();\r
+               Q3.clear();\r
+               A3.clear();\r
+               B3.clear();\r
+\r
+               // Sentinels: UINT_MAX for unsigned fields, -1 for floating-point fields.\r
+               ColLo = ColXLo = ColXHi = ColHi = QXLo = QXHi = UINT_MAX;\r
+               CS_LY = CS_LN = CS_LA = UINT_MAX;\r
+               CS_RY = CS_RN = CS_RA = UINT_MAX;\r
+               PctIdQT = PctIdQA = PctIdQB = PctIdQM = PctIdAB = -1.0;\r
+               Score = -1.0;\r
+               Div = -1.0;\r
+               H = -1.0;\r
+               AbQ = AbA = AbB = -1.0f;\r
+               }\r
+\r
+       bool Accept() const /* all four option thresholds must be met */\r
+               {\r
+               return CS_LY >= opt_mindiffs && CS_RY >= opt_mindiffs && Score >= opt_minh && Div >= opt_mindiv;\r
+               }\r
+\r
+       void LogMe() const /* one log line; the leading flag uses fixed 1.0 thresholds, not Accept() */\r
+               {\r
+               Log("@L %c ", yon(Score >= 1.0 && Div >= 1.0));\r
+               Log(" %.4f", Score);\r
+               Log(" LY %u LN %u LA %u", CS_LY, CS_LN, CS_LA);\r
+               Log(" RY %u RN %u RA %u", CS_RY, CS_RN, CS_RA);\r
+               Log(" Div %.1f%%", Div);\r
+               Log(" Q=%s", QLabel.c_str());\r
+               Log(" A=%s", ALabel.c_str());\r
+               Log(" B=%s", BLabel.c_str());\r
+               Log(" QA %.1f%% QB=%.1f%% AB=%.1f%% QM=%.1f%%", PctIdQA, PctIdQB, PctIdAB, PctIdQM);\r
+               Log("\n");\r
+               }\r
+\r
+       bool operator<(const ChimeHit2 &rhs) const /* orders best first: higher Score, then higher Div */\r
+               {\r
+               if (Score != rhs.Score)\r
+                       return Score > rhs.Score;\r
+               return Div > rhs.Div;\r
+               }\r
+       };\r
+\r
+static inline bool isacgt(char c) /* upper-case A, C, G, T only; lower case and U are rejected */\r
+       {\r
+       return !(c != 'A' && c != 'C' && c != 'G' && c != 'T');\r
+       }\r
+\r
+static inline bool isgap(char c) /* char overload: a gap is '.' or '-' */\r
+       {\r
+       return c == '.' || c == '-';\r
+       }\r
+\r
+void GetChunkInfo(unsigned L, unsigned &Length, vector<unsigned> &Los);\r
+float GetAbFromLabel(const string &Label);\r
+void WriteChimeHitCS(FILE *f, const ChimeHit2 &Hit);\r
+void WriteChimeHit(FILE *f, const ChimeHit2 &Hit);\r
+void WriteChimeFileHdr(FILE *f);\r
+\r
+#endif // chime_h\r
diff --git a/uchime_src/counters.h b/uchime_src/counters.h
new file mode 100644 (file)
index 0000000..a433cc8
--- /dev/null
@@ -0,0 +1,39 @@
+C(Search) /* X-macro style list: one C(Name) entry per counter; the includer presumably defines C before including this file - TODO confirm */\r
+C(SearchBlast)\r
+C(HotHits)\r
+C(HotHits2)\r
+C(WindexAccepts)\r
+C(WindexRejects)\r
+C(AlnAccepts)\r
+C(AlnRejects)\r
+C(Seqs)\r
+C(FilterAccepts)\r
+C(FilterRejects)\r
+C(DiagRejects)\r
+C(DPTooBig)\r
+C(HotHitCut)\r
+C(FastRejects)\r
+C(FastRejects2)\r
+C(Step)\r
+C(HSPConflict)\r
+C(DPArea)\r
+C(DPArea2)\r
+C(DPArea3)\r
+C(DPArea4)\r
+C(DPArea5)\r
+C(HSPIdRejects)\r
+C(NoHSPRejects)\r
+C(NoHSPAccepts)\r
+C(BandRejects)\r
+C(FractIdBestSeg)\r
+C(FractIdHSPs)\r
+C(Excludes)\r
+C(NonExcludes)\r
+C(AlignQueryToSeed)\r
+C(PWA_Align)\r
+C(HitExtends)\r
+C(FailedExtends)\r
+C(HitExtendLetters)\r
+C(FailedExtendLetters)\r
+C(AddWords)\r
+C(AddWordGrows)\r
diff --git a/uchime_src/diagbox.h b/uchime_src/diagbox.h
new file mode 100644 (file)
index 0000000..0c5846c
--- /dev/null
@@ -0,0 +1,193 @@
+#ifndef diagbox_h\r
+#define diagbox_h\r
+\r
+struct DiagBox;\r
+\r
+void GetDiagBox(unsigned LA, unsigned LB, unsigned DiagLo, unsigned DiagHi, DiagBox &Box);\r
+void GetDiagRange(unsigned LA, unsigned LB, unsigned d,\r
+  unsigned &mini, unsigned &minj, unsigned &maxi, unsigned &maxj);\r
+void GetDiagLoHi(unsigned LA, unsigned LB, const char *Path,\r
+  unsigned &dlo, unsigned &dhi);\r
+\r
+struct DiagBox\r
+       {\r
+       DiagBox()\r
+               {\r
+               }\r
+\r
+       DiagBox(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi)\r
+               {\r
+               //GetDiagBox(LA, LB, DiagLo, DiagHi, *this);\r
+               //Validate();\r
+               Init(LA_, LB_, DiagLo, DiagHi);\r
+               }\r
+\r
+       void Init(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi)\r
+               {\r
+               GetDiagBox(LA_, LB_, DiagLo, DiagHi, *this);\r
+               Validate();\r
+               }\r
+\r
+       unsigned LA;\r
+       unsigned LB;\r
+\r
+       unsigned dlo;\r
+       unsigned dhi;\r
+\r
+       unsigned dlo_mini;\r
+       unsigned dlo_minj;\r
+\r
+       unsigned dlo_maxi;\r
+       unsigned dlo_maxj;\r
+\r
+       unsigned dhi_mini;\r
+       unsigned dhi_minj;\r
+\r
+       unsigned dhi_maxi;\r
+       unsigned dhi_maxj;\r
+\r
+       unsigned GetDiag(unsigned i, unsigned j) const\r
+               {\r
+               return LA - i + j;\r
+               }\r
+\r
+// i, j are positions 0..LA-1, 0..LB-1.\r
+       bool InBox(unsigned i, unsigned j) const\r
+               {\r
+               unsigned d = GetDiag(i, j);\r
+               return d >= dlo && d <= dhi;\r
+               }\r
+\r
+/***\r
+i, j are 0-based prefix lengths 0..LA, 0..LB.\r
+\r
+A full path is in the box iff all match pairs are in the box.\r
+\r
+A partial path that aligns a prefix of A to a prefix of B as\r
+in D.P.) is in the box iff it is is the prefix of at least\r
+one full path that is in the box.\r
+\r
+A D.P. matrix entry X[i][j] is in the box iff there is at\r
+least one full path aligning the first i letters of A and\r
+the first j letters of B ending in a column of type X, i.e.\r
+if there exists a partial path in the box that ends in X.\r
+\r
+Assume terminals appear in all paths, and DI/ID forbidden.\r
+\r
+Intuitively seems that by these definitions D is in box iff\r
+DM or MD is in box, I is in box iff IM or MI is in box.\r
+Don't have proof..\r
+***/\r
+       bool InBoxDPM(unsigned i, unsigned j) const\r
+               {\r
+       // Special case for M[0][0]\r
+               if (i == 0 && j == 0)\r
+                       return true;\r
+               if (i == 0 || j == 0)\r
+                       return false;\r
+               unsigned d = GetDiag(i-1, j-1);\r
+               return d >= dlo && d <= dhi;\r
+               }\r
+\r
+       bool InBoxDPD(unsigned i, unsigned j) const\r
+               {\r
+               bool MD = i == 0 ? false : InBoxDPM(i-1, j);\r
+               bool DM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);\r
+               return MD || DM;\r
+               }\r
+\r
+       bool InBoxDPI(unsigned i, unsigned j) const\r
+               {\r
+               bool MI = j == 0 ? false : InBoxDPM(i, j-1);\r
+               bool IM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);\r
+               return MI || IM;\r
+               }\r
+\r
+       // d = LA - i + j = 1 .. LA+LB-1\r
+       void Validate() const\r
+               {\r
+               asserta(dlo <= dhi);\r
+               asserta(dlo >= GetDiag(LA-1, 0));\r
+               asserta(dhi <= GetDiag(0, LB-1));\r
+\r
+               asserta(GetDiag(dlo_mini, dlo_minj) == dlo);\r
+               asserta(GetDiag(dlo_maxi, dlo_maxj) == dlo);\r
+               asserta(GetDiag(dhi_mini, dhi_minj) == dhi);\r
+               asserta(GetDiag(dhi_maxi, dhi_maxj) == dhi);\r
+\r
+               asserta(dlo_mini >= dhi_mini);\r
+               asserta(dlo_minj <= dhi_minj);\r
+               asserta(dlo_maxi >= dhi_maxi);\r
+               asserta(dlo_maxj <= dhi_maxj);\r
+               }\r
+\r
+       unsigned GetMini() const\r
+               {\r
+               return dhi_mini;\r
+               }\r
+\r
+       unsigned GetMaxi() const\r
+               {\r
+               return dlo_maxi;\r
+               }\r
+\r
+       unsigned GetMinj() const\r
+               {\r
+               return dlo_minj;\r
+               }\r
+\r
+       unsigned GetMaxj() const\r
+               {\r
+               return dhi_maxj;\r
+               }\r
+/***\r
+       i = 0..LA-1\r
+       j = 0..LB-1\r
+       d = LA - i + j = 1 .. LA+LB-1\r
+       j = d - LA + i\r
+       i = LA - d + j\r
+***/\r
+       void GetRange_j(unsigned i, unsigned &Startj, unsigned &Endj) const\r
+               {\r
+       // j = d - LA + i\r
+               if (dlo + i >= LA)\r
+                       Startj = dlo + i - LA;\r
+               else\r
+                       Startj = 0;\r
+\r
+               if (Startj >= LB)\r
+                       Startj = LB - 1;\r
+\r
+               if (dhi + i + 1 >= LA)\r
+                       Endj = dhi + i + 1 - LA;\r
+               else\r
+                       Endj = 0;\r
+\r
+               if (Endj > LB)\r
+                       Endj = LB;\r
+\r
+               asserta(Endj >= Startj);\r
+               }\r
+\r
+       void LogMe() const\r
+               {\r
+               Log("LA=%u LB=%d dlo(%u): (%u,%u)-(%u,%u) dhi(%u): (%u,%u)-(%u,%u) i=[%u-%u] j=[%u-%u]\n",\r
+                 LA, LB,\r
+                 dlo,\r
+                 dlo_mini, dlo_minj,\r
+                 dlo_maxi, dlo_maxj,\r
+                 dhi,\r
+                 dhi_mini, dhi_minj,\r
+                 dhi_maxi, dhi_maxj,\r
+                 GetMini(), GetMaxi(),\r
+                 GetMinj(), GetMaxj());\r
+               }\r
+       };\r
+\r
+typedef const char *(*NWDIAG)(const byte *A, unsigned LA, const byte *B, unsigned LB,
+  unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm);
+
+const char *NWBandWrap(NWDIAG NW, const byte *A, unsigned LA, const byte *B, unsigned LB,
+  unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm);
+\r
+#endif // diagbox_h\r
diff --git a/uchime_src/dp.h b/uchime_src/dp.h
new file mode 100644 (file)
index 0000000..c771538
--- /dev/null
@@ -0,0 +1,164 @@
+#ifndef dp_h\r
+#define dp_h\r
+\r
+#define SAVE_FAST      0\r
+\r
+#include "myutils.h"\r
+#include "mx.h"\r
+#include "seqdb.h"\r
+#include "diagbox.h"\r
+#include "path.h"\r
+#include "alnparams.h"\r
+#include "alnheuristics.h"\r
+#include "hspfinder.h"\r
+\r
+typedef void (*OnPathFn)(const string &Path, bool Full);\r
+\r
+enum XType /* values start at 1, so 0 is not a valid XType; presumably full / forward-only / backward-only extension - TODO confirm */\r
+       {\r
+       XType_Full=1,\r
+       XType_Fwd=2,\r
+       XType_Bwd=3,\r
+       };\r
+\r
+// public\r
+float ViterbiBrute(const byte *A, unsigned LA, const byte *B, unsigned LB, \r
+  unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiSimpleBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, unsigned DiagLo, unsigned DiagHi, PathData &PD);\r
+\r
+float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiFastBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiFastMainDiag(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  unsigned BandRadius, const AlnParams &AP, PathData &PD);\r
+\r
+float XDropFwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+float XDropBwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+float XDropFwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+float XDropBwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+void XDropAlign(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  unsigned AncLoi, unsigned AncLoj, unsigned AncLen, const AlnParams &AP,\r
+  float XDrop, HSPData &HSP, PathData &PD);\r
+\r
+float SWSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj,\r
+  unsigned &Hij, PathData &PD);\r
+\r
+float SWFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj,\r
+  unsigned &Hij, PathData &PD);\r
+\r
+void SWFast2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
+  HSPData &HSP, PathData &PD);\r
+\r
+void SWSimple2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
+  HSPData &HSP, PathData &PD);\r
+\r
+float SWUngapped(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const float * const *SubstMx, unsigned &LoA, unsigned &LoB, unsigned &Len);\r
+\r
+void SWUngapped2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
+  HSPData &HSP);\r
+\r
+float SWFastNTB(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP);\r
+\r
+void GlobalAlignBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, unsigned BandRadius, PathData &PD);\r
+\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &AP,\r
+  const AlnHeuristics &AH, HSPFinder &HF, float MinFractId, float &HSPFractId,\r
+  PathData &PD);\r
+\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path);\r
+\r
+void GetBruteMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetSimpleDPMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetSimpleBandMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetXDropFwdSimpleDPMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+#if    SAVE_FAST\r
+void GetFastMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetFastBandMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+#endif\r
+\r
+// private\r
+void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD);\r
+void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj,\r
+  unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+void EnumPaths(unsigned L1, unsigned L2, bool SubPaths, OnPathFn OnPath);\r
+void AllocBit(unsigned LA, unsigned LB);\r
+\r
+const byte TRACEBITS_DM = 0x01; /* the five flags below occupy one bit each and together make 0x1f */\r
+const byte TRACEBITS_IM = 0x02;\r
+const byte TRACEBITS_MD = 0x04;\r
+const byte TRACEBITS_MI = 0x08;\r
+const byte TRACEBITS_SM = 0x10;\r
+const byte TRACEBITS_UNINIT = ~0x1f; /* every bit outside the five flags; ~0x1f is an int, so this relies on conversion to byte (0xe0 if byte is 8-bit unsigned - TODO confirm) */\r
+\r
+extern Mx<byte> g_Mx_TBBit;\r
+extern float *g_DPRow1;\r
+extern float *g_DPRow2;\r
+extern byte **g_TBBit;\r
+\r
+static inline void Max_xM(float &Score, float MM, float DM, float IM, byte &State)\r
+       {\r
+       Score = MM;\r
+       State = 'M';\r
+\r
+       if (DM > Score)\r
+               {\r
+               Score = DM;\r
+               State = 'D';\r
+               }\r
+       if (IM > Score)\r
+               {\r
+               Score = IM;\r
+               State = 'I';\r
+               }\r
+       }\r
+\r
+static inline void Max_xD(float &Score, float MD, float DD, byte &State)\r
+       {\r
+       if (MD >= DD)\r
+               {\r
+               Score = MD;\r
+               State = 'M';\r
+               }\r
+       else\r
+               {\r
+               Score = DD;\r
+               State = 'D';\r
+               }\r
+       }\r
+\r
+static inline void Max_xI(float &Score, float MI, float II, byte &State)\r
+       {\r
+       if (MI >= II)\r
+               {\r
+               Score = MI;\r
+               State = 'M';\r
+               }\r
+       else\r
+               {\r
+               Score = II;\r
+               State = 'I';\r
+               }\r
+       }\r
+\r
+#endif // dp_h\r
diff --git a/uchime_src/evalue.h b/uchime_src/evalue.h
new file mode 100644 (file)
index 0000000..c9308db
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef evalue_h\r
+#define evalue_h\r
+\r
+#include <float.h>\r
+\r
+void SetKarlin(double GappedLambda, double UngappedLambda,\r
+  double GappedK, double UngappedK, double DBLength);\\r
+\r
+double GetKarlinDBLength();\r
+void SetKarlinDBLength(double DBLength);\r
+void LogKarlin();\r
+void SetKarlinAmino(double DBLength);\r
+void SetKarlinNucleo(double DBLength);\r
+void SetKarlin(double DBLength, bool Nucleo);\r
+double ComputeBitScoreGapped(double Score);\r
+double ComputeBitScoreUngapped(double Score);\r
+double ComputeEvalueGapped(double Score, unsigned QueryLength);\r
+double ComputeEvalueUngapped(double Score, unsigned QueryLength);\r
+double ComputeMinScoreGivenEvalueAGapped(double Evalue, unsigned Area);\r
+double ComputeMinScoreGivenEvalueAUngapped(double Evalue, unsigned Area);\r
+double ComputeMinScoreGivenEvalueQGapped(double Evalue, unsigned QueryLength);\r
+double ComputeMinScoreGivenEvalueQUngapped(double Evalue, unsigned QueryLength);\r
+double ComputeEvalueGappedFromBitScore(double BitScore, unsigned QueryLength);\r
+\r
+#endif // evalue_h\r
diff --git a/uchime_src/fractid.cpp b/uchime_src/fractid.cpp
new file mode 100644 (file)
index 0000000..f298877
--- /dev/null
@@ -0,0 +1,449 @@
+#include "myutils.h"\r
+#include "alpha.h"\r
+\r
+//unsigned g_MaxL = 0;\r
+\r
+static bool *g_IsChar = g_IsAminoChar; /* letter table consulted by the identity helpers in this file; GetFractIdGivenPath() re-points it on every call according to Nucleo, so this is shared mutable state */\r
+\r
+// IdDef 5 ("derep") identity. Terminal gaps are tolerated in the query (A)\r
+// only: a path that starts with 'D' is rejected and 0 is returned. Otherwise\r
+// the columns in front of the last 'M' are tallied and the result is\r
+// 1 - Diffs/Cols, where a 'D' or 'I' column or a mismatching 'M' column is a\r
+// diff, and an 'M' column holding a letter outside g_IsChar is not counted as\r
+// a column at all. ptrDesc, when non-null, receives a short description.\r
+// NOTE(review): the scan stops before the last 'M', so that column itself is\r
+// not tallied either (Path "M" gives Cols == 0 and a result of 0.0) -- confirm\r
+// this is intended.\r
+static double GetFractIdGivenPathDerep(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       if (Path[0] == 'D')\r
+               {\r
+               if (ptrDesc != 0)\r
+                       sprintf(ptrDesc, "(term gap in Query)");\r
+               return 0.0;\r
+               }\r
+\r
+       // Address of the final 'M'; stays null when the path has no 'M'.\r
+       const char *Stop = 0;\r
+       for (const char *q = Path; *q != 0; ++q)\r
+               {\r
+               if (*q == 'M')\r
+                       Stop = q;\r
+               }\r
+\r
+       unsigned IxA = 0;\r
+       unsigned IxB = 0;\r
+       unsigned IdCount = 0;\r
+       unsigned DiffCount = 0;\r
+       unsigned ColCount = 0;\r
+       for (const char *q = Path; *q != 0 && q != Stop; ++q)\r
+               {\r
+               switch (*q)\r
+                       {\r
+               case 'M':\r
+                       {\r
+                       const byte a = toupper(A[IxA]);\r
+                       const byte b = toupper(B[IxB]);\r
+                       // Columns with a wildcard letter are left out of the count.\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               ++ColCount;\r
+                               if (a == b)\r
+                                       ++IdCount;\r
+                               else\r
+                                       ++DiffCount;\r
+                               }\r
+                       ++IxA;\r
+                       ++IxB;\r
+                       break;\r
+                       }\r
+\r
+               case 'D':\r
+                       ++ColCount;\r
+                       ++DiffCount;\r
+                       ++IxA;\r
+                       break;\r
+\r
+               case 'I':\r
+                       ++ColCount;\r
+                       ++DiffCount;\r
+                       ++IxB;\r
+                       break;\r
+\r
+               default:\r
+                       // Any other character still occupies a column.\r
+                       ++ColCount;\r
+                       break;\r
+                       }\r
+               }\r
+\r
+       double FractId = 0.0;\r
+       if (ColCount != 0)\r
+               FractId = 1.0 - double(DiffCount)/double(ColCount);\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/cols=%u)", IdCount, ColCount);\r
+       return FractId;\r
+       }\r
+\r
+static double GetFractIdGivenPathAllDiffs(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       unsigned PosA = 0;\r
+       unsigned PosB = 0;\r
+       unsigned Ids = 0;\r
+       unsigned Diffs = 0;\r
+       unsigned Cols = 0;\r
+       for (const char *p = Path; *p; ++p)\r
+               {\r
+               ++Cols;\r
+               char c = *p;\r
+               if (c == 'M')\r
+                       {\r
+                       byte a = toupper(A[PosA]);\r
+                       byte b = toupper(B[PosB]);\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               if (a == b)\r
+                                       ++Ids;\r
+                               else\r
+                                       ++Diffs;\r
+                               }\r
+                       else\r
+                               --Cols;\r
+                       }\r
+               if (c == 'D' || c == 'I')\r
+                       ++Diffs;\r
+               if (c == 'M' || c == 'D')\r
+                       ++PosA;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PosB;\r
+               }\r
+\r
+       double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
+       return FractId;\r
+       }\r
+\r
+// IdDef 2 ("internal diffs") identity: only the columns from the first 'M' to\r
+// the last 'M' (both included) are tallied, so leading and trailing gap\r
+// columns are ignored. The result is 1 - Diffs/Cols over that span. A path\r
+// with no 'M' returns 0.0 and reports "(no matches)".\r
+// ptrDesc, when non-null, receives a short description of the outcome.\r
+static double GetFractIdGivenPathInternalDiffs(const byte *A, const byte *B,\r
+  const char *Path, char *ptrDesc)\r
+       {\r
+       // Pass 1: find the first and the last 'M' column.\r
+       const char *First = 0;\r
+       const char *Last = 0;\r
+       for (const char *q = Path; *q != 0; ++q)\r
+               {\r
+               if (*q != 'M')\r
+                       continue;\r
+               if (First == 0)\r
+                       First = q;\r
+               Last = q;\r
+               }\r
+       if (First == 0)\r
+               {\r
+               if (ptrDesc != 0)\r
+                       strcpy(ptrDesc, "(no matches)");\r
+               return 0.0;\r
+               }\r
+\r
+       // Pass 2: skip the columns before the first 'M'. None of them is an 'M',\r
+       // so only 'D' (advances A) and 'I' (advances B) move the positions.\r
+       unsigned IxA = 0;\r
+       unsigned IxB = 0;\r
+       for (const char *q = Path; q != First; ++q)\r
+               {\r
+               if (*q == 'D')\r
+                       ++IxA;\r
+               else if (*q == 'I')\r
+                       ++IxB;\r
+               }\r
+\r
+       // Pass 3: tally the span First..Last.\r
+       unsigned IdCount = 0;\r
+       unsigned DiffCount = 0;\r
+       unsigned ColCount = 0;\r
+       for (const char *q = First; q <= Last; ++q)\r
+               {\r
+               switch (*q)\r
+                       {\r
+               case 'M':\r
+                       {\r
+                       const byte a = toupper(A[IxA]);\r
+                       const byte b = toupper(B[IxB]);\r
+                       // Columns with a wildcard letter are left out of the count.\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               ++ColCount;\r
+                               if (a == b)\r
+                                       ++IdCount;\r
+                               else\r
+                                       ++DiffCount;\r
+                               }\r
+                       ++IxA;\r
+                       ++IxB;\r
+                       break;\r
+                       }\r
+\r
+               case 'D':\r
+                       ++ColCount;\r
+                       ++DiffCount;\r
+                       ++IxA;\r
+                       break;\r
+\r
+               case 'I':\r
+                       ++ColCount;\r
+                       ++DiffCount;\r
+                       ++IxB;\r
+                       break;\r
+\r
+               default:\r
+                       // Any other character still occupies a column.\r
+                       ++ColCount;\r
+                       break;\r
+                       }\r
+               }\r
+\r
+       double FractId = 0.0;\r
+       if (ColCount != 0)\r
+               FractId = 1.0 - double(DiffCount)/double(ColCount);\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/cols=%u)", IdCount, ColCount);\r
+       return FractId;\r
+       }\r
+\r
+static double GetFractIdGivenPathMBL(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       unsigned PosA = 0;\r
+       unsigned PosB = 0;\r
+       unsigned Mismatches = 0;\r
+       unsigned Gaps = 0;\r
+       for (const char *p = Path; *p; ++p)\r
+               {\r
+               char c = *p;\r
+               if (c == 'M' && toupper(A[PosA]) != toupper(B[PosB]))\r
+                       ++Mismatches;\r
+               if (c == 'D' || c == 'I' && (p == Path || p[-1] == 'M'))\r
+                       ++Gaps;\r
+               if (c == 'M' || c == 'D')\r
+                       ++PosA;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PosB;\r
+               }\r
+       unsigned Diffs = Gaps + Mismatches;\r
+       double FractDiffs = (PosB == 0 ? 0.0 : double(Diffs)/double(PosB));\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "Gap opens %u, Id=1 - [(diffs=%u)/(target_length=%u)]",\r
+                 Gaps, Diffs, PosB);\r
+       double FractId = 1.0 - FractDiffs;\r
+       if (FractId < 0.0)\r
+               return 0.0;\r
+       return FractId;\r
+       }\r
+\r
+static double GetFractIdGivenPathBLAST(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       unsigned PosA = 0;\r
+       unsigned PosB = 0;\r
+       unsigned Ids = 0;\r
+       unsigned Wilds = 0;\r
+       unsigned Cols = 0;\r
+       for (const char *p = Path; *p; ++p)\r
+               {\r
+               ++Cols;\r
+               char c = *p;\r
+               if (c == 'M')\r
+                       {\r
+                       byte a = toupper(A[PosA]);\r
+                       byte b = toupper(B[PosB]);\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               if (a == b)\r
+                                       ++Ids;\r
+                               }\r
+                       else\r
+                               ++Wilds;\r
+                       }\r
+               if (c == 'M' || c == 'D')\r
+                       ++PosA;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PosB;\r
+               }\r
+       asserta(Cols >= Wilds);\r
+       Cols -= Wilds;\r
+       double FractId = Cols == 0 ? 0.0f : float(Ids)/float(Cols);\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
+       return FractId;\r
+       }\r
+\r
+static double GetFractIdGivenPathDefault(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       unsigned PosA = 0;\r
+       unsigned PosB = 0;\r
+       unsigned Ids = 0;\r
+       unsigned Wilds = 0;\r
+       for (const char *p = Path; *p; ++p)\r
+               {\r
+               char c = *p;\r
+               if (c == 'M')\r
+                       {\r
+                       byte a = toupper(A[PosA]);\r
+                       byte b = toupper(B[PosB]);\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               if (a == b)\r
+                                       ++Ids;\r
+                               }\r
+                       else\r
+                               ++Wilds;\r
+                       }\r
+               if (c == 'M' || c == 'D')\r
+                       ++PosA;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PosB;\r
+               }\r
+       unsigned MinLen = min(PosA, PosB) - Wilds;\r
+       double FractId = (MinLen == 0 ? 0.0 : double(Ids)/double(MinLen));\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/shorter_length=%u)", Ids, MinLen);\r
+       return FractId;\r
+       }\r
+\r
+// Entry point: fractional identity of the alignment of A and B described by\r
+// Path, using the identity definition selected by IdDef.\r
+//\r
+//   A, B     sequences; indexed as Path is walked ('M' advances both, 'D'\r
+//            advances A only, 'I' advances B only)\r
+//   Path     NUL-terminated string of 'M'/'D'/'I' columns; NULL or empty\r
+//            gives 0.0\r
+//   Nucleo   true selects g_IsACGTU, false g_IsAminoChar, as the table of\r
+//            letters that the helpers treat as non-wildcard\r
+//   ptrDesc  optional caller-supplied buffer that receives a short text\r
+//            explaining the result; may be NULL. Its size is not checked.\r
+//   IdDef    0 default, 1 all-diffs, 2 internal-diffs, 3 MBL, 4 BLAST,\r
+//            5 derep; any other value calls Die()\r
+//\r
+// Returns 0.0 early when the path violates opt_leftjust / opt_rightjust or\r
+// when a gap run exceeds opt_maxqgap / opt_maxtgap.\r
+// Not re-entrant: the file-static g_IsChar is rewritten on every call.\r
+//\r
+// Fixes: the invalid-IdDef message now reports the IdDef that was actually\r
+// passed in (it used to print opt_iddef, which can differ), and an empty path\r
+// now fills ptrDesc instead of leaving the caller's buffer untouched.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
+  bool Nucleo, char *ptrDesc, unsigned IdDef)\r
+       {\r
+       if (Nucleo)\r
+               g_IsChar = g_IsACGTU;\r
+       else\r
+               g_IsChar = g_IsAminoChar;\r
+\r
+       if (Path == 0)\r
+               {\r
+               if (ptrDesc != 0)\r
+                       strcpy(ptrDesc, "(NULL path)");\r
+               return 0.0;\r
+               }\r
+\r
+       unsigned ColCount = (unsigned) strlen(Path);\r
+       if (ColCount == 0)\r
+               {\r
+               if (ptrDesc != 0)\r
+                       strcpy(ptrDesc, "(empty path)");\r
+               return 0.0;\r
+               }\r
+\r
+       if (opt_leftjust)\r
+               {\r
+               if (Path[0] != 'M' || Path[ColCount-1] == 'D')\r
+                       {\r
+                       if (ptrDesc != 0)\r
+                               strcpy(ptrDesc, "(leftjust)");\r
+                       return 0.0;\r
+                       }\r
+               }\r
+\r
+       if (opt_rightjust)\r
+               {\r
+               if (Path[0] == 'D' || Path[ColCount-1] != 'M')\r
+                       {\r
+                       if (ptrDesc != 0)\r
+                               strcpy(ptrDesc, "(rightjust)");\r
+                       return 0.0;\r
+                       }\r
+               }\r
+\r
+       double FractId = 0.0;\r
+       //if (opt_idprefix > 0)\r
+       //      {\r
+       //      for (unsigned i = 0; i < opt_idprefix; ++i)\r
+       //              {\r
+       //              char c = Path[i];\r
+       //              if (c != 'M' || toupper(A[i]) != toupper(B[i]))\r
+       //                      {\r
+       //                      if (ptrDesc != 0)\r
+       //                              sprintf(ptrDesc, "Prefix ids %u < idprefix(%u)",\r
+       //                                i, opt_idprefix);\r
+       //                      return 0.0;\r
+       //                      }\r
+       //              }\r
+       //      }\r
+\r
+       //if (opt_idsuffix > 0)\r
+       //      {\r
+       //      unsigned Cols = strlen(Path);\r
+       //      for (unsigned i = 0; i < opt_idsuffix && i > Cols; ++i)\r
+       //              {\r
+       //              unsigned k = Cols - 1 - i;\r
+       //              char c = Path[k];\r
+       //              if (c != 'M' || toupper(A[k]) != toupper(B[k]))\r
+       //                      {\r
+       //                      if (ptrDesc != 0)\r
+       //                              sprintf(ptrDesc, "Suffix ids %u < idsuffix(%u)",\r
+       //                                i, opt_idsuffix);\r
+       //                      return 0.0;\r
+       //                      }\r
+       //              }\r
+       //      }\r
+\r
+       // Gap-length filter: L is the length of the current run of gap columns;\r
+       // it is checked against the limit when the run is closed by an 'M'.\r
+       // NOTE(review): the scan stops before the last 'M', so a gap run that\r
+       // ends at the last 'M' is never checked -- confirm this is intended.\r
+       if (opt_maxqgap > 0 || opt_maxtgap > 0)\r
+               {\r
+               unsigned L = 0;\r
+               const char *LastM = 0;\r
+               for (const char *p = Path; *p; ++p)\r
+                       if (*p == 'M')\r
+                               LastM = p;\r
+\r
+//             g_MaxL = 0;\r
+               for (const char *p = Path; *p && p != LastM; ++p)\r
+                       {\r
+                       char c = *p;\r
+                       switch (c)\r
+                               {\r
+                       case 'M':\r
+                               if (L > 0)\r
+                                       {\r
+                                       // L > 0 implies the previous column was a gap.\r
+                                       if (p[-1] == 'D')\r
+                                               {\r
+                                               if (L > opt_maxtgap)\r
+                                                       {\r
+                                                       if (ptrDesc != 0)\r
+                                                               sprintf(ptrDesc, "(maxtgap)");\r
+                                                       return 0.0;\r
+                                                       }\r
+                                               }\r
+                                       else if (p[-1] == 'I')\r
+                                               {\r
+                                               if (L > opt_maxqgap)\r
+                                                       {\r
+                                                       if (ptrDesc != 0)\r
+                                                               sprintf(ptrDesc, "(maxqgap)");\r
+                                                       return 0.0;\r
+                                                       }\r
+                                               }\r
+                                       else\r
+                                               asserta(false);\r
+                                       }\r
+                               L = 0;\r
+                               break;\r
+\r
+                       case 'D':\r
+                       case 'I':\r
+                               ++L;\r
+                               //if (L > g_MaxL)\r
+                               //      g_MaxL = L;\r
+                               break;\r
+\r
+                       default:\r
+                               asserta(false);\r
+                               }\r
+                       }\r
+               }\r
+\r
+       switch (IdDef)\r
+               {\r
+       case 0:\r
+               FractId = GetFractIdGivenPathDefault(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 1:\r
+               FractId = GetFractIdGivenPathAllDiffs(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 2:\r
+               FractId = GetFractIdGivenPathInternalDiffs(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 3:\r
+               FractId = GetFractIdGivenPathMBL(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 4:\r
+               FractId = GetFractIdGivenPathBLAST(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 5:\r
+               FractId = GetFractIdGivenPathDerep(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       default:\r
+               // Report the value that was dispatched on, not the global option.\r
+               Die("--iddef %u invalid", IdDef);\r
+               }\r
+\r
+       return FractId;\r
+       }\r
+\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
+  bool Nucleo, char *ptrDesc)\r
+       {\r
+       return GetFractIdGivenPath(A, B, Path, Nucleo, ptrDesc, opt_iddef);\r
+       }\r
+\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo)\r
+       {\r
+       return GetFractIdGivenPath(A, B, Path, Nucleo, (char *) 0);\r
+       }\r
+\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const string &Path)\r
+       {\r
+       return GetFractIdGivenPath(A, B, Path.c_str(), true);\r
+       }\r
+\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path)\r
+       {\r
+       return GetFractIdGivenPath(A, B, Path, true);\r
+       }\r
diff --git a/uchime_src/getparents.cpp b/uchime_src/getparents.cpp
new file mode 100644 (file)
index 0000000..d82f902
--- /dev/null
@@ -0,0 +1,89 @@
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "ultra.h"\r
+#include <set>\r
+\r
+void AddTargets(Ultra &U, const SeqData &Query, set<unsigned> &TargetIndexes);\r
+\r
+void GetChunkInfo(unsigned L, unsigned &Length, vector<unsigned> &Los)\r
+       {\r
+       Los.clear();\r
+\r
+       if (L <= opt_minchunk)\r
+               {\r
+               Length = L;\r
+               Los.push_back(0);\r
+               return;\r
+               }\r
+\r
+       Length = (L - 1)/opt_chunks + 1;\r
+       if (Length < opt_minchunk)\r
+               Length = opt_minchunk;\r
+\r
+       unsigned Lo = 0;\r
+       for (;;)\r
+               {\r
+               if (Lo + Length >= L)\r
+                       {\r
+                       Lo = L - Length - 1;\r
+                       Los.push_back(Lo);\r
+                       return;\r
+                       }\r
+               Los.push_back(Lo);\r
+               Lo += Length;\r
+               }\r
+       }\r
+\r
+void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,\r
+  vector<unsigned> &Parents)\r
+       {\r
+       Parents.clear();\r
+\r
+       set<unsigned> TargetIndexes;\r
+\r
+       unsigned QL = QSD.L;\r
+\r
+       SeqData QuerySD = QSD;\r
+\r
+       unsigned ChunkLength;\r
+       vector<unsigned> ChunkLos;\r
+       GetChunkInfo(QL, ChunkLength, ChunkLos);\r
+       unsigned ChunkCount = SIZE(ChunkLos);\r
+       for (unsigned ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)\r
+               {\r
+               unsigned Lo = ChunkLos[ChunkIndex];\r
+               asserta(Lo + ChunkLength <= QL);\r
+\r
+               const byte *Chunk = QSD.Seq + Lo;\r
+\r
+       // THIS MESSES UP --self!!\r
+               //char Prefix[32];\r
+               //sprintf(Prefix, "%u|", Lo);\r
+               //string ChunkLabel = string(Prefix) + string(QSD.Label);\r
+\r
+               //QuerySD.Label = ChunkLabel.c_str();\r
+               QuerySD.Seq = Chunk;\r
+               QuerySD.L = ChunkLength;\r
+\r
+               AddTargets(U, QuerySD, TargetIndexes);\r
+\r
+               Lo += ChunkLength;\r
+               }\r
+\r
+       for (set<unsigned>::const_iterator p = TargetIndexes.begin();\r
+         p != TargetIndexes.end(); ++p)\r
+               {\r
+               unsigned TargetIndex = *p;\r
+               bool Accept = true;\r
+               if (AbQ > 0.0f)\r
+                       {\r
+                       const char *TargetLabel = U.GetSeedLabel(TargetIndex);\r
+                       float AbT = GetAbFromLabel(string(TargetLabel));\r
+                       if (AbT > 0.0f && AbT < opt_abskew*AbQ)\r
+                               Accept = false;\r
+                       }\r
+\r
+               if (Accept)\r
+                       Parents.push_back(TargetIndex);\r
+               }\r
+       }\r
diff --git a/uchime_src/globalalign2.cpp b/uchime_src/globalalign2.cpp
new file mode 100644 (file)
index 0000000..2adfb71
--- /dev/null
@@ -0,0 +1,45 @@
+#if    UCHIMES
+
+#include "dp.h"
+#include "seq.h"
+
+static AlnParams g_AP;
+static bool g_APInitDone = false;
+
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, PathData &PD)\r
+       {\r
+       if (!g_APInitDone)\r
+               {\r
+               g_AP.InitFromCmdLine(true);\r
+               g_APInitDone = true;\r
+               }\r
+\r
+       ViterbiFast(Query.Seq, Query.L, Target.Seq, Target.L, g_AP, PD);\r
+       return true;\r
+       }\r
+\r
+// Same alignment as the PathData overload, but hands the path back as a\r
+// std::string copied from PD.Start. Always returns true.\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path)\r
+       {\r
+       PathData Scratch;\r
+       GlobalAlign(Query, Target, Scratch);\r
+       const char *PathStr = Scratch.Start;\r
+       Path = string(PathStr);\r
+       return true;\r
+       }\r
+\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &/*AP*/,\r
+  const AlnHeuristics &AH, HSPFinder &/*HF*/, float /*MinFractId*/, float &/*HSPId*/, PathData &PD)\r
+       {\r
+       PD.Clear();\r
+       string Path;\r
+       bool Found = GlobalAlign(Query, Target, Path);\r
+       if (!Found)\r
+               return false;\r
+       unsigned n = SIZE(Path);\r
+       PD.Alloc(n+1);\r
+       memcpy(PD.Front, Path.c_str(), n);\r
+       PD.Start = PD.Front;\r
+       PD.Start[n] = 0;\r
+       return true;\r
+       }\r
+\r
+#endif // UCHIMES\r
diff --git a/uchime_src/help.h b/uchime_src/help.h
new file mode 100644 (file)
index 0000000..9d7a89f
--- /dev/null
@@ -0,0 +1,127 @@
+"\n"
+"Usage\n"
+"-----\n"
+"\n"
+"uchime --input query.fasta [--db db.fasta] [--uchimeout results.uchime]\n"
+"    [--uchimealns results.alns]\n"
+"\n"
+"Options\n"
+"-------\n"
+"\n"
+"--input filename\n"
+"    Query sequences in FASTA format.\n"
+"    If the --db option is not specified, uchime uses de novo\n"
+"    detection. In de novo mode, relative abundance must be given\n"
+"    by a string /ab=xxx/ somewhere in the label, where xxx is a\n"
+"    floating-point number, e.g. >F00QGH67HG/ab=1.2/.\n"
+"\n"
+"--db filename\n"
+"    Reference database in FASTA format.\n"
+"    Optional, if not specified uchime uses de novo mode.\n"
+"\n"
+"    ***WARNING*** The database is searched ONLY on the plus strand.\n"
+"    You MUST include reverse-complemented sequences in the database\n"
+"    if you want both strands to be searched.\n"
+"\n"
+"--abskew x\n"
+"    Minimum abundance skew. Default 1.9. De novo mode only.\n"
+"    Abundance skew is:\n"
+"        min [ abund(parent1), abund(parent2) ] / abund(query).\n"
+"\n"
+"--uchimeout filename\n"
+"    Output in tabbed format with one record per query sequence.\n"
+"    First field is score (h), second field is query label.\n"
+"    For details, see manual.\n"
+"\n"
+"--uchimealns filename\n"
+"    Multiple alignments of query sequences to parents in human-\n"
+"    readable format. Alignments show columns with differences\n"
+"    that support or contradict a chimeric model.\n"
+"\n"
+"--minh h\n"
+"    Minimum score to report chimera. Default 0.3. Values from 0.1\n"
+"    to 5 might be reasonable. Lower values increase sensitivity\n"
+"    but may report more false positives. If you decrease --xn,\n"
+"    you may need to increase --minh, and vice versa.\n"
+"\n"
+"--mindiv div\n"
+"    Minimum divergence ratio, default 0.5. Div ratio is 100%% - \n"
+"    %%identity between query sequence and the closest candidate for\n"
+"    being a parent. If you don't care about very close chimeras,\n"
+"    then you could increase --mindiv to, say, 1.0 or 2.0, and\n"
+"    also decrease --minh, say to 0.1, to increase sensitivity.\n"
+"    How well this works will depend on your data. Best is to\n"
+"    tune parameters on a good benchmark.\n"
+"\n"
+"--xn beta\n"
+"    Weight of a no vote, also called the beta parameter. Default 8.0.\n"
+"    Decreasing this weight to around 3 or 4 may give better\n"
+"    performance on denoised data.\n"
+"\n"
+"--dn n\n"
+"    Pseudo-count prior on number of no votes. Default 1.4. Probably\n"
+"    no good reason to change this unless you can retune to a good\n"
+"    benchmark for your data. Reasonable values are probably in the\n"
+"    range from 0.2 to 2.\n"
+"\n"
+"--xa w\n"
+"    Weight of an abstain vote. Default 1. So far, results do not\n"
+"    seem to be very sensitive to this parameter, but if you have\n"
+"    a good training set might be worth trying. Reasonable values\n"
+"    might range from 0.1 to 2.\n"
+"\n"
+"--chunks n\n"
+"    Number of chunks to extract from the query sequence when searching\n"
+"    for parents. Default 4.\n"
+"\n"
+"--[no]ovchunks\n"
+"    [Do not] use overlapping chunks. Default do not.\n"
+"\n"
+"--minchunk n\n"
+"    Minimum length of a chunk. Default 64.\n"
+"\n"
+"--idsmoothwindow w\n"
+"    Length of id smoothing window. Default 32.\n"
+"\n"
+"--minsmoothid f\n"
+"    Minimum fractional identity over smoothed window of candidate parent.\n"
+"    Default 0.95.\n"
+"\n"
+"--maxp n\n"
+"    Maximum number of candidate parents to consider. Default 2. In tests so\n"
+"    far, increasing --maxp gives only a very small improvement in sensitivity\n"
+"    but tends to increase the error rate quite a bit.\n"
+"\n"
+"--[no]skipgaps\n"
+"--[no]skipgaps2\n"
+"    These options control how gapped columns affect counting of diffs.\n"
+"    If --skipgaps is specified, columns containing gaps do not count as diffs.\n"
+"    If --skipgaps2 is specified, if column is immediately adjacent to\n"
+"    a column containing a gap, it is not counted as a diff.\n"
+"    Default is --skipgaps --skipgaps2.\n"
+"\n"
+"--minlen L\n"
+"--maxlen L\n"
+"    Minimum and maximum sequence length. Defaults 10, 10000.\n"
+"    Applies to both query and reference sequences.\n"
+"\n"
+"--ucl\n"
+"    Use local-X alignments. Default is global-X. On tests so far, global-X\n"
+"    is always better; this option is retained because it just might work\n"
+"    well on some future type of data.\n"
+"\n"
+"--queryfract f\n"
+"    Minimum fraction of the query sequence that must be covered by a local-X\n"
+"    alignment. Default 0.5. Applies only when --ucl is specified.\n"
+"\n"
+"--quiet\n"
+"    Do not display progress messages on stderr.\n"
+"\n"
+"--log filename\n"
+"    Write miscellaneous information to the log file. Mostly of interest\n"
+"    to me (the algorithm developer). Use --verbose to get more info.\n"
+"\n"
+"--self\n"
+"    In reference database mode, exclude a reference sequence if it has\n"
+"    the same label as the query. This is useful for benchmarking by using\n"
+"    the ref db as a query to test for false positives.\n"
diff --git a/uchime_src/hsp.h b/uchime_src/hsp.h
new file mode 100644 (file)
index 0000000..339256f
--- /dev/null
@@ -0,0 +1,114 @@
+#ifndef hsp_h\r
+#define hsp_h  1\r
+\r
+// One ungapped segment pair: Leni letters starting at Loi in the first\r
+// sequence and Lenj letters starting at Loj in the second, with a score.\r
+// (Which sequence is the query and what User holds are not visible here.)\r
+//\r
+// Changes in OverlapFract(): the products are formed in double so they cannot\r
+// overflow unsigned arithmetic for long segments, and a zero-length argument\r
+// returns 0.0 instead of reaching GetHii()/GetHij(), which require a non-zero\r
+// length.\r
+struct HSPData\r
+       {\r
+       unsigned Loi;\r
+       unsigned Loj;\r
+       unsigned Leni;\r
+       unsigned Lenj;\r
+       float Score;\r
+       unsigned User;\r
+\r
+       // Common length of the two segments; calls Die() when they differ.\r
+       unsigned GetLength() const\r
+               {\r
+               if (Leni != Lenj)\r
+                       Die("HSP::GetLength(): Leni %u, Lenj %u, Loi %u, Loj %u, Score %.1f",\r
+                         Leni, Lenj, Loi, Loj, Score);\r
+\r
+               return Leni;\r
+               }\r
+\r
+       // Last position (inclusive) covered in the first sequence; needs Leni > 0.\r
+       unsigned GetHii() const\r
+               {\r
+               assert(Leni > 0);\r
+               return Loi + Leni - 1;\r
+               }\r
+\r
+       // Last position (inclusive) covered in the second sequence; needs Lenj > 0.\r
+       unsigned GetHij() const\r
+               {\r
+               assert(Lenj > 0);\r
+               return Loj + Lenj - 1;\r
+               }\r
+\r
+       // True when the segment starts at position 0 of the first sequence.\r
+       bool LeftA() const\r
+               {\r
+               return Loi == 0;\r
+               }\r
+\r
+       // True when the segment starts at position 0 of the second sequence.\r
+       bool LeftB() const\r
+               {\r
+               return Loj == 0;\r
+               }\r
+\r
+       // True when the segment ends exactly at length LA of the first sequence.\r
+       bool RightA(unsigned LA) const\r
+               {\r
+               return Loi + Leni == LA;\r
+               }\r
+\r
+       // True when the segment ends exactly at length LB of the second sequence.\r
+       bool RightB(unsigned LB) const\r
+               {\r
+               return Loj + Lenj == LB;\r
+               }\r
+\r
+       // Number of positions where A and B hold the same letter, ignoring case.\r
+       // Uses GetLength(), so the two lengths must agree.\r
+       unsigned GetIdCount(const byte *A, const byte *B) const\r
+               {\r
+               unsigned Count = 0;\r
+               unsigned K = GetLength();\r
+               for (unsigned k = 0; k < K; ++k)\r
+                       {\r
+                       byte a = A[Loi+k];\r
+                       byte b = B[Loj+k];\r
+                       if (toupper(a) == toupper(b))\r
+                               Count++;\r
+                       }\r
+               return Count;\r
+               }\r
+\r
+       // Overlap with HSP as a fraction of this segment's Leni*Lenj area;\r
+       // 0.0 when either segment pair has a zero length.\r
+       // NOTE(review): GetHii()/GetHij() are inclusive ends, so Ovi and Ovj are\r
+       // one less than the number of shared positions (two identical segments\r
+       // of length 10 give 0.81, not 1.0). Left unchanged -- confirm intent.\r
+       double OverlapFract(const HSPData &HSP) const\r
+               {\r
+               if (Leni == 0 || Lenj == 0)\r
+                       return 0.0;\r
+               if (HSP.Leni == 0 || HSP.Lenj == 0)\r
+                       return 0.0;\r
+\r
+               unsigned MaxLoi = max(Loi, HSP.Loi);\r
+               unsigned MaxLoj = max(Loj, HSP.Loj);\r
+               unsigned MinHii = min(GetHii(), HSP.GetHii());\r
+               unsigned MinHij = min(GetHij(), HSP.GetHij());\r
+\r
+               unsigned Ovi = (MinHii < MaxLoi) ? 0 : MinHii - MaxLoi;\r
+               unsigned Ovj = (MinHij < MaxLoj) ? 0 : MinHij - MaxLoj;\r
+\r
+               asserta(Ovi <= Leni && Ovj <= Lenj);\r
+               // Multiply as doubles: unsigned products wrap for long segments.\r
+               return (double(Ovi)*double(Ovj))/(double(Leni)*double(Lenj));\r
+               }\r
+\r
+       // Orders segments by their start in the first sequence only.\r
+       bool operator<(const HSPData &rhs) const\r
+               {\r
+               return Loi < rhs.Loi;\r
+               }\r
+\r
+       // Log starts, lengths and score on one line.\r
+       void LogMe() const\r
+               {\r
+               Log("Loi=%u Loj=%u Li=%u Lj=%u Score=%.1f\n", Loi, Loj, Leni, Lenj, Score);\r
+               }\r
+\r
+       // Compact log form with inclusive ranges; needs non-zero lengths.\r
+       void LogMe2() const\r
+               {\r
+               Log("(%u-%u,%u-%u/%.1f)", Loi, GetHii(), Loj, GetHij(), Score);\r
+               }\r
+       };\r
+\r
+// Bendpoint\r
+struct BPData\r
+       {\r
+       unsigned Pos;\r
+       bool IsLo;\r
+       unsigned Index;\r
+\r
+       void LogMe() const\r
+               {\r
+               Log("BP%s Pos %u Ix %u", (IsLo ? "lo" : "hi"), Pos, Index);\r
+               }\r
+       };\r
+\r
+#endif // hsp_h\r
diff --git a/uchime_src/hspfinder.h b/uchime_src/hspfinder.h
new file mode 100644 (file)
index 0000000..2b8e9d8
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef hspfinder_h
+#define hspfinder_h
+
+#include "seq.h"
+
+// Stub class: both setters take a sequence and have empty bodies.
+class HSPFinder
+       {
+public:
+       // Empty body: the argument is ignored.
+       void SetA(const SeqData &)
+               {
+               }
+
+       // Empty body: the argument is ignored.
+       void SetB(const SeqData &)
+               {
+               }
+       };
+
+#endif // hspfinder_h
diff --git a/uchime_src/make3way.cpp b/uchime_src/make3way.cpp
new file mode 100644 (file)
index 0000000..ce88f86
--- /dev/null
@@ -0,0 +1,173 @@
+#include "myutils.h"\r
+#include "sfasta.h"\r
+#include "path.h"\r
+#include "dp.h"\r
+\r
+#if    DEBUG\r
+// Asserts that Path covers exactly LQ query letters (M or D columns) and\r
+// LT target letters (M or I columns).\r
+static void AssertPath3Way(const string &Path, unsigned LQ, unsigned LT)\r
+       {\r
+       unsigned QLen = 0;\r
+       unsigned TLen = 0;\r
+       for (unsigned i = 0; i < SIZE(Path); ++i)\r
+               {\r
+               char c = Path[i];\r
+               if (c == 'M' || c == 'D')\r
+                       ++QLen;\r
+               if (c == 'M' || c == 'I')\r
+                       ++TLen;\r
+               }\r
+       asserta(QLen == LQ);\r
+       asserta(TLen == LT);\r
+       }\r
+#endif\r
+\r
+// Sets Counts[q] to the number of 'I' columns in Path that occur after q\r
+// query letters have been consumed. Counts has LQ+1 slots; slot LQ holds\r
+// the inserts that follow the last query letter.\r
+static void CountInserts3Way(const string &Path, unsigned LQ,\r
+  vector<unsigned> &Counts)\r
+       {\r
+       Counts.assign(LQ+1, 0);\r
+       unsigned QPos = 0;\r
+       for (unsigned i = 0; i < SIZE(Path); ++i)\r
+               {\r
+               char c = Path[i];\r
+               if (c == 'M' || c == 'D')\r
+                       ++QPos;\r
+               else\r
+                       {\r
+                       asserta(c == 'I');\r
+                       asserta(QPos <= LQ);\r
+                       ++(Counts[QPos]);\r
+                       }\r
+               }\r
+       }\r
+\r
+// Appends to Row the target sequence T (length LT) laid out on the three-way\r
+// columns: letters are upper-cased, 'D' columns become '-', and each insert\r
+// run is padded with '-' up to InsertCounts[] so that all rows line up.\r
+static void AppendRow3Way(const string &Path, const byte *T, unsigned LT,\r
+  unsigned LQ, const vector<unsigned> &InsertCounts, string &Row)\r
+       {\r
+       unsigned QPos = 0;\r
+       unsigned TPos = 0;\r
+       // Number of inserted letters emitted since the previous query column.\r
+       unsigned is = 0;\r
+       for (unsigned i = 0; i < SIZE(Path); ++i)\r
+               {\r
+               char c = Path[i];\r
+               if (c == 'M' || c == 'D')\r
+                       {\r
+                       // Pad the insert run before this query column to full width.\r
+                       asserta(QPos < LQ);\r
+                       asserta(is <= InsertCounts[QPos]);\r
+                       Row.append(InsertCounts[QPos]-is, '-');\r
+                       is = 0;\r
+                       ++QPos;\r
+                       }\r
+               if (c == 'M')\r
+                       {\r
+                       asserta(TPos < LT);\r
+                       Row.push_back(toupper(T[TPos++]));\r
+                       }\r
+               else if (c == 'D')\r
+                       Row.push_back('-');\r
+               else if (c == 'I')\r
+                       {\r
+                       ++is;\r
+                       asserta(TPos < LT);\r
+                       Row.push_back(toupper(T[TPos++]));\r
+                       }\r
+               }\r
+       // Pad the insert run that follows the last query letter.\r
+       asserta(is <= InsertCounts[LQ]);\r
+       Row.append(InsertCounts[LQ]-is, '-');\r
+       asserta(QPos == LQ);\r
+       asserta(TPos == LT);\r
+       }\r
+\r
+// Builds a three-row alignment (Q3, A3, B3) of query QSD against ASD and BSD\r
+// from the two pairwise paths PathQA and PathQB. In a path, 'M' consumes a\r
+// letter from both sequences, 'D' a query letter only and 'I' a target letter\r
+// only. Output letters are upper-cased, gaps are '-', and the three output\r
+// strings always have equal length.\r
+void Make3Way(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
+  const string &PathQA, const string &PathQB,\r
+  string &Q3, string &A3, string &B3)\r
+       {\r
+       Q3.clear();\r
+       A3.clear();\r
+       B3.clear();\r
+\r
+#if    DEBUG\r
+       AssertPath3Way(PathQA, QSD.L, ASD.L);\r
+       AssertPath3Way(PathQB, QSD.L, BSD.L);\r
+#endif\r
+\r
+       const byte *Q = QSD.Seq;\r
+       unsigned LQ = QSD.L;\r
+\r
+       vector<unsigned> InsertCountsA;\r
+       vector<unsigned> InsertCountsB;\r
+       CountInserts3Way(PathQA, LQ, InsertCountsA);\r
+       CountInserts3Way(PathQB, LQ, InsertCountsB);\r
+\r
+       // Each insert slot must be wide enough for the longer of the two inserts.\r
+       vector<unsigned> InsertCounts;\r
+       InsertCounts.reserve(LQ+1);\r
+       for (unsigned i = 0; i <= LQ; ++i)\r
+               InsertCounts.push_back(max(InsertCountsA[i], InsertCountsB[i]));\r
+\r
+// Q\r
+       for (unsigned i = 0; i < LQ; ++i)\r
+               {\r
+               Q3.append(InsertCounts[i], '-');\r
+               Q3.push_back(toupper(Q[i]));\r
+               }\r
+       Q3.append(InsertCounts[LQ], '-');\r
+\r
+// A\r
+       AppendRow3Way(PathQA, ASD.Seq, ASD.L, LQ, InsertCounts, A3);\r
+\r
+// B\r
+       AppendRow3Way(PathQB, BSD.Seq, BSD.L, LQ, InsertCounts, B3);\r
+\r
+       asserta(SIZE(Q3) == SIZE(A3));\r
+       asserta(SIZE(Q3) == SIZE(B3));\r
+       }\r
diff --git a/uchime_src/mk b/uchime_src/mk
new file mode 100755 (executable)
index 0000000..24aeba0
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/bash
+CPPNames='addtargets2 alignchime alignchimel alnparams alpha alpha2 fractid getparents globalalign2 make3way mx myutils path searchchime seqdb setnucmx sfasta tracebackbit uchime_main usort viterbifast writechhit'
+ObjNames='addtargets2.o alignchime.o alignchimel.o alnparams.o alpha.o alpha2.o fractid.o getparents.o globalalign2.o make3way.o mx.o myutils.o path.o searchchime.o seqdb.o setnucmx.o sfasta.o tracebackbit.o uchime_main.o usort.o viterbifast.o writechhit.o'
+
+rm -f *.o mk.stdout mk.stderr tmp.stderr
+
+for CPPName in $CPPNames
+do
+  echo $CPPName >> /dev/tty
+  g++ $ENV_GCC_OPTS -c -O3 -msse2 -mfpmath=sse -D_FILE_OFFSET_BITS=64 -DNDEBUG=1 -DUCHIMES=1 $CPPName.cpp -o $CPPName.o  >> mk.stdout 2>> tmp.stderr
+       cat tmp.stderr
+       cat tmp.stderr >> mk.stderr
+       rm -f tmp.stderr
+done
+
+LINK_OPTS= 
+if [ `uname -s` == Linux ] ; then
+    LINK_OPTS=-static
+fi
+g++ $LINK_OPTS $ENV_LINK_OPTS -g -o uchime $ObjNames >> mk.stdout 2>> tmp.stderr
+cat tmp.stderr
+cat tmp.stderr >> mk.stderr
+rm -f tmp.stderr
+
+strip uchime
+ls -lh uchime
+sum uchime
diff --git a/uchime_src/mx.cpp b/uchime_src/mx.cpp
new file mode 100644 (file)
index 0000000..48c347e
--- /dev/null
@@ -0,0 +1,294 @@
+#include "myutils.h"\r
+#include "mx.h"\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+\r
+char ProbToChar(float p);\r
+\r
+// Registry of live matrices; created on first use by OnCtor.\r
+list<MxBase *> *MxBase::m_Matrices = 0;\r
+// Allocation statistics, updated by Alloc and reported by LogCounts.\r
+unsigned MxBase::m_AllocCount;\r
+unsigned MxBase::m_ZeroAllocCount;\r
+unsigned MxBase::m_GrowAllocCount;\r
+double MxBase::m_TotalBytes;\r
+double MxBase::m_MaxBytes;\r
+\r
+// Parses s as a number and returns the text of its natural logarithm, as\r
+// formatted by TypeToStr<float>.\r
+static const char *LogizeStr(const char *s)\r
+       {\r
+       const double Value = atof(s);\r
+       const float LogValue = float(log(Value));\r
+       return TypeToStr<float>(LogValue);\r
+       }\r
+\r
+// Parses s as a number and returns the text of e raised to that number, as\r
+// formatted by TypeToStr<float>.\r
+static const char *ExpizeStr(const char *s)\r
+       {\r
+       const double Value = atof(s);\r
+       const float ExpValue = float(exp(Value));\r
+       return TypeToStr<float>(ExpValue);\r
+       }\r
+\r
+// Registers Mx at the front of the global matrix list, creating the list on\r
+// first use.\r
+void MxBase::OnCtor(MxBase *Mx)\r
+       {\r
+       if (!m_Matrices)\r
+               m_Matrices = new list<MxBase *>();\r
+       asserta(m_Matrices != 0);\r
+       m_Matrices->push_front(Mx);\r
+       }\r
+\r
+void MxBase::OnDtor(MxBase *Mx)\r
+       {\r
+       if (m_Matrices == 0)\r
+               {\r
+               Warning("MxBase::OnDtor, m_Matrices = 0");\r
+               return;\r
+               }\r
+       for (list<MxBase*>::iterator p = m_Matrices->begin();\r
+         p != m_Matrices->end(); ++p)\r
+               {\r
+               if (*p == Mx)\r
+                       {\r
+                       m_Matrices->erase(p);\r
+                       if (m_Matrices->empty())\r
+                               delete m_Matrices;\r
+                       return;\r
+                       }\r
+               }\r
+       Warning("MxBase::OnDtor, not found");\r
+       }\r
+\r
+//float **MxBase::Getf(const string &Name)\r
+//     {\r
+//     Mx<float> *m = (Mx<float> *) Get(Name);\r
+//     asserta(m->GetTypeSize() == sizeof(float));\r
+//     return m->GetData();\r
+//     }\r
+//\r
+//double **MxBase::Getd(const string &Name)\r
+//     {\r
+//     Mx<double> *m = (Mx<double> *) Get(Name);\r
+//     asserta(m->GetTypeSize() == sizeof(double));\r
+//     return m->GetData();\r
+//     }\r
+//\r
+//char **MxBase::Getc(const string &Name)\r
+//     {\r
+//     Mx<char> *m = (Mx<char> *) Get(Name);\r
+//     asserta(m->GetTypeSize() == sizeof(char));\r
+//     return m->GetData();\r
+//     }\r
+\r
+// Overload for matrices annotated by database ids only: forwards to the full\r
+// Alloc with both SeqData pointers set to 0.\r
+void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+  const SeqDB *DB, unsigned IdA, unsigned IdB)\r
+       {\r
+       Alloc(Name, RowCount, ColCount, DB, IdA, IdB, 0, 0);\r
+       }\r
+\r
+// Overload for matrices annotated by SeqData only: forwards to the full Alloc\r
+// with no database (0) and both ids set to UINT_MAX.\r
+void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+  const SeqData *SA, const SeqData *SB)\r
+       {\r
+       Alloc(Name, RowCount, ColCount, 0, UINT_MAX, UINT_MAX, SA, SB);\r
+       }\r
+\r
+// Prepares the matrix for use as RowCount x ColCount under the given name and\r
+// records the sequence annotations (DB/IdA/IdB and/or SA/SB).\r
+// Existing storage is kept when it is already large enough; otherwise it is\r
+// freed and a larger square block is allocated. Contents are not initialised\r
+// here. The static allocation counters and byte totals are updated.\r
+void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+  const SeqDB *DB, unsigned IdA, unsigned IdB, const SeqData *SA, const SeqData *SB)\r
+       {\r
+       StartTimer(MxBase_Alloc);\r
+\r
+       ++m_AllocCount;\r
+       if (m_AllocatedRowCount == 0)\r
+               ++m_ZeroAllocCount;\r
+\r
+       // With a database, the matrix must have at least one more row / column\r
+       // than the length of the corresponding sequence.\r
+       if (DB != 0)\r
+               {\r
+               asserta(IdA != UINT_MAX);\r
+               asserta(IdB != UINT_MAX);\r
+               asserta(RowCount >= DB->GetSeqLength(IdA) + 1);\r
+               asserta(ColCount >= DB->GetSeqLength(IdB) + 1);\r
+               }\r
+       // Reallocate only when the request does not fit the current storage.\r
+       if (RowCount > m_AllocatedRowCount || ColCount > m_AllocatedColCount)\r
+               {\r
+               if (m_AllocatedRowCount > 0)\r
+                       {\r
+                       if (opt_logmemgrows)\r
+                               Log("MxBase::Alloc grow %s %u x %u -> %u x %u, %s bytes\n",\r
+                                 Name, m_AllocatedRowCount, m_AllocatedColCount,\r
+                                 RowCount, ColCount,\r
+                                 IntToStr(GetBytes()));\r
+                       ++m_GrowAllocCount;\r
+                       }\r
+\r
+               // Remove the old size from the running total before freeing.\r
+               m_TotalBytes -= GetBytes();\r
+\r
+               PauseTimer(MxBase_Alloc);\r
+               StartTimer(MxBase_FreeData);\r
+               FreeData();\r
+               EndTimer(MxBase_FreeData);\r
+               StartTimer(MxBase_Alloc);\r
+\r
+               // Add 16 rows / columns of slack and make the allocation square,\r
+               // using the larger of the two dimensions.\r
+               unsigned N = max(RowCount + 16, m_AllocatedRowCount);\r
+               unsigned M = max(ColCount + 16, m_AllocatedColCount);\r
+               N = max(N, M);\r
+\r
+               PauseTimer(MxBase_Alloc);\r
+               StartTimer(MxBase_AllocData);\r
+               AllocData(N, N);\r
+               EndTimer(MxBase_AllocData);\r
+               StartTimer(MxBase_Alloc);\r
+\r
+               // Add the new size and track the high-water mark.\r
+               m_TotalBytes += GetBytes();\r
+               if (m_TotalBytes > m_MaxBytes)\r
+                       m_MaxBytes = m_TotalBytes;\r
+               }\r
+       \r
+       // Copy the name, truncating to fit m_Name and always terminating it.\r
+       unsigned n = sizeof(m_Name)-1;\r
+       strncpy(m_Name, Name, n);\r
+       m_Name[n] = 0;\r
+       m_RowCount = RowCount;\r
+       m_ColCount = ColCount;\r
+       m_SeqDB = DB;\r
+       m_IdA = IdA;\r
+       m_IdB = IdB;\r
+       m_SA = SA;\r
+       m_SB = SB;\r
+\r
+       EndTimer(MxBase_Alloc);\r
+       }\r
+\r
+// Writes a description of the matrix to the log: a header line with name,\r
+// address, used/allocated dimensions and sequence labels, then (if WithData\r
+// and the matrix is non-empty) the cell values.\r
+// Opts is a bit mask: OPT_LOG / OPT_EXP print log() / exp() of each cell in\r
+// the general layout; OPT_ZERO_BASED aligns sequence letters with index 0\r
+// instead of index 1.\r
+// If m_Alpha or m_Alpha2 is set, the matrix is printed as a table over that\r
+// alphabet instead of the general row/column layout.\r
+void MxBase::LogMe(bool WithData, int Opts) const\r
+       {\r
+       Log("\n");\r
+       if (Opts & OPT_EXP)\r
+               Log("Exp ");\r
+       else if (Opts & OPT_LOG)\r
+               Log("Log ");\r
+       bool ZeroBased = ((Opts & OPT_ZERO_BASED) != 0);\r
+       Log("%s(%p) Rows %u/%u, Cols %u/%u",\r
+         m_Name, this,\r
+         m_RowCount, m_AllocatedRowCount,\r
+         m_ColCount, m_AllocatedColCount);\r
+       // Sequence labels come from the database if set, else from SeqData.\r
+       if (m_SeqDB != 0 && m_IdA != UINT_MAX)\r
+               Log(", A=%s", m_SeqDB->GetLabel(m_IdA));\r
+       else if (m_SA != 0)\r
+               Log(", A=%s", m_SA->Label);\r
+       if (m_SeqDB != 0 && m_IdB != UINT_MAX)\r
+               Log(", B=%s", m_SeqDB->GetLabel(m_IdB));\r
+       else if (m_SB != 0)\r
+               Log(", B=%s", m_SB->Label);\r
+       Log("\n");\r
+       if (!WithData || m_RowCount == 0 || m_ColCount == 0)\r
+               return;\r
+\r
+       // Column width is taken from the formatted text of cell (0, 0);\r
+       // Mod = 10^Width is used to wrap the column numbers printed below.\r
+       const char *z = GetAsStr(0, 0);\r
+       unsigned Width = strlen(z);\r
+       unsigned Mod = 1;\r
+       for (unsigned i = 0; i < Width; ++i)\r
+               Mod *= 10;\r
+\r
+       // Alphabet layout 1: cells are indexed by the character codes of m_Alpha.\r
+       if (m_Alpha[0] != 0)\r
+               {\r
+               Log("// Alphabet=%s\n", m_Alpha);\r
+               Log("//      ");\r
+               unsigned n = strlen(m_Alpha);\r
+               for (unsigned j = 0; j < n; ++j)\r
+                       Log(" %*c", Width, m_Alpha[j]);\r
+               Log("\n");\r
+               for (unsigned i = 0; i < n; ++i)\r
+                       {\r
+                       Log("/* %c */ {", m_Alpha[i]);\r
+                       unsigned ci = m_Alpha[i];\r
+                       for (unsigned j = 0; j < n; ++j)\r
+                               {\r
+                               unsigned cj = m_Alpha[j];\r
+                               Log("%s,", GetAsStr(ci, cj));\r
+                               }\r
+                       Log("},  // %c\n", m_Alpha[i]);\r
+                       }\r
+               return;\r
+               }\r
+       // Alphabet layout 2: cells are indexed by position in m_Alpha2\r
+       // (ci is computed but not used in this branch).\r
+       else if (m_Alpha2[0] != 0)\r
+               {\r
+               unsigned n = strlen(m_Alpha2);\r
+               Log("// Alphabet=%s\n", m_Alpha2);\r
+               Log("//      ");\r
+               for (unsigned j = 0; j < n; ++j)\r
+                       Log(" %*c", Width, m_Alpha2[j]);\r
+               Log("\n");\r
+               for (unsigned i = 0; i < n; ++i)\r
+                       {\r
+                       Log("/* %c */ {", m_Alpha2[i]);\r
+                       unsigned ci = m_Alpha2[i];\r
+                       for (unsigned j = 0; j < n; ++j)\r
+                               Log("%s,", GetAsStr(i, j));\r
+                       Log("},  // %c\n", m_Alpha2[i]);\r
+                       }\r
+               return;\r
+               }\r
+\r
+       // General layout: look up the sequences that annotate rows (A) and\r
+       // columns (B), if any.\r
+       const byte *A = 0;\r
+       const byte *B = 0;\r
+       if (m_SeqDB != 0 && m_IdA != UINT_MAX)\r
+               A = m_SeqDB->GetSeq(m_IdA);\r
+       else if (m_SA != 0)\r
+               A = m_SA->Seq;\r
+       if (m_SeqDB != 0 && m_IdB != UINT_MAX)\r
+               B = m_SeqDB->GetSeq(m_IdB);\r
+       else if (m_SB != 0)\r
+               B = m_SB->Seq;\r
+\r
+       // Header line 1: letters of B above their columns.\r
+       if (B != 0)\r
+               {\r
+               if (A != 0)\r
+                       Log("  ");\r
+               Log("%5.5s", "");\r
+               if (ZeroBased)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               Log("%*c", Width, B[j]);\r
+               else\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               Log("%*c", Width, j == 0 ? ' ' : B[j-1]);\r
+               Log("\n");\r
+               }\r
+\r
+       // Header line 2: column numbers.\r
+       if (A != 0)\r
+               Log("  ");\r
+       Log("%5.5s", "");\r
+       for (unsigned j = 0; j < m_ColCount; ++j)\r
+               Log("%*u", Width, j%Mod);\r
+       Log("\n");\r
+\r
+       // One line per row: letter of A (if any), row number, then the cells.\r
+       for (unsigned i = 0; i < m_RowCount; ++i)\r
+               {\r
+               if (A != 0)\r
+                       {\r
+                       if (ZeroBased)\r
+                               Log("%c ", A[i]);\r
+                       else\r
+                               Log("%c ", i == 0 ? ' ' : A[i-1]);\r
+                       }\r
+               Log("%4u ", i);\r
+               \r
+               for (unsigned j = 0; j < m_ColCount; ++j)\r
+                       {\r
+                       const char *s = GetAsStr(i, j);\r
+                       if (Opts & OPT_LOG)\r
+                               s = LogizeStr(s);\r
+                       else if (Opts & OPT_EXP)\r
+                               s = ExpizeStr(s);\r
+                       Log("%s", s);\r
+                       }\r
+               Log("\n");\r
+               }\r
+       }\r
+static unsigned g_MatrixFileCount;\r
+\r
+// Writes the static allocation statistics (allocation counts, current and\r
+// maximum byte totals) to the log as a small table.\r
+void MxBase::LogCounts()\r
+       {\r
+       Log("\n");\r
+       Log("MxBase::LogCounts()\n");\r
+       Log("      What           N\n");\r
+       Log("----------  ----------\n");\r
+       Log("    Allocs  %10u\n", m_AllocCount);\r
+       Log("ZeroAllocs  %10u\n", m_ZeroAllocCount);\r
+       Log("     Grows  %10u\n", m_GrowAllocCount);\r
+       Log("     Bytes  %10.10s\n", MemBytesToStr(m_TotalBytes));\r
+       Log(" Max bytes  %10.10s\n", MemBytesToStr(m_MaxBytes));\r
+       }\r
diff --git a/uchime_src/mx.h b/uchime_src/mx.h
new file mode 100644 (file)
index 0000000..1438900
--- /dev/null
@@ -0,0 +1,454 @@
+#ifndef mx_h\r
+#define mx_h\r
+\r
+#include <list>\r
+#include <limits.h>\r
+#include <math.h>\r
+#include "timing.h"\r
+#include "myutils.h"\r
+\r
+// Bit flags for the Opts argument of MxBase::LogMe.\r
+const int OPT_LOG = 0x01;\r
+const int OPT_EXP = 0x02;\r
+const int OPT_ZERO_BASED = 0x04;\r
+// Sentinel cell values; TypeToStr<float> prints values below MINUS_INFINITY/2\r
+// as "*" and UNINIT as "?".\r
+const float MINUS_INFINITY = -9e9f;\r
+const float UNINIT = -8e8f;\r
+\r
+struct SeqData;\r
+\r
+// Formats a matrix cell value as text. Only the explicit specialisations are\r
+// usable; the generic version aborts via Die().\r
+template<class T> const char *TypeToStr(T t)\r
+       {\r
+       Die("Unspecialised TypeToStr() called");\r
+       ureturn(0);\r
+       }\r
+\r
+// unsigned short: right-justified in a 12-character field.\r
+// Returns a pointer to a static buffer that is overwritten by the next call.\r
+template<> inline const char *TypeToStr<unsigned short>(unsigned short f)\r
+       {\r
+       static char s[16];\r
+\r
+       sprintf(s, "%12u", f);\r
+       return s;\r
+       }\r
+\r
+// short: right-justified in a 12-character field.\r
+// Returns a pointer to a static buffer that is overwritten by the next call.\r
+template<> inline const char *TypeToStr<short>(short f)\r
+       {\r
+       static char s[16];\r
+\r
+       sprintf(s, "%12d", f);\r
+       return s;\r
+       }\r
+\r
+// int: right-justified in a field of at least 5 characters.\r
+// Returns a pointer to a static buffer that is overwritten by the next call.\r
+template<> inline const char *TypeToStr<int>(int f)\r
+       {\r
+       static char s[16];\r
+\r
+       sprintf(s, "%5d", f);\r
+       return s;\r
+       }\r
+\r
+// float: 12-character field. Special values are shown symbolically:\r
+// "?" for UNINIT, "*" for anything below MINUS_INFINITY/2 and "." for zero.\r
+// Values within [-1e5, 1e5] use fixed notation with 5 decimals, others %g.\r
+// Returns a pointer to a static buffer that is overwritten by the next call.\r
+template<> inline const char *TypeToStr<float>(float f)\r
+       {\r
+       static char s[16];\r
+\r
+       if (f == UNINIT)\r
+               sprintf(s, "%12.12s", "?");\r
+       else if (f < MINUS_INFINITY/2)\r
+               sprintf(s, "%12.12s", "*");\r
+       else if (f == 0.0f)\r
+               sprintf(s, "%12.12s", ".");\r
+       else if (f >= -1e5 && f <= 1e5)\r
+               sprintf(s, "%12.5f", f);\r
+       else\r
+               sprintf(s, "%12.4g", f);\r
+       return s;\r
+       }\r
+\r
+template<> inline const char *TypeToStr<double>(double f)\r
+       {\r
+       static char s[16];\r
+\r
+       if (f < -1e9)\r
+               sprintf(s, "%12.12s", "*");\r
+       else if (f == 0.0f)\r
+               sprintf(s, "%12.12s", ".");\r
+       else if (f >= -1e-5 && f <= 1e5)\r
+               sprintf(s, "%12.5f", f);\r
+       else\r
+               sprintf(s, "%12.4g", f);\r
+       return s;\r
+       }\r
+\r
+// Formats f with TypeToStr<float>, stores the text in s and returns s.c_str().\r
+static inline const char *FloatToStr(float f, string &s)\r
+       {\r
+       s.assign(TypeToStr<float>(f));\r
+       return s.c_str();\r
+       }\r
+\r
+// char: the character itself as a one-character string.\r
+// s[1] is never written; being static it stays 0 and terminates the string.\r
+template<> inline const char *TypeToStr<char>(char c)\r
+       {\r
+       static char s[2];\r
+       s[0] = c;\r
+       return s;\r
+       }\r
+\r
+// byte: the value as a one-character string.\r
+// s[1] is never written; being static it stays 0 and terminates the string.\r
+template<> inline const char *TypeToStr<byte>(byte c)\r
+       {\r
+       static char s[2];\r
+       s[0] = c;\r
+       return s;\r
+       }\r
+\r
+// bool: "T" for true, "F" for false.\r
+// s[1] is never written; being static it stays 0 and terminates the string.\r
+template<> inline const char *TypeToStr<bool>(bool tof)\r
+       {\r
+       static char s[2];\r
+       s[0] = tof ? 'T' : 'F';\r
+       return s;\r
+       }\r
+\r
+struct SeqDB;\r
+\r
+// Type-independent base of Mx<T>. Holds the matrix name, the used and\r
+// allocated dimensions and optional sequence annotations, and maintains a\r
+// static registry of all live matrices together with allocation statistics.\r
+// Storage itself is managed by the derived class through AllocData/FreeData.\r
+struct MxBase\r
+       {\r
+private:\r
+       // Private copy operations: copying a matrix is disabled.\r
+       MxBase(const MxBase &rhs);\r
+       MxBase &operator=(const MxBase &rhs);\r
+\r
+public:\r
+       char m_Name[32];\r
+       // Optional alphabets; when non-empty LogMe prints the matrix as a table\r
+       // over the alphabet.\r
+       char m_Alpha[32];\r
+       char m_Alpha2[32];\r
+       // Dimensions in use and dimensions actually allocated.\r
+       unsigned m_RowCount;\r
+       unsigned m_ColCount;\r
+       unsigned m_AllocatedRowCount;\r
+       unsigned m_AllocatedColCount;\r
+       // Optional annotations: database + sequence ids, or SeqData pointers.\r
+       const SeqDB *m_SeqDB;\r
+       unsigned m_IdA;\r
+       unsigned m_IdB;\r
+       const SeqData *m_SA;\r
+       const SeqData *m_SB;\r
+\r
+       static list<MxBase *> *m_Matrices;\r
+       //static MxBase *Get(const string &Name);\r
+       //static float **Getf(const string &Name);\r
+       //static double **Getd(const string &Name);\r
+       //static char **Getc(const string &Name);\r
+\r
+       static unsigned m_AllocCount;\r
+       static unsigned m_ZeroAllocCount;\r
+       static unsigned m_GrowAllocCount;\r
+       static double m_TotalBytes;\r
+       static double m_MaxBytes;\r
+\r
+       static void OnCtor(MxBase *Mx);\r
+       static void OnDtor(MxBase *Mx);\r
+\r
+       // Creates an empty, unnamed matrix and registers it.\r
+       MxBase()\r
+               {\r
+               // LogMe and LogAll read the name, alphabets and SeqData pointers, so\r
+               // they must not be left uninitialised.\r
+               m_Name[0] = 0;\r
+               m_Alpha[0] = 0;\r
+               m_Alpha2[0] = 0;\r
+               m_AllocatedRowCount = 0;\r
+               m_AllocatedColCount = 0;\r
+               m_RowCount = 0;\r
+               m_ColCount = 0;\r
+               m_IdA = UINT_MAX;\r
+               m_IdB = UINT_MAX;\r
+               m_SeqDB = 0;\r
+               m_SA = 0;\r
+               m_SB = 0;\r
+               OnCtor(this);\r
+               }\r
+       virtual ~MxBase()\r
+               {\r
+               OnDtor(this);\r
+               }\r
+\r
+       virtual unsigned GetTypeSize() const = 0;\r
+       virtual unsigned GetBytes() const = 0;\r
+\r
+       // Frees the storage and resets dimensions, ids and SeqData pointers.\r
+       void Clear()\r
+               {\r
+               FreeData();\r
+               m_AllocatedRowCount = 0;\r
+               m_AllocatedColCount = 0;\r
+               m_RowCount = 0;\r
+               m_ColCount = 0;\r
+               m_IdA = UINT_MAX;\r
+               m_IdB = UINT_MAX;\r
+               m_SA = 0;\r
+               m_SB = 0;\r
+               }\r
+\r
+       bool Empty() const\r
+               {\r
+               return m_RowCount == 0;\r
+               }\r
+\r
+       virtual void AllocData(unsigned RowCount, unsigned ColCount) = 0;\r
+       virtual void FreeData() = 0;\r
+       virtual const char *GetAsStr(unsigned i, unsigned j) const = 0;\r
+\r
+       // Copies Alpha into m_Alpha, truncating it to fit and always terminating.\r
+       void SetAlpha(const char *Alpha)\r
+               {\r
+               // Reserve the last byte for the terminator; writing at index\r
+               // sizeof(m_Alpha) would be one past the end of the array.\r
+               unsigned n = sizeof(m_Alpha)-1;\r
+               strncpy(m_Alpha, Alpha, n);\r
+               m_Alpha[n] = 0;\r
+               }\r
+\r
+       void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+         const SeqDB *DB, unsigned IdA, unsigned IdB,\r
+         const SeqData *SA, const SeqData *SB);\r
+\r
+       void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+         const SeqDB *DB = 0, unsigned IdA = UINT_MAX, unsigned IdB = UINT_MAX);\r
+\r
+       void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+         const SeqData *SA, const SeqData *SB);\r
+\r
+       // Logs one line per registered matrix that has storage allocated\r
+       // (allocated rows, columns, element size, size in MB, name) and a total.\r
+       static void LogAll()\r
+               {\r
+               Log("\n");\r
+               if (m_Matrices == 0)\r
+                       {\r
+                       Log("MxBase::m_Matrices=0\n");\r
+                       return;\r
+                       }\r
+               Log("\n");\r
+               Log("AllRows  AllCols    Sz        MB  Name\n");\r
+               Log("-------  -------  ----  --------  ----\n");\r
+               double TotalMB = 0;\r
+               for (list<MxBase *>::const_iterator p = m_Matrices->begin();\r
+                 p != m_Matrices->end(); ++p)\r
+                       {\r
+                       const MxBase *Mx = *p;\r
+                       if (Mx == 0)\r
+                               continue;\r
+                       //if (Mx->m_RowCount != 0 || ShowEmpty)\r
+                       //      Mx->LogMe(WithData);\r
+                       unsigned ar = Mx->m_AllocatedRowCount;\r
+                       if (ar == 0)\r
+                               continue;\r
+                       unsigned ac = Mx->m_AllocatedColCount;\r
+                       unsigned sz = Mx->GetTypeSize();\r
+                       double MB = (double) ar*(double) ac*(double) sz/1e6;\r
+                       TotalMB += MB;\r
+                       Log("%7u  %7u  %4u  %8.2f  %s\n", ar, ac, sz, MB, Mx->m_Name);\r
+                       }\r
+               Log("                        --------\n");\r
+               Log("%7.7s  %7.7s  %4.4s  %8.2f\n", "", "", "", TotalMB);\r
+               }\r
+\r
+       void LogMe(bool WithData = true, int Opts = 0) const;\r
+       static void LogCounts();\r
+       };\r
+\r
+// Two-dimensional matrix of T stored as an array of row pointers.\r
+// m_RowCount x m_ColCount cells are in use; the allocated block may be\r
+// larger (see MxBase::Alloc).\r
+template<class T> struct Mx : public MxBase\r
+       {\r
+// Disable unimplemented stuff\r
+private:\r
+       Mx(Mx &rhs);\r
+       Mx &operator=(Mx &rhs);\r
+       // const Mx &operator=(const Mx &rhs) const;\r
+\r
+public:\r
+       T **m_Data;\r
+\r
+       Mx()\r
+               {\r
+               m_Data = 0;\r
+               }\r
+       \r
+       ~Mx()\r
+               {\r
+               FreeData();\r
+               }\r
+\r
+       // Allocates RowCount rows of ColCount cells each (contents are not\r
+       // initialised) and records the allocated dimensions.\r
+       virtual void AllocData(unsigned RowCount, unsigned ColCount)\r
+               {\r
+               if (opt_logmemgrows)\r
+                       Log("MxBase::AllocData(%u,%u) %s bytes, Name=%s\n",\r
+                         RowCount, ColCount, IntToStr(GetBytes()), m_Name);\r
+               // m_Data = myalloc<T *>(RowCount);\r
+               m_Data = MYALLOC(T *, RowCount, Mx);\r
+               for (unsigned i = 0; i < RowCount; ++i)\r
+                       // m_Data[i] = myalloc<T>(ColCount);\r
+                       m_Data[i] = MYALLOC(T, ColCount, Mx);\r
+               AddBytes("Mx_AllocData", RowCount*sizeof(T *) + RowCount*ColCount*sizeof(T));\r
+\r
+               m_AllocatedRowCount = RowCount;\r
+               m_AllocatedColCount = ColCount;\r
+               }\r
+\r
+       // Releases every row and the row-pointer array, then resets the used\r
+       // and allocated dimensions to zero.\r
+       virtual void FreeData()\r
+               {\r
+               for (unsigned i = 0; i < m_AllocatedRowCount; ++i)\r
+                       MYFREE(m_Data[i], m_AllocatedColCount, Mx);\r
+               MYFREE(m_Data, m_AllocatedRowCount, Mx);\r
+               SubBytes("Mx_AllocData",\r
+                 m_AllocatedRowCount*sizeof(T *) + m_AllocatedRowCount*m_AllocatedColCount*sizeof(T));\r
+\r
+               m_Data = 0;\r
+               m_RowCount = 0;\r
+               m_ColCount = 0;\r
+               m_AllocatedRowCount = 0;\r
+               m_AllocatedColCount = 0;\r
+               }\r
+\r
+       T **GetData()\r
+               {\r
+               return (T **) m_Data;\r
+               }\r
+\r
+       T Get(unsigned i, unsigned j) const\r
+               {\r
+               assert(i < m_RowCount);\r
+               assert(j < m_ColCount);\r
+               return m_Data[i][j];\r
+               }\r
+\r
+       void Put(unsigned i, unsigned j, T x) const\r
+               {\r
+               assert(i < m_RowCount);\r
+               assert(j < m_ColCount);\r
+               m_Data[i][j] = x;\r
+               }\r
+\r
+       // For a square matrix, sets Avgs[i] to the mean of row i excluding the\r
+       // diagonal cell and returns the mean of those row averages.\r
+       // Dies if the matrix is not square; returns T(0) for an empty matrix.\r
+       T GetOffDiagAvgs(vector<T> &Avgs) const\r
+               {\r
+               if (m_RowCount != m_ColCount)\r
+                       Die("GetOffDiagAvgs, not symmetrical");\r
+               Avgs.clear();\r
+               T Total = T(0);\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       {\r
+                       T Sum = T(0);\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               {\r
+                               if (j == i)\r
+                                       continue;\r
+                               Sum += m_Data[i][j];\r
+                               }\r
+                       // A 1x1 matrix has no off-diagonal cells; report 0 rather than\r
+                       // dividing by zero.\r
+                       T Avg = T(0);\r
+                       if (m_RowCount > 1)\r
+                               Avg = Sum/(m_RowCount-1);\r
+                       Total += Avg;\r
+                       Avgs.push_back(Avg);\r
+                       }\r
+               return m_RowCount == 0 ? T(0) : Total/m_RowCount;\r
+               }\r
+\r
+       unsigned GetTypeSize() const\r
+               {\r
+               return sizeof(T);\r
+               }\r
+\r
+       // Bytes occupied by the allocated cells plus the row-pointer array.\r
+       virtual unsigned GetBytes() const\r
+               {\r
+               return m_AllocatedRowCount*m_AllocatedColCount*GetTypeSize() +\r
+                 m_AllocatedRowCount*sizeof(T *);\r
+               }\r
+\r
+       const char *GetAsStr(unsigned i, unsigned j) const\r
+               {\r
+               return TypeToStr<T>(Get(i, j));\r
+               }\r
+\r
+       const T *const *const GetData() const\r
+               {\r
+               return (const T *const *) m_Data;\r
+               }\r
+\r
+       // Makes this matrix a copy of the used cells of rhs, under the name\r
+       // "Copy". The database annotation of rhs is carried over.\r
+       void Copy(const Mx<T> &rhs)\r
+               {\r
+               Alloc("Copy", rhs.m_RowCount, rhs.m_ColCount, rhs.m_SeqDB, rhs.m_IdA, rhs.m_IdB);\r
+               const T * const *Data = rhs.GetData();\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               m_Data[i][j] = Data[i][j];\r
+               }\r
+\r
+       // Sets every used cell to v.\r
+       void Assign(T v)\r
+               {\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               m_Data[i][j] = v;\r
+               }\r
+\r
+       // Returns true if rhs has the same dimensions and all cells compare equal\r
+       // under feq(); two cells that are both below -1e10 are treated as equal.\r
+       // The first mismatch is reported with Warning(). Bwd scans from the last\r
+       // cell towards the first, which only changes which mismatch is reported.\r
+       bool Eq(const Mx &rhs, bool Bwd = false) const\r
+               {\r
+               if (rhs.m_ColCount != m_ColCount)\r
+                       return false;\r
+               if (rhs.m_RowCount != m_RowCount)\r
+                       return false;\r
+               const T * const*d = rhs.GetData();\r
+               // A backward scan starts at the last valid index (count - 1), not at\r
+               // count, which would be one past the end.\r
+               int i1 = Bwd ? int(m_RowCount) - 1 : 0;\r
+               int j1 = Bwd ? int(m_ColCount) - 1 : 0;\r
+               int i2 = Bwd ? -1 : int(m_RowCount);\r
+               int j2 = Bwd ? -1 : int(m_ColCount);\r
+               for (int i = i1; i != i2; Bwd ? --i : ++i)\r
+                       for (int j = j1; j != j2; Bwd ? --j : ++j)\r
+                               {\r
+                               float x = m_Data[i][j];\r
+                               float y = d[i][j];\r
+                               if (x < -1e10 && y < -1e10)\r
+                                       continue;\r
+                               if (!feq(x, y))\r
+                                       {\r
+                                       Warning("%s[%d][%d] = %g, %s = %g",\r
+                                         m_Name, i, j, x, rhs.m_Name, y);\r
+                                       return false;\r
+                                       }\r
+                               }\r
+               return true;\r
+               }\r
+\r
+       // Like Eq (forward scan), but only cells whose Mask entry is true are\r
+       // compared. Mask must have the same dimensions as this matrix.\r
+       bool EqMask(const Mx &rhs, const Mx<bool> &Mask) const\r
+               {\r
+               if (rhs.m_ColCount != m_ColCount)\r
+                       return false;\r
+               if (rhs.m_RowCount != m_RowCount)\r
+                       return false;\r
+\r
+               if (Mask.m_ColCount != m_ColCount)\r
+                       return false;\r
+               if (Mask.m_RowCount != m_RowCount)\r
+                       return false;\r
+\r
+               const T * const*d = rhs.GetData();\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               {\r
+                               if (!Mask.m_Data[i][j])\r
+                                       continue;\r
+                               float x = m_Data[i][j];\r
+                               float y = d[i][j];\r
+                               if (x < -1e10 && y < -1e10)\r
+                                       continue;\r
+                               if (!feq(x, y))\r
+                                       {\r
+                                       Warning("%s[%d][%d] = %g, %s = %g",\r
+                                         m_Name, i, j, x, rhs.m_Name, y);\r
+                                       return false;\r
+                                       }\r
+                               }\r
+               return true;\r
+               }\r
+\r
+       // Sets every used cell to v (same effect as Assign).\r
+       void Init(T v)\r
+               {\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               m_Data[i][j] = v;\r
+               }\r
+       };\r
+\r
+void WriteMx(const string &Name, Mx<float> &Mxf);\r
+\r
+template<class T> void ReserveMx(Mx<T> &Mxf, unsigned N = UINT_MAX)\r
+       {\r
+       if (Mxf.m_AllocatedRowCount > 0)\r
+               return;\r
+       extern unsigned g_MaxInputSeqLength;\r
+       if (N == UINT_MAX)\r
+               N = g_MaxInputSeqLength+1;\r
+       Mxf.Alloc("(Reserved)", N, N);\r
+       }\r
+\r
+#endif // mx_h\r
diff --git a/uchime_src/myopts.h b/uchime_src/myopts.h
new file mode 100644 (file)
index 0000000..ba901ea
--- /dev/null
@@ -0,0 +1,190 @@
+// Option table. This header has no include guard and, apart from the\r
+// MY_VERSION default below, consists solely of STR_OPT / UNS_OPT / INT_OPT /\r
+// TOG_OPT / FLT_OPT / FLAG_OPT invocations. None of those macros is defined\r
+// here, so the including file must define them before each inclusion.\r
+// NOTE(review): the numeric rows look like (name, default, min, max) --\r
+// confirm against the macro definitions in the including file.\r
+#ifndef MY_VERSION\r
+#define MY_VERSION     "4.2"\r
+#endif\r
+\r
+STR_OPT(       input,                                  0)\r
+STR_OPT(       query,                                  0)\r
+STR_OPT(       db,                                             0)\r
+STR_OPT(       sort,                                   0)\r
+STR_OPT(       output,                                 0)\r
+STR_OPT(       uc,                                             0)\r
+STR_OPT(       clstr2uc,                               0)\r
+STR_OPT(       uc2clstr,                               0)\r
+STR_OPT(       uc2fasta,                               0)\r
+STR_OPT(       uc2fastax,                              0)\r
+STR_OPT(       mergesort,                              0)\r
+STR_OPT(       tmpdir,                                 ".")\r
+STR_OPT(       staralign,                              0)\r
+STR_OPT(       sortuc,                                 0)\r
+STR_OPT(       blastout,                               0)\r
+STR_OPT(       blast6out,                              0)\r
+STR_OPT(       fastapairs,                             0)\r
+STR_OPT(       idchar,                                 "|")\r
+STR_OPT(       diffchar,                               " ")\r
+STR_OPT(       uchime,                                 0)\r
+STR_OPT(       gapopen,                                0)\r
+STR_OPT(       gapext,                                 0)\r
+STR_OPT(       uhire,                                  0)\r
+STR_OPT(       ids,                                    "99,98,95,90,85,80,70,50,35")\r
+STR_OPT(       seeds,                                  0)\r
+STR_OPT(       clump,                                  0)\r
+STR_OPT(       clumpout,                               0)\r
+STR_OPT(       clump2fasta,                    0)\r
+STR_OPT(       clumpfasta,                             0)\r
+STR_OPT(       hireout,                                0)\r
+STR_OPT(       mergeclumps,                    0)\r
+STR_OPT(       alpha,                                  0)\r
+STR_OPT(       hspalpha,                               0)\r
+STR_OPT(       probmx,                                 0)\r
+STR_OPT(       matrix,                                 0)\r
+STR_OPT(       tracestate,                             0)\r
+STR_OPT(       chainout,                               0)\r
+STR_OPT(       cluster,                                0)\r
+STR_OPT(       computekl,                              0)\r
+STR_OPT(       userout,                                0)\r
+STR_OPT(       userfields,                             0)\r
+STR_OPT(       seedsout,                               0)\r
+STR_OPT(       chainhits,                              0)\r
+STR_OPT(       findorfs,                               0)\r
+STR_OPT(       strand,                                 0)\r
+STR_OPT(       getseqs,                                0)\r
+STR_OPT(       labels,                                 0)\r
+STR_OPT(       doug,                                   0)\r
+STR_OPT(       makeindex,                              0)\r
+STR_OPT(       indexstats,                             0)\r
+STR_OPT(       uchimeout,                              0)\r
+STR_OPT(       uchimealns,                             0)\r
+STR_OPT(       xframe,                                 0)\r
+STR_OPT(       mkctest,                                0)\r
+STR_OPT(       allpairs,                               0)\r
+STR_OPT(       fastq2fasta,                    0)\r
+STR_OPT(       otusort,                                0)\r
+STR_OPT(       sparsedist,                             0)\r
+STR_OPT(       sparsedistparams,               0)\r
+STR_OPT(       mcc,                                    0)\r
+STR_OPT(       utax,                                   0)\r
+STR_OPT(       simcl,                                  0)\r
+STR_OPT(       absort,                                 0)\r
+STR_OPT(       cc,                                             0)\r
+STR_OPT(       uslink,                                 0)\r
+\r
+UNS_OPT(       band,                                   16,                     0,                      UINT_MAX)\r
+UNS_OPT(       minlen,                                 10,                     1,                      UINT_MAX)\r
+UNS_OPT(       maxlen,                                 10000,          1,                      UINT_MAX)\r
+// NOTE(review): w and k give 0 as the second argument and 1 as the third.\r
+// If the arguments are (name, default, min, max), as rows such as\r
+// bump (50, 0, 100) suggest, the default lies below the minimum --\r
+// presumably 0 is a deliberate "not set" sentinel; confirm.\r
+UNS_OPT(       w,                                              0,                      1,                      UINT_MAX)\r
+UNS_OPT(       k,                                              0,                      1,                      UINT_MAX)\r
+UNS_OPT(       stepwords,                              8,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxaccepts,                             1,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxrejects,                             8,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxtargets,                             0,                      0,                      UINT_MAX)\r
+UNS_OPT(       minhsp,                                 32,                     1,                      UINT_MAX)\r
+UNS_OPT(       bump,                                   50,                     0,                      100)\r
+UNS_OPT(       rowlen,                                 64,                     8,                      UINT_MAX)\r
+UNS_OPT(       idprefix,                               0,                      0,                      UINT_MAX)\r
+UNS_OPT(       idsuffix,                               0,                      0,                      UINT_MAX)\r
+UNS_OPT(       chunks,                                 4,                      2,                      UINT_MAX)\r
+UNS_OPT(       minchunk,                               64,                     2,                      UINT_MAX)\r
+UNS_OPT(       maxclump,                               1000,           1,                      UINT_MAX)\r
+UNS_OPT(       iddef,                                  0,                      0,                      UINT_MAX)\r
+UNS_OPT(       mincodons,                              20,                     1,                      UINT_MAX)\r
+UNS_OPT(       maxovd,                                 8,                      0,                      UINT_MAX)\r
+UNS_OPT(       max2,                                   40,                     0,                      UINT_MAX)\r
+UNS_OPT(       querylen,                               500,            0,                      UINT_MAX)\r
+UNS_OPT(       targetlen,                              500,            0,                      UINT_MAX)\r
+UNS_OPT(       orfstyle,                               (1+2+4),        0,                      UINT_MAX)\r
+UNS_OPT(       dbstep,                                 1,                      1,                      UINT_MAX)\r
+UNS_OPT(       randseed,                               1,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxp,                                   2,                      2,                      UINT_MAX)\r
+UNS_OPT(       idsmoothwindow,                 32,                     1,                      UINT_MAX)\r
+UNS_OPT(       mindiffs,                               3,                      1,                      UINT_MAX)\r
+UNS_OPT(       maxspan1,                               24,                     1,                      UINT_MAX)\r
+UNS_OPT(       maxspan2,                               24,                     1,                      UINT_MAX)\r
+UNS_OPT(       minorfcov,                              16,                     1,                      UINT_MAX)\r
+UNS_OPT(       hashsize,                               4195879,        1,                      UINT_MAX)\r
+UNS_OPT(       maxpoly,                                0,                      0,                      UINT_MAX)\r
+UNS_OPT(       droppct,                                50,                     0,                      100)\r
+UNS_OPT(       secs,                                   10,                     0,                      UINT_MAX)\r
+UNS_OPT(       maxqgap,                                0,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxtgap,                                0,                      0,                      UINT_MAX)\r
+\r
+INT_OPT(       frame,                                  0,                      -3,                     +3)\r
+\r
+TOG_OPT(       trace,                                  false)\r
+TOG_OPT(       logmemgrows,                    false)\r
+TOG_OPT(       trunclabels,                    false)\r
+TOG_OPT(       verbose,                                false)\r
+TOG_OPT(       wordcountreject,                true)\r
+TOG_OPT(       rev,                                    false)\r
+TOG_OPT(       output_rejects,                 false)\r
+TOG_OPT(       blast_termgaps,                 false)\r
+TOG_OPT(       fastalign,                              true)\r
+TOG_OPT(       flushuc,                                false)\r
+TOG_OPT(       stable_sort,                    false)\r
+TOG_OPT(       minus_frames,                   true)\r
+TOG_OPT(       usort,                                  true)\r
+TOG_OPT(       nb,                                             false)\r
+TOG_OPT(       twohit,                                 true)\r
+TOG_OPT(       ssort,                                  false)\r
+TOG_OPT(       log_query,                              false)\r
+TOG_OPT(       log_hothits,                    false)\r
+TOG_OPT(       logwordstats,                   false)\r
+TOG_OPT(       ucl,                                    false)\r
+TOG_OPT(       skipgaps2,                              true)\r
+TOG_OPT(       skipgaps,                               true)\r
+TOG_OPT(       denovo,                                 false)\r
+TOG_OPT(       cartoon_orfs,                   false)\r
+TOG_OPT(       label_ab,                               false)\r
+TOG_OPT(       wordweight,                             false)\r
+TOG_OPT(       isort,                                  false)\r
+TOG_OPT(       selfid,                                 false)\r
+TOG_OPT(       leftjust,                               false)\r
+TOG_OPT(       rightjust,                              false)\r
+\r
+FLT_OPT(       id,                                             0.0,            0.0,            1.0)\r
+FLT_OPT(       weak_id,                                0.0,            0.0,            1.0)\r
+FLT_OPT(       match,                                  1.0,            0.0,            FLT_MAX)\r
+// NOTE(review): -2.0 is below the 0.0 given as the next argument. If the\r
+// arguments are (name, default, min, max), an explicitly supplied negative\r
+// mismatch score may be rejected by range checking -- confirm.\r
+FLT_OPT(       mismatch,                               -2.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       split,                                  1000.0,         1.0,            FLT_MAX)\r
+FLT_OPT(       evalue,                                 10.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       weak_evalue,                    10.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       evalue_g,                               10.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       chain_evalue,                   10.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       xdrop_u,                                16.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       xdrop_g,                                32.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       xdrop_ug,                               16.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       xdrop_nw,                               16.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       ka_gapped_lambda,               0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       ka_ungapped_lambda,             0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       ka_gapped_k,                    0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       ka_ungapped_k,                  0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       ka_dbsize,                              0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       chain_targetfract,              0.0,            0.0,            1.0)\r
+FLT_OPT(       targetfract,                    0.0,            0.0,            1.0)\r
+FLT_OPT(       queryfract,                             0.0,            0.0,            1.0)\r
+FLT_OPT(       fspenalty,                              16.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       sspenalty,                              20.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       seedt1,                                 13.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       seedt2,                                 11.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       lopen,                                  11.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       lext,                                   1.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       minh,                                   0.3,            0.0,            FLT_MAX)\r
+FLT_OPT(       xn,                                             8.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       dn,                                             1.4,            0.0,            FLT_MAX)\r
+FLT_OPT(       xa,                                             1.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       mindiv,                                 0.5,            0.0,            100.0)\r
+FLT_OPT(       abskew,                                 2,                      0.0,            100.0)\r
+FLT_OPT(       abx,                                    8.0,            0.0,            100.0)\r
+FLT_OPT(       minspanratio1,                  0.7,            0.0,            1.0)\r
+FLT_OPT(       minspanratio2,                  0.7,            0.0,            1.0)\r
+\r
+FLAG_OPT(      usersort)\r
+FLAG_OPT(      exact)\r
+FLAG_OPT(      optimal)\r
+FLAG_OPT(      self)\r
+FLAG_OPT(      ungapped)\r
+FLAG_OPT(      global)\r
+FLAG_OPT(      local)\r
+FLAG_OPT(      xlat)\r
+FLAG_OPT(      realign)\r
+FLAG_OPT(      hash)\r
+FLAG_OPT(      derep)\r
diff --git a/uchime_src/myutils.cpp b/uchime_src/myutils.cpp
new file mode 100644 (file)
index 0000000..4fa92b1
--- /dev/null
@@ -0,0 +1,1844 @@
+#include <time.h>\r
+#include <stdarg.h>\r
+#include <sys/stat.h>\r
+#include <errno.h>\r
+#include <string.h>\r
+#include <ctype.h>\r
+#include <string>\r
+#include <vector>\r
+#include <set>\r
+#include <map>\r
+#include <signal.h>\r
+#include <float.h>\r
+\r
+#ifdef _MSC_VER\r
+#include <crtdbg.h>\r
+#include <process.h>\r
+#include <windows.h>\r
+#include <psapi.h>\r
+#include <io.h>\r
+#else\r
+#include <sys/time.h>\r
+#include <sys/resource.h>\r
+#include <unistd.h>\r
+#include <errno.h>\r
+#include <fcntl.h>\r
+#include <stdlib.h>\r
+#endif\r
+\r
+#include "myutils.h"\r
+\r
+const char *SVN_VERSION =\r
+#include "svnversion.h"\r
+;\r
+\r
+#define        TEST_UTILS                      0\r
+\r
+using namespace std;\r
+\r
+const unsigned MY_IO_BUFSIZ = 32000;\r
+const unsigned MAX_FORMATTED_STRING_LENGTH = 64000;\r
+\r
+static char *g_IOBuffers[256];\r
+static time_t g_StartTime = time(0);\r
+static vector<string> g_Argv;\r
+static double g_PeakMemUseBytes;\r
+\r
+#if    TEST_UTILS\r
+// Manual exercise of Progress() and ProgressStep(); compiled only when\r
+// TEST_UTILS is set to a non-zero value.\r
+void TestUtils()\r
+       {\r
+       const int C = 100000000;\r
+       for (int i = 0; i < C; ++i)\r
+               ProgressStep(i, C, "something or other");\r
+\r
+       // Alternate longer and shorter messages ending in a carriage return.\r
+       Progress("\n");\r
+       Progress("Longer message\r");\r
+       Sleep(1000);\r
+       Progress("Short\r");\r
+       Sleep(1000);\r
+       Progress("And longer again\r");\r
+       Sleep(1000);\r
+       Progress("Shrt\n");\r
+       Sleep(1000);\r
+       const unsigned N = 10;\r
+       unsigned M = 10;\r
+       for (unsigned i = 0; i < N; ++i)\r
+               {\r
+               ProgressStep(i, N, "Allocating 1MB blocks");\r
+               for (unsigned j = 0; j < M; ++j)\r
+                       {\r
+                       ProgressStep(j, M, "Inner loop"); \r
+                       // NOTE(review): the returned pointer is discarded and the request is\r
+                       // 100000 bytes although the progress text says 1MB -- presumably a\r
+                       // deliberate way to grow memory use during the test; confirm.\r
+                       malloc(100000);\r
+                       Sleep(500);\r
+                       }\r
+               }\r
+       }\r
+#endif // TEST_UTILS\r
+\r
+static void AllocBuffer(FILE *f)\r
+       {\r
+       int fd = fileno(f);\r
+       if (fd < 0 || fd >= 256)\r
+               return;\r
+       if (g_IOBuffers[fd] == 0)\r
+               g_IOBuffers[fd] = myalloc(char, MY_IO_BUFSIZ);\r
+       setvbuf(f, g_IOBuffers[fd], _IOFBF, MY_IO_BUFSIZ);\r
+       }\r
+\r
+static void FreeBuffer(FILE *f)\r
+       {\r
+       int fd = fileno(f);\r
+       if (fd < 0 || fd >= 256)\r
+               return;\r
+       if (g_IOBuffers[fd] == 0)\r
+               return;\r
+       myfree(g_IOBuffers[fd]);\r
+       g_IOBuffers[fd] = 0;\r
+       }\r
+\r
+// Seconds between g_StartTime and the current time, as an unsigned value.\r
+unsigned GetElapsedSecs()\r
+       {\r
+       const time_t Now = time(0);\r
+       return (unsigned) (Now - g_StartTime);\r
+       }\r
+\r
+static unsigned g_NewCalls;\r
+static unsigned g_FreeCalls;\r
+static double g_InitialMemUseBytes;\r
+static double g_TotalAllocBytes;\r
+static double g_TotalFreeBytes;\r
+static double g_NetBytes;\r
+static double g_MaxNetBytes;\r
+\r
+// Write the file-level allocation counters to the log, one labelled line\r
+// per counter. The final line reports g_MaxNetBytes + g_InitialMemUseBytes.\r
+// NOTE(review): each MemBytesToStr() result is consumed by its own Log()\r
+// call; keep it that way unless the helper is known not to reuse storage.\r
+void LogAllocStats()\r
+       {\r
+       Log("\n");\r
+       Log("       Allocs  %u\n", g_NewCalls);\r
+       Log("        Frees  %u\n", g_FreeCalls);\r
+       Log("Initial alloc  %s\n", MemBytesToStr(g_InitialMemUseBytes));\r
+       Log("  Total alloc  %s\n", MemBytesToStr(g_TotalAllocBytes));\r
+       Log("   Total free  %s\n", MemBytesToStr(g_TotalFreeBytes));\r
+       Log("    Net bytes  %s\n", MemBytesToStr(g_NetBytes));\r
+       Log("Max net bytes  %s\n", MemBytesToStr(g_MaxNetBytes));\r
+       Log("   Peak total  %s\n", MemBytesToStr(g_MaxNetBytes + g_InitialMemUseBytes));\r
+       }\r
+\r
+// True when stat() succeeds for FileName, false otherwise.\r
+bool StdioFileExists(const string &FileName)\r
+       {\r
+       struct stat Info;\r
+       return stat(FileName.c_str(), &Info) == 0;\r
+       }\r
+\r
+// Report a failed assertion through Die(), giving the source file, the\r
+// line number and the text of the expression.\r
+void myassertfail(const char *Exp, const char *File, unsigned Line)\r
+       {\r
+       const char *const Fmt = "%s(%u) assert failed: %s";\r
+       Die(Fmt, File, Line, Exp);\r
+       }\r
+\r
+// bool wrapper around isatty(): true when isatty(fd) returns non-zero.\r
+bool myisatty(int fd)\r
+       {\r
+       const int Rc = isatty(fd);\r
+       return Rc != 0;\r
+       }\r
+\r
+#ifdef _MSC_VER\r
+#include <io.h>\r
+// fseeko() replacement for MSVC built on _fseeki64().\r
+// Returns 0 on success and -1 on failure.\r
+int fseeko(FILE *stream, off_t offset, int whence)\r
+       {\r
+       // _fseeki64 returns zero on success and a non-zero value (not\r
+       // necessarily -1) on failure, so every non-zero result is an error.\r
+       int Rc = _fseeki64(stream, offset, whence);\r
+       return (Rc != 0) ? -1 : 0;\r
+       }\r
+#define ftello(fm) (off_t) _ftelli64(fm)\r
+#endif\r
+\r
+void LogStdioFileState(FILE *f)\r
+       {\r
+       unsigned long tellpos = (unsigned long) ftello(f);\r
+       long fseek_pos = fseek(f, 0, SEEK_CUR);\r
+       int fd = fileno(f);\r
+       Log("FILE *     %p\n", f);\r
+       Log("fileno     %d\n", fd);\r
+       Log("feof       %d\n", feof(f));\r
+       Log("ferror     %d\n", ferror(f));\r
+       Log("ftell      %ld\n", tellpos);\r
+       Log("fseek      %ld\n", fseek_pos);\r
+#if    !defined(_GNU_SOURCE) && !defined(__APPLE_CC__)\r
+       fpos_t fpos;\r
+       int fgetpos_retval = fgetpos(f, &fpos);\r
+       Log("fpos       %ld (retval %d)\n", (long) fpos, fgetpos_retval);\r
+//     Log("eof        %d\n", _eof(fd));\r
+#endif\r
+#ifdef _MSC_VER\r
+       __int64 pos64 = _ftelli64(f);\r
+       Log("_ftelli64  %lld\n", pos64);\r
+#endif\r
+       }\r
+\r
+FILE *OpenStdioFile(const string &FileName)\r
+       {\r
+       const char *Mode = "rb";\r
+       FILE *f = fopen(FileName.c_str(), Mode);\r
+       if (f == 0)\r
+               {\r
+               if (errno == EFBIG)\r
+                       {\r
+                       if (sizeof(off_t) == 4)\r
+                               Die("File too big, off_t is 32 bits, recompile needed");\r
+                       else\r
+                               Die("Cannot open '%s', file too big (off_t=%u bits)",\r
+                                 FileName.c_str(), sizeof(off_t)*8);\r
+                       }\r
+               Die("Cannot open %s, errno=%d %s",\r
+                 FileName.c_str(), errno, strerror(errno));\r
+               }\r
+       AllocBuffer(f);\r
+       return f;\r
+       }\r
+\r
+FILE *CreateStdioFile(const string &FileName)\r
+       {\r
+       FILE *f = fopen(FileName.c_str(), "wb+");\r
+       if (0 == f)\r
+               Die("Cannot create %s, errno=%d %s",\r
+                 FileName.c_str(), errno, strerror(errno));\r
+       AllocBuffer(f);\r
+       return f;\r
+       }\r
+\r
+void SetStdioFilePos(FILE *f, off_t Pos)\r
+       {\r
+       if (0 == f)\r
+               Die("SetStdioFilePos failed, f=NULL");\r
+       int Ok = fseeko(f, Pos, SEEK_SET);\r
+       off_t NewPos = ftello(f);\r
+       if (Ok != 0 || Pos != NewPos)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("SetStdioFilePos(%d) failed, Ok=%d NewPos=%d",\r
+                 (int) Pos, Ok, (int) NewPos);\r
+               }\r
+       }\r
+\r
+void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes)\r
+       {\r
+       if (0 == f)\r
+               Die("ReadStdioFile failed, f=NULL");\r
+       SetStdioFilePos(f, Pos);\r
+       unsigned BytesRead = fread(Buffer, 1, Bytes, f);\r
+       if (BytesRead != Bytes)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d",\r
+                 (int) Bytes, (int) BytesRead, errno);\r
+               }\r
+       }\r
+\r
+void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes)\r
+       {\r
+       if (0 == f)\r
+               Die("ReadStdioFile failed, f=NULL");\r
+       unsigned BytesRead = fread(Buffer, 1, Bytes, f);\r
+       if (BytesRead != Bytes)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d",\r
+                 (int) Bytes, (int) BytesRead, errno);\r
+               }\r
+       }\r
+\r
+// Return values from functions like lseek, ftell, fgetpos are\r
+// "undefined" for files that cannot seek. Attempt to detect\r
+// whether a file can seek by checking for error returns.\r
+bool CanSetStdioFilePos(FILE *f)\r
+       {\r
+// Common special cases\r
+       if (f == stdin || f == stdout || f == stderr)\r
+               return false;\r
+\r
+       fpos_t CurrPos;\r
+       int ok1 = fgetpos(f, &CurrPos);\r
+       if (ok1 < 0)\r
+               return false;\r
+       int ok2 = fseek(f, 0, SEEK_END);\r
+       if (ok2 < 0)\r
+               return false;\r
+       fpos_t EndPos;\r
+       int ok3 = fgetpos(f, &EndPos);\r
+       int ok4 = fsetpos(f, &CurrPos);\r
+       if (!ok3 || !ok4)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+byte *ReadAllStdioFile(FILE *f, unsigned &FileSize)\r
+       {\r
+       const unsigned BUFF_SIZE = 1024*1024;\r
+\r
+       if (CanSetStdioFilePos(f))\r
+               {\r
+               off_t Pos = GetStdioFilePos(f);\r
+               off_t FileSize = GetStdioFileSize(f);\r
+               if (FileSize > UINT_MAX)\r
+                       Die("ReadAllStdioFile: file size > UINT_MAX");\r
+               SetStdioFilePos(f, 0);\r
+               byte *Buffer = myalloc(byte, unsigned(FileSize));\r
+               ReadStdioFile(f, Buffer, unsigned(FileSize));\r
+               SetStdioFilePos(f, Pos);\r
+               FileSize = unsigned(FileSize);\r
+               return Buffer;\r
+               }\r
+\r
+// Can't seek, read one buffer at a time.\r
+       FileSize = 0;\r
+\r
+// Just to initialize so that first call to realloc works.\r
+       byte *Buffer = (byte *) malloc(4);\r
+       if (Buffer == 0)\r
+               Die("ReadAllStdioFile, out of memory");\r
+       for (;;)\r
+               {\r
+               Buffer = (byte *) realloc(Buffer, FileSize + BUFF_SIZE);\r
+               unsigned BytesRead = fread(Buffer + FileSize, 1, BUFF_SIZE, f);\r
+               FileSize += BytesRead;\r
+               if (BytesRead < BUFF_SIZE)\r
+                       {\r
+                       Buffer = (byte *) realloc(Buffer, FileSize);\r
+                       return Buffer;\r
+                       }\r
+               }\r
+       }\r
+\r
+byte *ReadAllStdioFile(const std::string &FileName, off_t &FileSize)\r
+       {\r
+#if    WIN32\r
+       FILE *f = OpenStdioFile(FileName);\r
+       FileSize = GetStdioFileSize(f);\r
+       CloseStdioFile(f);\r
+\r
+       HANDLE h = CreateFile(FileName.c_str(), GENERIC_READ, FILE_SHARE_READ,\r
+         NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);\r
+       if (h == INVALID_HANDLE_VALUE)\r
+               Die("ReadAllStdioFile:Open(%s) failed", FileName.c_str());\r
+\r
+       unsigned uFileSize = (unsigned) FileSize;\r
+       if ((off_t) uFileSize != FileSize)\r
+               Die("File too big (%.1f Gb): %s", double(FileSize)/1e9, FileName.c_str());\r
+\r
+       byte *Buffer = myalloc(byte, uFileSize);\r
+       DWORD BytesRead;\r
+       ReadFile(h, Buffer, uFileSize, &BytesRead, NULL);\r
+       if (FileSize != BytesRead)\r
+               Die("ReadAllStdioFile:Error reading %s, attempted %u got %u",\r
+                 FileName.c_str(), FileSize, (unsigned) BytesRead);\r
+\r
+       CloseHandle(h);\r
+       return Buffer;\r
+#else\r
+       int h = open(FileName.c_str(), O_RDONLY);\r
+       if (h < 0)\r
+               Die("ReadAllStdioFile:Cannot open %s", FileName.c_str());\r
+       FileSize = lseek(h, 0, SEEK_END);\r
+       if (FileSize == (off_t) (-1))\r
+               Die("ReadAllStdioFile:Error seeking %s", FileName.c_str());\r
+       // byte *Buffer = myalloc<byte>(FileSize);\r
+       size_t stBytes = (size_t) FileSize;\r
+       if ((off_t) stBytes != FileSize)\r
+               Die("ReadAllStdioFile: off_t overflow");\r
+       byte *Buffer = (byte *) malloc(stBytes);\r
+       if (Buffer == 0)\r
+               Die("ReadAllStdioFile: failed to allocate %s", MemBytesToStr(stBytes));\r
+       lseek(h, 0, SEEK_SET);\r
+       size_t n = read(h, Buffer, stBytes);\r
+       if (n != FileSize)\r
+               Die("ReadAllStdioFile, Error reading %s, attempted %g got %g",\r
+                 FileName.c_str(), (double) FileSize, (double) n);\r
+       close(h);\r
+       return Buffer;\r
+#endif\r
+       }\r
+\r
+void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes)\r
+       {\r
+       if (0 == f)\r
+               Die("WriteStdioFile failed, f=NULL");\r
+       SetStdioFilePos(f, Pos);\r
+       unsigned BytesWritten = fwrite(Buffer, 1, Bytes, f);\r
+       if (BytesWritten != Bytes)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d",\r
+                 (int) Bytes, (int) BytesWritten, errno);\r
+               }\r
+       }\r
+\r
+void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes)\r
+       {\r
+       if (0 == f)\r
+               Die("WriteStdioFile failed, f=NULL");\r
+       unsigned BytesWritten = fwrite(Buffer, 1, Bytes, f);\r
+       if (BytesWritten != Bytes)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d",\r
+                 (int) Bytes, (int) BytesWritten, errno);\r
+               }\r
+       }\r
+\r
+// Return false on EOF, true if line successfully read.\r
+bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes)\r
+       {\r
+       if (feof(f))\r
+               return false;\r
+       if ((int) Bytes < 0)\r
+               Die("ReadLineStdioFile: Bytes < 0");\r
+       char *RetVal = fgets(Line, (int) Bytes, f);\r
+       if (NULL == RetVal)\r
+               {\r
+               if (feof(f))\r
+                       return false;\r
+               if (ferror(f))\r
+                       Die("ReadLineStdioFile: errno=%d", errno);\r
+               Die("ReadLineStdioFile: fgets=0, feof=0, ferror=0");\r
+               }\r
+\r
+       if (RetVal != Line)\r
+               Die("ReadLineStdioFile: fgets != Buffer");\r
+       unsigned n = strlen(Line);\r
+       if (n < 1 || Line[n-1] != '\n')\r
+               Die("ReadLineStdioFile: line too long or missing end-of-line");\r
+       if (n > 0 && (Line[n-1] == '\r' || Line[n-1] == '\n'))\r
+               Line[n-1] = 0;\r
+       if (n > 1 && (Line[n-2] == '\r' || Line[n-2] == '\n'))\r
+               Line[n-2] = 0;\r
+       return true;\r
+       }\r
+\r
+// Return false on EOF, true if line successfully read.\r
+bool ReadLineStdioFile(FILE *f, string &Line)\r
+       {\r
+       Line.clear();\r
+       for (;;)\r
+               {\r
+               int c = fgetc(f);\r
+               if (c == -1)\r
+                       {\r
+                       if (feof(f))\r
+                               {\r
+                               if (!Line.empty())\r
+                                       return true;\r
+                               return false;\r
+                               }\r
+                       Die("ReadLineStdioFile, errno=%d", errno);\r
+                       }\r
+               if (c == '\r')\r
+                       continue;\r
+               if (c == '\n')\r
+                       return true;\r
+               Line.push_back((char) c);\r
+               }\r
+       }\r
+\r
+// Copies all of fFrom regardless of current\r
+// file position, appends to fTo.\r
+void AppendStdioFileToFile(FILE *fFrom, FILE *fTo)\r
+       {\r
+       off_t SavedFromPos = GetStdioFilePos(fFrom);\r
+       off_t FileSize = GetStdioFileSize(fFrom);\r
+       const off_t BUFF_SIZE = 1024*1024;\r
+       char *Buffer = myalloc(char, BUFF_SIZE);\r
+       SetStdioFilePos(fFrom, 0);\r
+       off_t BytesRemaining = FileSize;\r
+       while (BytesRemaining > 0)\r
+               {\r
+               off_t BytesToRead = BytesRemaining;\r
+               if (BytesToRead > BUFF_SIZE)\r
+                       BytesToRead = BUFF_SIZE;\r
+               ReadStdioFile(fFrom, Buffer, (unsigned) BytesToRead);\r
+               WriteStdioFile(fTo, Buffer, (unsigned) BytesToRead);\r
+               BytesRemaining -= BytesToRead;\r
+               }\r
+       SetStdioFilePos(fFrom, SavedFromPos);\r
+       }\r
+\r
+void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo)\r
+       {\r
+       int Ok = rename(FileNameFrom.c_str(), FileNameTo.c_str());\r
+       if (Ok != 0)\r
+               Die("RenameStdioFile(%s,%s) failed, errno=%d %s",\r
+                 FileNameFrom.c_str(), FileNameTo.c_str(), errno, strerror(errno));\r
+       }\r
+\r
+void FlushStdioFile(FILE *f)\r
+       {\r
+       int Ok = fflush(f);\r
+       if (Ok != 0)\r
+               Die("fflush(%p)=%d,", f, Ok);\r
+       }\r
+\r
+void CloseStdioFile(FILE *f)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+       int Ok = fclose(f);\r
+       if (Ok != 0)\r
+               Die("fclose(%p)=%d", f, Ok);\r
+       FreeBuffer(f);\r
+       }\r
+\r
+// Current position of f as reported by ftello(); calls Die() if ftello\r
+// returns a negative value.\r
+off_t GetStdioFilePos(FILE *f)\r
+       {\r
+       const off_t Where = ftello(f);\r
+       if (Where < 0)\r
+               Die("ftello=%d", (int) Where);\r
+       return Where;\r
+       }\r
+\r
+off_t GetStdioFileSize(FILE *f)\r
+       {\r
+       off_t CurrentPos = GetStdioFilePos(f);\r
+       int Ok = fseeko(f, 0, SEEK_END);\r
+       if (Ok < 0)\r
+               Die("fseek in GetFileSize");\r
+\r
+       off_t Length = ftello(f);\r
+       if (Length < 0)\r
+               Die("ftello in GetFileSize");\r
+       SetStdioFilePos(f, CurrentPos);\r
+       return Length;\r
+       }\r
+\r
+// Delete FileName with remove(); calls Die() on failure.\r
+void DeleteStdioFile(const string &FileName)\r
+       {\r
+       const char *Path = FileName.c_str();\r
+       if (remove(Path) != 0)\r
+               Die("remove(%s) failed, errno=%d %s", Path, errno, strerror(errno));\r
+       }\r
+\r
+// Format into Str, printf-style, from an argument list. The text is built\r
+// in a function-local static buffer of MAX_FORMATTED_STRING_LENGTH bytes,\r
+// so longer output is truncated.\r
+void myvstrprintf(string &Str, const char *Format, va_list ArgList)\r
+       {\r
+       static char szStr[MAX_FORMATTED_STRING_LENGTH];\r
+       const unsigned Last = MAX_FORMATTED_STRING_LENGTH - 1;\r
+       vsnprintf(szStr, Last, Format, ArgList);\r
+       // Force termination regardless of what vsnprintf wrote.\r
+       szStr[Last] = '\0';\r
+       Str = szStr;\r
+       }\r
+\r
// printf-style overload: collects the variadic arguments into a va_list and
// forwards to the va_list overload, which stores the formatted text in Str.
void myvstrprintf(string &Str, const char *Format, ...)
        {
        va_list Args;
        va_start(Args, Format);
        myvstrprintf(Str, Format, Args);
        va_end(Args);
        }
+\r
+FILE *g_fLog = 0;\r
+\r
+void SetLogFileName(const string &FileName)\r
+       {\r
+       if (g_fLog != 0)\r
+               CloseStdioFile(g_fLog);\r
+       g_fLog = 0;\r
+       if (FileName.empty())\r
+               return;\r
+       g_fLog = CreateStdioFile(FileName);\r
+       }\r
+\r
+void Log(const char *Format, ...)\r
+       {\r
+       if (g_fLog == 0)\r
+               return;\r
+\r
+       static bool InLog = false;\r
+       if (InLog)\r
+               return;\r
+\r
+       InLog = true;\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       vfprintf(g_fLog, Format, ArgList);\r
+       va_end(ArgList);\r
+       fflush(g_fLog);\r
+       InLog = false;\r
+       }\r
+\r
+void Die(const char *Format, ...)\r
+       {\r
+       static bool InDie = false;\r
+       if (InDie)\r
+               exit(1);\r
+       InDie = true;\r
+       string Msg;\r
+\r
+       if (g_fLog != 0)\r
+               setbuf(g_fLog, 0);\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       myvstrprintf(Msg, Format, ArgList);\r
+       va_end(ArgList);\r
+\r
+       fprintf(stderr, "\n\n");\r
+       Log("\n");\r
+       time_t t = time(0);\r
+       Log("%s", asctime(localtime(&t)));\r
+       for (unsigned i = 0; i < g_Argv.size(); i++)\r
+               {\r
+               fprintf(stderr, (i == 0) ? "%s" : " %s", g_Argv[i].c_str());\r
+               Log((i == 0) ? "%s" : " %s", g_Argv[i].c_str());\r
+               }\r
+       fprintf(stderr, "\n");\r
+       Log("\n");\r
+\r
+       time_t CurrentTime = time(0);\r
+       unsigned ElapsedSeconds = unsigned(CurrentTime - g_StartTime);\r
+       const char *sstr = SecsToStr(ElapsedSeconds);\r
+       Log("Elapsed time: %s\n", sstr);\r
+\r
+       const char *szStr = Msg.c_str();\r
+       fprintf(stderr, "\n---Fatal error---\n%s\n", szStr);\r
+       Log("\n---Fatal error---\n%s\n", szStr);\r
+\r
+#ifdef _MSC_VER\r
+       if (IsDebuggerPresent())\r
+               __debugbreak();\r
+       _CrtSetDbgFlag(0);\r
+#endif\r
+\r
+       exit(1);\r
+       }\r
+\r
+void Warning(const char *Format, ...)\r
+       {\r
+       string Msg;\r
+\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       myvstrprintf(Msg, Format, ArgList);\r
+       va_end(ArgList);\r
+\r
+       const char *szStr = Msg.c_str();\r
+\r
+       fprintf(stderr, "\nWARNING: %s\n", szStr);\r
+       if (g_fLog != stdout)\r
+               {\r
+               Log("\nWARNING: %s\n", szStr);\r
+               fflush(g_fLog);\r
+               }\r
+       }\r
+\r
+#ifdef _MSC_VER\r
+double GetMemUseBytes()\r
+       {\r
+       HANDLE hProc = GetCurrentProcess();\r
+       PROCESS_MEMORY_COUNTERS PMC;\r
+       BOOL bOk = GetProcessMemoryInfo(hProc, &PMC, sizeof(PMC));\r
+       if (!bOk)\r
+               return 1000000;\r
+       double Bytes = (double) PMC.WorkingSetSize;\r
+       if (Bytes > g_PeakMemUseBytes)\r
+               g_PeakMemUseBytes = Bytes;\r
+       return Bytes;\r
+       }\r
+#elif  linux || __linux__\r
+double GetMemUseBytes()\r
+       {\r
+       static char statm[64];\r
+       static int PageSize = 1;\r
+       if (0 == statm[0])\r
+               {\r
+               PageSize = sysconf(_SC_PAGESIZE);\r
+               pid_t pid = getpid();\r
+               sprintf(statm, "/proc/%d/statm", (int) pid);\r
+               }\r
+\r
+       int fd = open(statm, O_RDONLY);\r
+       if (-1 == fd)\r
+               return 1000000;\r
+       char Buffer[64];\r
+       int n = read(fd, Buffer, sizeof(Buffer) - 1);\r
+       close(fd);\r
+       fd = -1;\r
+\r
+       if (n <= 0)\r
+               return 1000000;\r
+\r
+       Buffer[n] = 0;\r
+       double Pages = atof(Buffer);\r
+\r
+       double Bytes = Pages*PageSize;\r
+       if (Bytes > g_PeakMemUseBytes)\r
+               g_PeakMemUseBytes = Bytes;\r
+       return Bytes;\r
+       }\r
+#elif defined(__MACH__)\r
+#include <memory.h>\r
+#include <stdlib.h>\r
+#include <stdio.h>\r
+#include <unistd.h>\r
+#include <sys/types.h>\r
+#include <sys/sysctl.h>\r
+#include <sys/socket.h>\r
+#include <sys/gmon.h>\r
+#include <mach/vm_param.h>\r
+#include <netinet/in.h>\r
+#include <netinet/icmp6.h>\r
+#include <sys/vmmeter.h>\r
+#include <sys/proc.h>\r
+#include <mach/vm_statistics.h>\r
+#include <mach/task_info.h>\r
+#include <mach/task.h>\r
+#include <mach/mach_init.h>\r
+\r
+#define DEFAULT_MEM_USE        100000000.0\r
+\r
+double GetMemUseBytes()\r
+       {\r
+       task_t mytask = mach_task_self();\r
+       struct task_basic_info ti;\r
+       memset((void *) &ti, 0, sizeof(ti));\r
+       mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;\r
+       kern_return_t ok = task_info(mytask, TASK_BASIC_INFO, (task_info_t) &ti, &count);\r
+       if (ok == KERN_INVALID_ARGUMENT)\r
+               return DEFAULT_MEM_USE;\r
+\r
+       if (ok != KERN_SUCCESS)\r
+               return DEFAULT_MEM_USE;\r
+\r
+       double Bytes = (double ) ti.resident_size;\r
+       if (Bytes > g_PeakMemUseBytes)\r
+               g_PeakMemUseBytes = Bytes;\r
+       return Bytes;\r
+       }\r
+#else\r
// Fallback for platforms with no memory query above: always reports zero.
double GetMemUseBytes()
        {
        const double Unknown = 0;
        return Unknown;
        }
+#endif\r
+\r
+double GetPeakMemUseBytes()\r
+       {\r
+       return g_PeakMemUseBytes;\r
+       }\r
+\r
// Formats Secs as "MM:SS", or as "HH:MM:SS" when the hour count is non-zero.
// The result points to a static buffer that the next call overwrites.
const char *SecsToHHMMSS(int Secs)
        {
        static char Str[16];
        const int Hours = Secs/3600;
        const int Minutes = (Secs - Hours*3600)/60;
        const int Seconds = Secs%60;
        if (Hours != 0)
                sprintf(Str, "%02d:%02d:%02d", Hours, Minutes, Seconds);
        else
                sprintf(Str, "%02d:%02d", Minutes, Seconds);
        return Str;
        }
+\r
+const char *SecsToStr(double Secs)\r
+       {\r
+       if (Secs >= 10.0)\r
+               return SecsToHHMMSS((int) Secs);\r
+\r
+       static char Str[16];\r
+       if (Secs < 1e-6)\r
+               sprintf(Str, "%.2gs", Secs);\r
+       else if (Secs < 1e-3)\r
+               sprintf(Str, "%.2fms", Secs*1e3);\r
+       else\r
+               sprintf(Str, "%.3fs", Secs);\r
+       return Str;\r
+       }\r
+\r
// Formats a byte count with a kb/Mb/Gb suffix, choosing the unit and the
// number of decimals from the magnitude. Values of 100e9 or more are printed
// with "%.3g" and a "b" suffix. The result points to a static buffer that
// the next call overwrites.
const char *MemBytesToStr(double Bytes)
        {
        static char Str[32];

        // Defaults describe the final fall-through case.
        const char *Layout = "%.3gb";
        double Shown = Bytes;

        if (Bytes < 1e6)
                {
                Layout = "%.1fkb";
                Shown = Bytes/1e3;
                }
        else if (Bytes < 10e6)
                {
                Layout = "%.1fMb";
                Shown = Bytes/1e6;
                }
        else if (Bytes < 1e9)
                {
                Layout = "%.0fMb";
                Shown = Bytes/1e6;
                }
        else if (Bytes < 10e9)
                {
                Layout = "%.1fGb";
                Shown = Bytes/1e9;
                }
        else if (Bytes < 100e9)
                {
                Layout = "%.0fGb";
                Shown = Bytes/1e9;
                }

        sprintf(Str, Layout, Shown);
        return Str;
        }
+\r
// Formats an unsigned count compactly: values below 10000 are printed in
// full, larger ones are scaled and given a k/M/G suffix. The result points
// to a static buffer that the next call overwrites.
const char *IntToStr(unsigned i)
        {
        static char Str[32];

        if (i < 10000)
                {
                sprintf(Str, "%u", i);
                return Str;
                }

        const double d = (double) i;
        // Defaults describe the final fall-through case.
        const char *Layout = "%.3g";
        double Shown = d;

        if (i < 1e6)
                {
                Layout = "%.1fk";
                Shown = d/1e3;
                }
        else if (i < 10e6)
                {
                Layout = "%.1fM";
                Shown = d/1e6;
                }
        else if (i < 1e9)
                {
                Layout = "%.0fM";
                Shown = d/1e6;
                }
        else if (i < 10e9)
                {
                Layout = "%.1fG";
                Shown = d/1e9;
                }
        else if (i < 100e9)
                {
                Layout = "%.0fG";
                Shown = d/1e9;
                }

        sprintf(Str, Layout, Shown);
        return Str;
        }
+\r
// Formats d compactly, choosing precision and a k/M/G suffix from its
// magnitude. The result points to a static buffer that the next call
// overwrites.
//
// The magnitude selects the format, but the signed value d is what gets
// printed in every branch. Previously the two branches for |d| < 1 printed
// fabs(d), so small negative values lost their sign.
const char *FloatToStr(double d)
        {
        static char Str[32];

        const double a = fabs(d);
        if (a < 0.01)
                // Print zero as "0" whatever its sign bit, as before.
                sprintf(Str, "%.3g", a == 0 ? 0.0 : d);
        else if (a < 1)
                sprintf(Str, "%.3f", d);
        else if (a <= 10)
                {
                // Drop the decimal when the fractional part is negligible.
                double intpart;
                if (modf(a, &intpart) < 0.05)
                        sprintf(Str, "%.0f", d);
                else
                        sprintf(Str, "%.1f", d);
                }
        else if (a < 10000)
                sprintf(Str, "%.0f", d);
        else if (a < 1e6)
                sprintf(Str, "%.1fk", d/1e3);
        else if (a < 10e6)
                sprintf(Str, "%.1fM", d/1e6);
        else if (a < 1e9)
                sprintf(Str, "%.0fM", d/1e6);
        else if (a < 10e9)
                sprintf(Str, "%.1fG", d/1e9);
        else if (a < 100e9)
                sprintf(Str, "%.0fG", d/1e9);
        else
                sprintf(Str, "%.3g", d);
        return Str;
        }
+\r
// Built-in command-line options. Each opt_X holds the option's value and the
// matching optset_X is the flag registered alongside it in MyCmdLine().
bool opt_quiet = false;
bool optset_quiet = false;

bool opt_version = false;
bool optset_version = false;

bool opt_logopts = false;
bool optset_logopts = false;

bool opt_compilerinfo = false;
bool optset_compilerinfo = false;

bool opt_help = false;
bool optset_help = false;

string opt_log;
bool optset_log = false;
+\r
// File-local state shared by Progress(), ProgressStep() and
// GetProgressLevelStr().
static string g_CurrentProgressLine;
static string g_ProgressDesc;               // description shown after the level
static unsigned g_ProgressIndex = 0;        // index passed to the latest reported step
static unsigned g_ProgressCount = 0;        // step count given at step 0

static unsigned g_CurrProgressLineLength = 0;   // characters written on the current line
static unsigned g_LastProgressLineLength = 0;   // length of the line being overwritten
static unsigned g_CountsInterval = 0;           // ProgressStep() checks the clock every this many calls
static unsigned g_StepCalls = 0;                // ProgressStep() calls since step 0
static time_t g_TimeLastOutputStep = 0;         // time of the last step that was printed
+\r
+static string &GetProgressPrefixStr(string &s)\r
+       {\r
+       double Bytes = GetMemUseBytes();\r
+       unsigned Secs = GetElapsedSecs();\r
+       s = string(SecsToHHMMSS(Secs));\r
+       if (Bytes > 0)\r
+               {\r
+               s.push_back(' ');\r
+               char Str[32];\r
+               sprintf(Str, "%5.5s", MemBytesToStr(Bytes));\r
+               s += string(Str);\r
+               }\r
+       s.push_back(' ');\r
+       return s;\r
+       }\r
+\r
+void ProgressLog(const char *Format, ...)\r
+       {\r
+       string Str;\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       myvstrprintf(Str, Format, ArgList);\r
+       va_end(ArgList);\r
+\r
+       Log("%s", Str.c_str());\r
+       Progress("%s", Str.c_str());\r
+       }\r
+\r
+void Progress(const char *Format, ...)\r
+       {\r
+       if (opt_quiet)\r
+               return;\r
+\r
+       string Str;\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       myvstrprintf(Str, Format, ArgList);\r
+       va_end(ArgList);\r
+\r
+#if    0\r
+       Log("Progress(");\r
+       for (unsigned i = 0; i < Str.size(); ++i)\r
+               {\r
+               char c = Str[i];\r
+               if (c == '\r')\r
+                       Log("\\r");\r
+               else if (c == '\n')\r
+                       Log("\\n");\r
+               else\r
+                       Log("%c", c);\r
+               }\r
+       Log(")\n");\r
+#endif //0\r
+\r
+       for (unsigned i = 0; i < Str.size(); ++i)\r
+               {\r
+               if (g_CurrProgressLineLength == 0)\r
+                       {\r
+                       string s;\r
+                       GetProgressPrefixStr(s);\r
+                       for (unsigned j = 0; j < s.size(); ++j)\r
+                               {\r
+                               fputc(s[j], stderr);\r
+                               ++g_CurrProgressLineLength;\r
+                               }\r
+                       }\r
+\r
+               char c = Str[i];\r
+               if (c == '\n' || c == '\r')\r
+                       {\r
+                       for (unsigned j = g_CurrProgressLineLength; j < g_LastProgressLineLength; ++j)\r
+                               fputc(' ', stderr);\r
+                       if (c == '\n')\r
+                               g_LastProgressLineLength = 0;\r
+                       else\r
+                               g_LastProgressLineLength = g_CurrProgressLineLength;\r
+                       g_CurrProgressLineLength = 0;\r
+                       fputc(c, stderr);\r
+                       }\r
+               else\r
+                       {\r
+                       fputc(c, stderr);\r
+                       ++g_CurrProgressLineLength;\r
+                       }\r
+               }\r
+       }\r
+\r
+void ProgressExit()\r
+       {\r
+       time_t Now = time(0);\r
+       struct tm *t = localtime(&Now);\r
+       const char *s = asctime(t);\r
+       unsigned Secs = GetElapsedSecs();\r
+\r
+       Log("\n");\r
+       Log("Finished %s", s); // there is a newline in s\r
+       Log("Elapsed time %s\n", SecsToHHMMSS((int) Secs));\r
+       Log("Max memory %s\n", MemBytesToStr(g_PeakMemUseBytes));\r
+#if    WIN32 && DEBUG\r
+// Skip exit(), which can be very slow in DEBUG build\r
+// VERY DANGEROUS practice, because it skips global destructors.\r
+// But if you know the rules, you can break 'em, right?\r
+       ExitProcess(0);\r
+#endif\r
+       }\r
+\r
// Returns x as a percentage of y, formatted "%5.1f%%".
// When y is zero the result is the literal "100%" if x is also zero and
// "inf%" otherwise. The formatted result points to a static buffer that the
// next call overwrites.
const char *PctStr(double x, double y)
        {
        if (y == 0)
                {
                if (x == 0)
                        return "100%";
                else
                        return "inf%";
                }
        // "%f" prints every integer digit, so a finite double can need over
        // 300 characters. The former 16-byte buffer overflowed for large
        // ratios; this size covers the largest finite double plus sign,
        // decimal, '%' and the terminator.
        static char Str[320];
        double p = x*100.0/y;
        sprintf(Str, "%5.1f%%", p);
        return Str;
        }
+\r
+string &GetProgressLevelStr(string &s)\r
+       {\r
+       unsigned Index = g_ProgressIndex;\r
+       unsigned Count = g_ProgressCount;\r
+       if (Count == UINT_MAX)\r
+               {\r
+               if (Index == UINT_MAX)\r
+                       s = "100%";\r
+               else\r
+                       {\r
+                       char Tmp[16];\r
+                       sprintf(Tmp, "%u", Index); \r
+                       s = Tmp;\r
+                       }\r
+               }\r
+       else\r
+               s = string(PctStr(Index+1, Count));\r
+       s += string(" ") + g_ProgressDesc;\r
+       return s;\r
+       }\r
+\r
+void ProgressStep(unsigned i, unsigned N, const char *Format, ...)\r
+       {\r
+       if (opt_quiet)\r
+               return;\r
+\r
+       if (i == 0)\r
+               {\r
+               string Str;\r
+               va_list ArgList;\r
+               va_start(ArgList, Format);\r
+               myvstrprintf(Str, Format, ArgList);\r
+               va_end(ArgList);\r
+               g_ProgressDesc = Str;\r
+               g_ProgressIndex = 0;\r
+               g_ProgressCount = N;\r
+               g_CountsInterval = 1;\r
+               g_StepCalls = 0;\r
+               g_TimeLastOutputStep = 0;\r
+               if (g_CurrProgressLineLength > 0)\r
+                       Progress("\n");\r
+               }\r
+\r
+       if (i >= N && i != UINT_MAX)\r
+               Die("ProgressStep(%u,%u)", i, N);\r
+       bool IsLastStep = (i == UINT_MAX || i + 1 == N);\r
+       if (!IsLastStep)\r
+               {\r
+               ++g_StepCalls;\r
+               if (g_StepCalls%g_CountsInterval != 0)\r
+                       return;\r
+\r
+               time_t Now = time(0);\r
+               if (Now == g_TimeLastOutputStep)\r
+                       {\r
+                       if (g_CountsInterval < 128)\r
+                               g_CountsInterval = (g_CountsInterval*3)/2;\r
+                       else\r
+                               g_CountsInterval += 64;\r
+                       return;\r
+                       }\r
+               else\r
+                       {\r
+                       time_t Secs = Now - g_TimeLastOutputStep;\r
+                       if (Secs > 1)\r
+                               g_CountsInterval = unsigned(g_CountsInterval/(Secs*8));\r
+                       }\r
+\r
+               if (g_CountsInterval < 1)\r
+                       g_CountsInterval = 1;\r
+\r
+               g_TimeLastOutputStep = Now;\r
+               }\r
+\r
+       g_ProgressIndex = i;\r
+\r
+       if (i > 0)\r
+               {\r
+               va_list ArgList;\r
+               va_start(ArgList, Format);\r
+               myvstrprintf(g_ProgressDesc, Format, ArgList);\r
+               }\r
+\r
+       string LevelStr;\r
+       GetProgressLevelStr(LevelStr);\r
+       Progress(" %s\r", LevelStr.c_str());\r
+\r
+       if (IsLastStep)\r
+               {\r
+               g_CountsInterval = 1;\r
+               fputc('\n', stderr);\r
+               }\r
+       }\r
+\r
// Kinds of command-line option handled by the option table.
enum OptType
        {
        OT_Flag,
        OT_Tog,
        OT_Int,
        OT_Uns,
        OT_Str,
        OT_Float,
        OT_Enum
        };

// Description of one command-line option: where its value and its "was set"
// flag are stored, its name and type, and the limits, default and help text
// recorded when it was defined. Entries are ordered by LongName.
struct OptInfo
        {
        void *Value;            // storage for the option's value; pointee type depends on Type
        bool *OptSet;           // flag set to true when the option is given; may be null
        string LongName;
        OptType Type;
        int iMin;
        int iMax;
        unsigned uMin;
        unsigned uMax;
        double dMin;
        double dMax;
        map<string, unsigned> EnumValues;

        bool bDefault;
        int iDefault;
        unsigned uDefault;
        double dDefault;
        string strDefault;

        string Help;

        // The Define*Opt() helpers fill in only the fields relevant to one
        // option type, so give every scalar and pointer member a defined
        // value instead of leaving the rest indeterminate.
        OptInfo()
          : Value(0), OptSet(0), Type(OT_Flag),
            iMin(0), iMax(0), uMin(0), uMax(0), dMin(0.0), dMax(0.0),
            bDefault(false), iDefault(0), uDefault(0), dDefault(0.0)
                {
                }

        bool operator<(const OptInfo &rhs) const
                {
                return LongName < rhs.LongName;
                }
        };

// All options defined so far, ordered by long name.
static set<OptInfo> g_Opts;
+\r
+void Help()\r
+       {\r
+       printf("\n");\r
+\r
+       void Usage();\r
+       Usage();\r
+\r
+       for (set<OptInfo>::const_iterator p = g_Opts.begin(); p != g_Opts.end(); ++p)\r
+               {\r
+               const OptInfo &Opt = *p;\r
+\r
+               printf("\n");\r
+               string LongName = Opt.LongName.c_str();\r
+               if (Opt.Type == OT_Tog)\r
+                       LongName = string("[no]") + LongName;\r
+               printf("  --%s ", LongName.c_str());\r
+\r
+               switch (Opt.Type)\r
+                       {\r
+               case OT_Flag:\r
+                       break;\r
+               case OT_Tog:\r
+                       break;\r
+               case OT_Int:\r
+                       printf("<int>");\r
+                       break;\r
+               case OT_Uns:\r
+                       printf("<uint>");\r
+                       break;\r
+               case OT_Str:\r
+                       printf("<str>");\r
+                       break;\r
+               case OT_Float:\r
+                       printf("<float>");\r
+                       break;\r
+               case OT_Enum:\r
+                       printf("<enum>");\r
+                       break;\r
+               default:\r
+                       printf("??type");\r
+                       break;\r
+                       }\r
+\r
+               printf("  ");\r
+               const string &s = Opt.Help;\r
+               for (string::const_iterator q = s.begin(); q != s.end(); ++q)\r
+                       {\r
+                       char c = *q;\r
+                       if (c == '\n')\r
+                               printf("\n   ");\r
+                       else\r
+                               printf("%c", c);\r
+                       }\r
+               printf("\n");\r
+               }\r
+       printf("\n");\r
+       exit(0);\r
+       }\r
+\r
+void CmdLineErr(const char *Format, ...)\r
+       {\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       string Str;\r
+       myvstrprintf(Str, Format, ArgList);\r
+       va_end(ArgList);\r
+       fprintf(stderr, "\n");\r
+       fprintf(stderr, "Invalid command line\n");\r
+       fprintf(stderr, "%s\n", Str.c_str());\r
+       fprintf(stderr, "For list of command-line options use --help.\n");\r
+       fprintf(stderr, "\n");\r
+       exit(1);\r
+       }\r
+\r
+static set<OptInfo>::iterator GetOptInfo(const string &LongName,\r
+  bool ErrIfNotFound)\r
+       {\r
+       for (set<OptInfo>::iterator p = g_Opts.begin();\r
+         p != g_Opts.end(); ++p)\r
+               {\r
+               const OptInfo &Opt = *p;\r
+               if (Opt.LongName == LongName)\r
+                       return p;\r
+               if (Opt.Type == OT_Tog && "no" + Opt.LongName == LongName)\r
+                       return p;\r
+               }\r
+       if (ErrIfNotFound)\r
+               CmdLineErr("Option --%s is invalid", LongName.c_str());\r
+       return g_Opts.end();\r
+       }\r
+\r
+static void AddOpt(const OptInfo &Opt)\r
+       {\r
+       if (GetOptInfo(Opt.LongName, false) != g_Opts.end())\r
+               Die("Option --%s defined twice", Opt.LongName.c_str());\r
+       g_Opts.insert(Opt);\r
+       }\r
+\r
+#ifdef _MSC_VER\r
+#pragma warning(disable: 4505) // unreferenced local function\r
+#endif\r
+\r
+static void DefineFlagOpt(const string &LongName, const string &Help,\r
+  void *Value, bool *OptSet)\r
+       {\r
+       *(bool *) Value = false;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.bDefault = false;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Flag;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineTogOpt(const string &LongName, bool Default, const string &Help,\r
+  void *Value, bool *OptSet)\r
+       {\r
+       *(bool *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.bDefault = Default;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Tog;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineIntOpt(const string &LongName, int Default, int Min, int Max,\r
+  const string &Help, void *Value, bool *OptSet)\r
+       {\r
+       *(int *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.iDefault = Default;\r
+       Opt.iMin = Min;\r
+       Opt.iMax = Max;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Int;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineUnsOpt(const string &LongName, unsigned Default, unsigned Min,\r
+  unsigned Max, const string &Help, void *Value, bool *OptSet)\r
+       {\r
+       *(unsigned *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.uDefault = Default;\r
+       Opt.uMin = Min;\r
+       Opt.uMax = Max;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Uns;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineFloatOpt(const string &LongName, double Default, double Min,\r
+  double Max, const string &Help, void *Value, bool *OptSet)\r
+       {\r
+       *(double *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.dDefault = Default;\r
+       Opt.dMin = Min;\r
+       Opt.dMax = Max;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Float;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineStrOpt(const string &LongName, const char *Default,\r
+  const string &Help, void *Value, bool *OptSet)\r
+       {\r
+       *(string *) Value = (Default == 0 ? "" : string(Default));\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.strDefault = (Default == 0 ? "" : string(Default));\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Str;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void ParseEnumValues(const string &Values, map<string, unsigned> &EnumValues)\r
+       {\r
+       EnumValues.clear();\r
+       \r
+       string Name;\r
+       string Value;\r
+       bool Eq = false;\r
+       for (string::const_iterator p = Values.begin(); ; ++p)\r
+               {\r
+               char c = (p == Values.end() ? '|' : *p);\r
+               if (isspace(c))\r
+                       ;\r
+               else if (c == '|')\r
+                       {\r
+                       if (EnumValues.find(Name) != EnumValues.end())\r
+                               Die("Invalid enum values, '%s' defined twice: '%s'",\r
+                                 Name.c_str(), Values.c_str());\r
+                       if (Name.empty() || Value.empty())\r
+                               Die("Invalid enum values, empty name or value: '%s'",\r
+                                 Values.c_str());\r
+\r
+                       EnumValues[Name] = atoi(Value.c_str());\r
+                       Name.clear();\r
+                       Value.clear();\r
+                       Eq = false;\r
+                       }\r
+               else if (c == '=')\r
+                       Eq = true;\r
+               else if (Eq)\r
+                       Value.push_back(c);\r
+               else\r
+                       Name.push_back(c);\r
+               if (p == Values.end())\r
+                       return;\r
+               }\r
+       }\r
+\r
+static void DefineEnumOpt(const string &LongName, const string &ShortName,\r
+  int Default, const string &Values, const string &Help, void *Value)\r
+       {\r
+       *(int *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.LongName = LongName;\r
+       Opt.iDefault = Default;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Enum;\r
+       ParseEnumValues(Values, Opt.EnumValues);\r
+       AddOpt(Opt);\r
+       }\r
+#undef FLAG_OPT\r
+#undef TOG_OPT\r
+#undef INT_OPT\r
+#undef UNS_OPT\r
+#undef FLT_OPT\r
+#undef STR_OPT\r
+#undef ENUM_OPT\r
+#define FLAG_OPT(LongName)                                                     bool opt_##LongName; bool optset_##LongName;\r
+#define TOG_OPT(LongName, Default)                                     bool opt_##LongName; bool optset_##LongName;\r
+#define INT_OPT(LongName, Default, Min, Max)           int opt_##LongName; bool optset_##LongName;\r
+#define UNS_OPT(LongName, Default, Min, Max)           unsigned opt_##LongName; bool optset_##LongName;\r
+#define FLT_OPT(LongName, Default, Min, Max)           double opt_##LongName; bool optset_##LongName;\r
+#define STR_OPT(LongName, Default)                                     string opt_##LongName; bool optset_##LongName;\r
+#define ENUM_OPT(LongName, Values, Default)                    int opt_##LongName; bool optset_##LongName;\r
+#include "myopts.h"\r
+\r
+static int EnumStrToInt(const OptInfo &Opt, const string &Value)\r
+       {\r
+       const map<string, unsigned> &e = Opt.EnumValues;\r
+       string s;\r
+       for (map<string, unsigned>::const_iterator p = e.begin(); p != e.end(); ++p)\r
+               {\r
+               if (Value == p->first)\r
+                       return p->second;\r
+               s += " " + p->first;\r
+               }\r
+       CmdLineErr("--%s %s not recognized, valid are: %s",\r
+         Opt.LongName.c_str(), Value.c_str(), s.c_str());\r
+       ureturn(-1);\r
+       }\r
+\r
+static void SetOpt(OptInfo &Opt, const string &Value)\r
+       {\r
+       *Opt.OptSet = true;\r
+       switch (Opt.Type)\r
+               {\r
+       case OT_Int:\r
+               {\r
+               *(int *) Opt.Value = atoi(Value.c_str());\r
+               break;\r
+               }\r
+       case OT_Uns:\r
+               {\r
+               unsigned uValue = 0;\r
+               int n = sscanf(Value.c_str(), "%u", &uValue);\r
+               if (n != 1)\r
+                       CmdLineErr("Invalid value '%s' for --%s",\r
+                         Value.c_str(), Opt.LongName.c_str());\r
+               *(unsigned *) Opt.Value = uValue;\r
+               break;\r
+               }\r
+       case OT_Float:\r
+               {\r
+               *(double *) Opt.Value = atof(Value.c_str());\r
+               break;\r
+               }\r
+       case OT_Str:\r
+               {\r
+               *(string *) Opt.Value = Value;\r
+               break;\r
+               }\r
+       case OT_Enum:\r
+               {\r
+               *(int *) Opt.Value = EnumStrToInt(Opt, Value);\r
+               break;\r
+               }\r
+       default:\r
+               asserta(false);\r
+               }\r
+       }\r
+\r
+void LogOpts()\r
+       {\r
+       for (set<OptInfo>::const_iterator p = g_Opts.begin(); p != g_Opts.end(); ++p)\r
+               {\r
+               const OptInfo &Opt = *p;\r
+               Log("%s = ", Opt.LongName.c_str());\r
+               switch (Opt.Type)\r
+                       {\r
+               case OT_Flag:\r
+                       Log("%s", (*(bool *) Opt.Value) ? "yes" : "no");\r
+                       break;\r
+               case OT_Tog:\r
+                       Log("%s", (*(bool *) Opt.Value) ? "on" : "off");\r
+                       break;\r
+               case OT_Int:\r
+                       Log("%d", *(int *) Opt.Value);\r
+                       break;\r
+               case OT_Uns:\r
+                       Log("%u", *(unsigned *) Opt.Value);\r
+                       break;\r
+               case OT_Float:\r
+                       {\r
+                       double Value = *(double *) Opt.Value;\r
+                       if (Value == FLT_MAX)\r
+                               Log("*");\r
+                       else\r
+                               Log("%g", Value);\r
+                       break;\r
+                       }\r
+               case OT_Str:\r
+                       Log("%s", (*(string *) Opt.Value).c_str());\r
+                       break;\r
+               case OT_Enum:\r
+                       Log("%d", *(int *) Opt.Value);\r
+                       break;\r
+               default:\r
+                       asserta(false);\r
+                       }\r
+               Log("\n");\r
+               }\r
+       }\r
+\r
// Prints the _FILE_OFFSET_BITS setting and the sizes of several basic types
// to stdout, then exits with status 0.
static void CompilerInfo()
        {
#ifdef _FILE_OFFSET_BITS
    printf("_FILE_OFFSET_BITS=%d\n", _FILE_OFFSET_BITS);
#else
    printf("_FILE_OFFSET_BITS not defined\n");
#endif

// Prints "sizeof(<type>) = <n>" for the given type.
#define SHOW_SIZE(t)    printf("sizeof(" #t ") = %d\n", (int) sizeof(t));
        SHOW_SIZE(int)
        SHOW_SIZE(long)
        SHOW_SIZE(float)
        SHOW_SIZE(double)
        SHOW_SIZE(void *)
        SHOW_SIZE(off_t)
#undef SHOW_SIZE
        exit(0);
        }
+\r
// Splits Str into Fields, replacing the previous contents of Fields.
//
//   Sep == 0  split on whitespace (and on NUL characters); runs of
//             separators produce no empty fields.
//   Sep != 0  split on that character; empty fields between separators are
//             kept, but an empty field after the last separator is dropped.
void Split(const string &Str, vector<string> &Fields, char Sep)
        {
        Fields.clear();
        string Field;
        for (string::const_iterator p = Str.begin(); p != Str.end(); ++p)
                {
                const char c = *p;
                // isspace() is undefined for negative arguments other than EOF, so
                // convert through unsigned char (plain char may be signed and
                // bytes >= 0x80 would otherwise be passed as negative values).
                const bool IsSeparator =
                  (Sep == 0 && isspace((unsigned char) c)) || c == Sep;
                if (!IsSeparator)
                        {
                        Field.push_back(c);
                        continue;
                        }
                if (!Field.empty() || Sep != 0)
                        Fields.push_back(Field);
                Field.clear();
                }
        if (!Field.empty())
                Fields.push_back(Field);
        }
+\r
+static void GetArgsFromFile(const string &FileName, vector<string> &Args)\r
+       {\r
+       Args.clear();\r
+\r
+       FILE *f = OpenStdioFile(FileName);\r
+       string Line;\r
+       while (ReadLineStdioFile(f, Line))\r
+               {\r
+               size_t n = Line.find('#');\r
+               if (n != string::npos)\r
+                       Line = Line.substr(0, n);\r
+               vector<string> Fields;\r
+               Split(Line, Fields);\r
+               Args.insert(Args.end(), Fields.begin(), Fields.end());\r
+               }\r
+       CloseStdioFile(f);\r
+       }\r
+\r
+void MyCmdLine(int argc, char **argv)\r
+       {\r
+       static unsigned RecurseDepth = 0;\r
+       ++RecurseDepth;\r
+\r
+       DefineFlagOpt("compilerinfo", "Write info about compiler types and #defines to stdout.",\r
+         (void *) &opt_compilerinfo, &optset_compilerinfo);\r
+       DefineFlagOpt("quiet", "Turn off progress messages.", (void *) &opt_quiet, &optset_quiet);\r
+       DefineFlagOpt("version", "Show version and exit.", (void *) &opt_version, &optset_version);\r
+       DefineFlagOpt("logopts", "Log options.", (void *) &opt_logopts, &optset_logopts);\r
+       DefineFlagOpt("help", "Display command-line options.", (void *) &opt_help, &optset_help);\r
+       DefineStrOpt("log", "", "Log file name.", (void *) &opt_log, &optset_log);\r
+\r
+#undef FLAG_OPT\r
+#undef TOG_OPT\r
+#undef INT_OPT\r
+#undef UNS_OPT\r
+#undef FLT_OPT\r
+#undef STR_OPT\r
+#undef ENUM_OPT\r
+#define FLAG_OPT(LongName)                                             DefineFlagOpt(#LongName, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define TOG_OPT(LongName, Default)                             DefineTogOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define INT_OPT(LongName, Default, Min, Max)   DefineIntOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define UNS_OPT(LongName, Default, Min, Max)   DefineUnsOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define FLT_OPT(LongName, Default, Min, Max)   DefineFloatOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define STR_OPT(LongName, Default)                             DefineStrOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define ENUM_OPT(LongName, Values, Default)            DefineEnumOpt(#LongName, Values, Default, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#include "myopts.h"\r
+\r
+       if (RecurseDepth == 0)\r
+               g_Argv.clear();\r
+\r
+       for (int i = 0; i < argc; ++i) {\r
+               g_Argv.push_back(string(argv[i]));\r
+       }\r
+\r
+       int i = 1;\r
+       for (;;)\r
+               {\r
+               if (i >= argc)\r
+                       break;\r
+               const string &Arg = g_Argv[i];\r
+                       \r
+               if (Arg.empty())\r
+                       continue;\r
+               else if (Arg == "file:" && i + 1 < argc)\r
+                       {\r
+                       const string &FileName = g_Argv[i+1];\r
+                       vector<string> Args;\r
+                       GetArgsFromFile(FileName, Args);\r
+                       for (vector<string>::const_iterator p = Args.begin();\r
+                         p != Args.end(); ++p)\r
+                               {\r
+                               g_Argv.push_back(*p);\r
+                               ++argc;\r
+                               }\r
+                       i += 2;\r
+                       continue;\r
+                       }\r
+               else if (Arg.size() > 1 && Arg[0] == '-')\r
+                       {\r
+                       string LongName = (Arg.size() > 2 && Arg[1] == '-' ? Arg.substr(2) : Arg.substr(1));\r
+                       OptInfo Opt = *GetOptInfo(LongName, true);\r
+                       *Opt.OptSet = true;\r
+                       if (Opt.Type == OT_Flag)\r
+                               {\r
+                               g_Opts.erase(Opt);\r
+                               *(bool *) Opt.Value = true;\r
+                               g_Opts.insert(Opt);\r
+                               ++i;\r
+                               continue;\r
+                               }\r
+                       else if (Opt.Type == OT_Tog)\r
+                               {\r
+                               g_Opts.erase(Opt);\r
+                               if (string("no") + Opt.LongName == LongName)\r
+                                       *(bool *) Opt.Value = false;\r
+                               else\r
+                                       {\r
+                                       asserta(Opt.LongName == LongName);\r
+                                       *(bool *) Opt.Value = true;\r
+                                       }\r
+                               g_Opts.insert(Opt);\r
+                               ++i;\r
+                               continue;\r
+                               }\r
+\r
+                       ++i;\r
+                       if (i >= argc)\r
+                               CmdLineErr("Missing value for option --%s", LongName.c_str());\r
+\r
+                       string Value = g_Argv[i];\r
+                       SetOpt(Opt, Value);\r
+\r
+                       ++i;\r
+                       continue;\r
+                       }\r
+               else\r
+                       CmdLineErr("Expected -option_name or --option_name, got '%s'", Arg.c_str());\r
+               }\r
+\r
+       --RecurseDepth;\r
+       if (RecurseDepth > 0)\r
+               return;\r
+\r
+       if (opt_help)\r
+               Help();\r
+\r
+       if (opt_compilerinfo)\r
+               CompilerInfo();\r
+\r
+       SetLogFileName(opt_log);\r
+\r
+       if (opt_log != "")\r
+               {\r
+               for (int i = 0; i < argc; ++i)\r
+                       Log("%s%s", i == 0 ? "" : " ", g_Argv[i].c_str());\r
+               Log("\n");\r
+               time_t Now = time(0);\r
+               struct tm *t = localtime(&Now);\r
+               const char *s = asctime(t);\r
+               Log("Started %s", s); // there is a newline in s\r
+               Log("Version " MY_VERSION ".%s\n", SVN_VERSION);\r
+               Log("\n");\r
+               }\r
+\r
+       if (opt_logopts)\r
+               LogOpts();\r
+       }\r
+\r
+// Return x as a percentage of y, i.e. 100*x/y, or 0 when y is zero.\r
+double Pct(double x, double y)\r
+       {\r
+       const bool ZeroDenominator = (y == 0.0f);\r
+       return ZeroDenominator ? 0.0f : (x*100.0f)/y;\r
+       }\r
+\r
+void GetCmdLine(string &s)\r
+       {\r
+       s.clear();\r
+       for (unsigned i = 0; i < SIZE(g_Argv); ++i)\r
+               {\r
+               if (i > 0)\r
+                       s += " ";\r
+               s += g_Argv[i];\r
+               }\r
+       }\r
+\r
+// Return a copy of the NUL-terminated string s in memory obtained with myalloc.\r
+char *mystrsave(const char *s)\r
+       {\r
+       const unsigned Length = unsigned(strlen(s));\r
+       char *Copy = myalloc(char, Length+1);\r
+       // Length+1 bytes: the characters plus the terminating NUL.\r
+       memcpy(Copy, s, Length+1);\r
+       return Copy;\r
+       }\r
+\r
+void Logu(unsigned u, unsigned w, unsigned prefixspaces)\r
+       {\r
+       for (unsigned i = 0; i < prefixspaces; ++i)\r
+               Log(" ");\r
+       if (u == UINT_MAX)\r
+               Log("%*.*s", w, w, "*");\r
+       else\r
+               Log("%*u", w, u);\r
+       }\r
+\r
+void Logf(float x, unsigned w, unsigned prefixspaces)\r
+       {\r
+       for (unsigned i = 0; i < prefixspaces; ++i)\r
+               Log(" ");\r
+       if (x == FLT_MAX)\r
+               Log("%*.*s", w, w, "*");\r
+       else\r
+               Log("%*.2f", w, x);\r
+       }\r
+\r
+// Current state of the simple linear congruential generator.\r
+static uint32 g_SLCG_state = 1;\r
+\r
+// Multiplier and increment: the numerical values used by Microsoft C,\r
+// according to wikipedia:\r
+// http://en.wikipedia.org/wiki/Linear_congruential_generator\r
+static uint32 g_SLCG_a = 214013;\r
+static uint32 g_SLCG_c = 2531011;\r
+\r
+// Simple Linear Congruential Generator.\r
+// Bad properties; used just to initialize the better generator.\r
+// Advances the state by one step and returns the new state.\r
+static uint32 SLCG_rand()\r
+       {\r
+       const uint32 Next = g_SLCG_state*g_SLCG_a + g_SLCG_c;\r
+       g_SLCG_state = Next;\r
+       return Next;\r
+       }\r
+\r
+// Seed the simple generator with Seed and discard its first ten outputs.\r
+static void SLCG_srand(uint32 Seed)\r
+       {\r
+       g_SLCG_state = Seed;\r
+       for (int Remaining = 10; Remaining > 0; --Remaining)\r
+               SLCG_rand();\r
+       }\r
+\r
+/***\r
+A multiply-with-carry random number generator, see:\r
+http://en.wikipedia.org/wiki/Multiply-with-carry\r
+\r
+The particular multipliers used here were found on\r
+the web where they are attributed to George Marsaglia.\r
+***/\r
+\r
+// Set by InitRand() once the generator state has been seeded.\r
+static bool g_InitRandDone = false;\r
+// Generator state: g_X[0..3] are the four most recent outputs (newest first)\r
+// and g_X[4] is the carry.\r
+static uint32 g_X[5];\r
+\r
+// Return the next 32-bit value of the generator, seeding it on first use.\r
+uint32 RandInt32()\r
+       {\r
+       InitRand();\r
+\r
+       // 64-bit weighted sum of the previous four outputs plus the carry.\r
+       uint64 Sum = 2111111111*(uint64) g_X[3] + 1492*(uint64) g_X[2] +\r
+         1776*(uint64) g_X[1] + 5115*(uint64) g_X[0] + g_X[4];\r
+       // Shift the history by one position.\r
+       g_X[3] = g_X[2];\r
+       g_X[2] = g_X[1];\r
+       g_X[1] = g_X[0];\r
+       // The high 32 bits become the new carry, the low 32 bits the new output.\r
+       g_X[4] = (uint32) (Sum >> 32);\r
+       g_X[0] = (uint32) Sum;\r
+       return g_X[0];\r
+       }\r
+\r
+// Return the next value of RandInt32() as an unsigned.\r
+unsigned randu32()\r
+       {\r
+       const uint32 Value = RandInt32();\r
+       return (unsigned) Value;\r
+       }\r
+\r
+// Seed the random number generator state g_X; does nothing after the first call.\r
+// The seed is opt_randseed when that option was set, otherwise it is derived\r
+// from the current time and the process id.\r
+void InitRand()\r
+       {\r
+       if (g_InitRandDone)\r
+               return;\r
+// Do this first to avoid recursion\r
+// (RandInt32(), called at the end of this function, calls InitRand() itself).\r
+       g_InitRandDone = true;\r
+\r
+       unsigned Seed = (optset_randseed ? opt_randseed : (unsigned) (time(0)*getpid()));\r
+       Log("RandSeed=%u\n", Seed);\r
+       SLCG_srand(Seed);\r
+\r
+       // Fill the state from the simple generator.\r
+       for (unsigned i = 0; i < 5; i++)\r
+               g_X[i] = SLCG_rand();\r
+\r
+       // Discard the first 100 outputs.\r
+       for (unsigned i = 0; i < 100; i++)\r
+               RandInt32();\r
+       }\r
+\r
+// MUST COME AT END BECAUSE OF #undef\r
+#if    RCE_MALLOC\r
+#undef mymalloc\r
+#undef myfree\r
+#undef myfree2\r
+// RCE_MALLOC build: allocate bytes by forwarding to rce_malloc together with\r
+// the caller's file name and line number.\r
+void *mymalloc(unsigned bytes, const char *FileName, int Line)\r
+       {\r
+       void *rce_malloc(unsigned bytes, const char *FileName, int Line);\r
+       return rce_malloc(bytes, FileName, Line);\r
+       }\r
+\r
+// RCE_MALLOC build: release p by forwarding to rce_free.\r
+void myfree(void *p, const char *FileName, int Line)\r
+       {\r
+       void rce_free(void *p, const char *FileName, int Line);\r
+       rce_free(p, FileName, Line);\r
+       }\r
+\r
+// RCE_MALLOC build: same as myfree; the bytes argument is not used.\r
+void myfree2(void *p, unsigned bytes, const char *FileName, int Line)\r
+       {\r
+       void rce_free(void *p, const char *FileName, int Line);\r
+       rce_free(p, FileName, Line);\r
+       }\r
+\r
+#else // RCE_MALLOC\r
+// Allocate bytes with malloc() and update the allocation counters.\r
+// Never returns 0: when malloc() fails the error is reported on stderr, the\r
+// allocations are logged and Die() is called.\r
+void *mymalloc(unsigned bytes)\r
+       {\r
+       ++g_NewCalls;\r
+       // Record the memory use seen at the first allocation.\r
+       if (g_InitialMemUseBytes == 0)\r
+               g_InitialMemUseBytes = GetMemUseBytes();\r
+\r
+       // The counters are updated before the allocation is attempted.\r
+       g_TotalAllocBytes += bytes;\r
+       g_NetBytes += bytes;\r
+       if (g_NetBytes > g_MaxNetBytes)\r
+               {\r
+               // Only query memory use when the peak grew by more than 10000000 bytes.\r
+               if (g_NetBytes > g_MaxNetBytes + 10000000)\r
+                       GetMemUseBytes();//to force update of peak\r
+               g_MaxNetBytes = g_NetBytes;\r
+               }\r
+       void *p = malloc(bytes);\r
+       //void *p = _malloc_dbg(bytes, _NORMAL_BLOCK, __FILE__, __LINE__);\r
+       if (0 == p)\r
+               {\r
+               double b = GetMemUseBytes();\r
+               fprintf(stderr, "\nOut of memory mymalloc(%u), curr %.3g bytes",\r
+                 (unsigned) bytes, b);\r
+               void LogAllocs();\r
+               LogAllocs();\r
+#if DEBUG && defined(_MSC_VER)\r
+               asserta(_CrtCheckMemory());\r
+#endif\r
+               Die("Out of memory, mymalloc(%u), curr %.3g bytes\n",\r
+                 (unsigned) bytes, b);\r
+               }\r
+       return p;\r
+       }\r
+\r
+// Release p with free(); a null pointer is ignored.\r
+// No allocation counters are updated here.\r
+void myfree(void *p)\r
+       {\r
+       if (p != 0)\r
+               free(p);\r
+       //_free_dbg(p, _NORMAL_BLOCK);\r
+       }\r
+\r
+// Release p with free() and record that bytes were freed.\r
+// The counters are updated even when p is null.\r
+void myfree2(void *p, unsigned bytes)\r
+       {\r
+       ++g_FreeCalls;\r
+       g_TotalFreeBytes += bytes;\r
+       g_NetBytes -= bytes;\r
+\r
+       if (p != 0)\r
+               free(p);\r
+       }\r
+#endif\r
diff --git a/uchime_src/myutils.h b/uchime_src/myutils.h
new file mode 100644 (file)
index 0000000..b63ad3c
--- /dev/null
@@ -0,0 +1,274 @@
+#ifndef myutils_h\r
+#define myutils_h\r
+\r
+#define RCE_MALLOC     0\r
+
+#include <stdio.h>\r
+#include <sys/types.h>\r
+#include <string>\r
+#include <string.h>\r
+#include <memory.h>\r
+#include <vector>\r
+#include <math.h>\r
+#include <stdarg.h>\r
+#include <cstdlib>\r
+#include <climits>\r
+\r
+#ifndef _MSC_VER\r
+#include <inttypes.h>\r
+#endif\r
+\r
+using namespace std;\r
+\r
+#ifdef _MSC_VER\r
+#include <crtdbg.h>\r
+#pragma warning(disable: 4996) // deprecated functions\r
+#define _CRT_SECURE_NO_DEPRECATE       1\r
+#endif\r
+\r
+#if defined(_DEBUG) && !defined(DEBUG)\r
+#define DEBUG  1\r
+#endif\r
+\r
+#if defined(DEBUG) && !defined(_DEBUG)\r
+#define _DEBUG 1\r
+#endif\r
+\r
+#ifndef NDEBUG\r
+#define        DEBUG   1\r
+#define        _DEBUG  1\r
+#endif\r
+\r
+// Short aliases for the basic integer and floating-point types.\r
+typedef unsigned char byte;\r
+typedef unsigned short uint16;\r
+typedef unsigned uint32;\r
+typedef int int32;\r
+// NOTE(review): despite its name, float32 is an alias for double -- confirm this is intended.\r
+typedef double float32;\r
+typedef signed char int8;\r
+typedef unsigned char uint8;\r
+\r
+#ifdef _MSC_VER\r
+\r
+typedef __int64 int64;\r
+typedef unsigned __int64 uint64;\r
+\r
+#define INT64_PRINTF           "lld"\r
+#define UINT64_PRINTF          "llu"\r
+\r
+#define SIZE_T_PRINTF          "u"\r
+#define OFF64_T_PRINTF         "lld"\r
+\r
+#define INT64_PRINTFX          "llx"\r
+#define UINT64_PRINTFX         "llx"\r
+\r
+#define SIZE_T_PRINTFX         "x"\r
+#define OFF64_T_PRINTFX                "llx"\r
+\r
+#elif defined(__x86_64__)\r
+\r
+typedef long int64;\r
+typedef unsigned long uint64;\r
+\r
+#define INT64_PRINTF           "ld"\r
+#define UINT64_PRINTF          "lu"\r
+\r
+#define SIZE_T_PRINTF          "lu"\r
+#define OFF64_T_PRINTF         "ld"\r
+\r
+#define INT64_PRINTFX          "lx"\r
+#define UINT64_PRINTFX         "lx"\r
+\r
+#define SIZE_T_PRINTFX         "lx"\r
+#define OFF64_T_PRINTFX                "lx"\r
+\r
+#else\r
+\r
+typedef long long int64;\r
+typedef unsigned long long uint64;\r
+\r
+#define INT64_PRINTF           "lld"\r
+#define UINT64_PRINTF          "llu"\r
+\r
+#define SIZE_T_PRINTF          "u"\r
+#define OFF64_T_PRINTF         "lld"\r
+\r
+#define INT64_PRINTFX          "llx"\r
+#define UINT64_PRINTFX         "llx"\r
+\r
+#define SIZE_T_PRINTFX         "x"\r
+#define OFF64_T_PRINTFX                "llx"\r
+#endif\r
+\r
+#define d64            INT64_PRINTF\r
+#define        u64             UINT64_PRINTF\r
+#define        x64             UINT64_PRINTFX\r
+\r
+// const uint64 UINT64_MAX                     = (~((uint64) 0));\r
+\r
+void myassertfail(const char *Exp, const char *File, unsigned Line);\r
+#undef  assert\r
+#ifdef  NDEBUG\r
+#define assert(exp)     ((void)0)\r
+#define myassert(exp)     ((void)0)\r
+#else\r
+#define assert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )\r
+#define myassert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )\r
+#endif\r
+#define asserta(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )\r
+\r
+#define ureturn(x)     return (x)\r
+\r
+#define NotUsed(v)     ((void *) &v)\r
+\r
+// pom=plus or minus, tof=true or false\r
+static inline char pom(bool Plus)      { return Plus ? '+' : '-'; }\r
+static inline char tof(bool x)         { return x ? 'T' : 'F'; }\r
+static inline char yon(bool x)         { return x ? 'Y' : 'N'; }\r
+unsigned GetElapsedSecs();\r
+\r
+#if    RCE_MALLOC\r
+\r
+void *rce_malloc(unsigned bytes, const char *FileName, int Line);\r
+void rce_free(void *p, const char *FileName, int LineNr);\r
+void rce_chkmem();\r
+\r
+void rce_dumpmem_(const char *FileName, int LineNr);\r
+#define rce_dumpmem()          rce_dumpmem_(__FILE__, __LINE__)\r
+\r
+void rce_assertvalidptr_(void *p, const char *FileName, int LineNr);\r
+#define rce_assertvalidptr(p)  rce_assertvalidptr_(p, __FILE__, __LINE__)\r
+\r
+void rce_dumpptr_(void *p, const char *FileName, int LineNr);\r
+#define rce_dumpptr(p) rce_dumpptr_(p, __FILE__, __LINE__)\r
+\r
+#define mymalloc(n)            rce_malloc((n), __FILE__, __LINE__)\r
+#define myfree(p)              rce_free(p, __FILE__, __LINE__)\r
+#define myfree2(p,n)   rce_free(p, __FILE__, __LINE__)\r
+#define myalloc(t, n)  (t *) rce_malloc((n)*sizeof(t), __FILE__, __LINE__)\r
+\r
+#else // RCE_MALLOC\r
+void *mymalloc(unsigned bytes);\r
+void myfree2(void *p, unsigned Bytes);\r
+void myfree(void *p);\r
+#define rce_chkmem()   /* empty */\r
+#define myalloc(t, n)  (t *) mymalloc((n)*sizeof(t))\r
+#endif // RCE_MALLOC\r
+\r
+#define SIZE(c)        unsigned((c).size())\r
+\r
+bool myisatty(int fd);\r
+\r
+#ifdef _MSC_VER\r
+#define off_t  __int64\r
+#endif\r
+\r
+FILE *OpenStdioFile(const string &FileName);\r
+FILE *CreateStdioFile(const string &FileName);\r
+bool CanSetStdioFilePos(FILE *f);\r
+void CloseStdioFile(FILE *f);\r
+void SetStdioFilePos(FILE *f, off_t Pos);\r
+void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes);\r
+void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes);\r
+void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes);\r
+void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes);\r
+bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes);\r
+bool ReadLineStdioFile(FILE *f, string &Line);\r
+byte *ReadAllStdioFile(FILE *f, off_t &FileSize);\r
+byte *ReadAllStdioFile(const string &FileName, off_t &FileSize);\r
+void AppendStdioFileToFile(FILE *fFrom, FILE *fTo);\r
+void FlushStdioFile(FILE *f);\r
+bool StdioFileExists(const string &FileName);\r
+off_t GetStdioFilePos(FILE *f);\r
+off_t GetStdioFileSize(FILE *f);\r
+void LogStdioFileState(FILE *f);\r
+void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo);\r
+void DeleteStdioFile(const string &FileName);\r
+\r
+void myvstrprintf(string &Str, const char *szFormat, va_list ArgList);\r
+void myvstrprintf(string &Str, const char *szFormat, ...);\r
+\r
+void SetLogFileName(const string &FileName);\r
+void Log(const char *szFormat, ...);\r
+\r
+void Die(const char *szFormat, ...);\r
+void Warning(const char *szFormat, ...);\r
+\r
+void ProgressStep(unsigned i, unsigned N, const char *Format, ...);\r
+void Progress(const char *szFormat, ...);\r
+void Progress(const string &Str);\r
+void ProgressLog(const char *szFormat, ...);\r
+void ProgressExit();\r
+\r
+char *mystrsave(const char *s);\r
+\r
+double GetPeakMemUseBytes();\r
+\r
+// Are two floats equal to within epsilon?\r
+const double epsilon = 0.01;\r
+inline bool feq(double x, double y, double epsilon)\r
+       {\r
+       if (fabs(x) > 10000)\r
+               epsilon = fabs(x)/10000;\r
+       if (fabs(x - y) > epsilon)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+inline bool feq(double x, double y)\r
+       {\r
+       if (x < -1e6 && y < -1e6)\r
+               return true;\r
+       double e = epsilon;\r
+       if (fabs(x) > 10000)\r
+               e = fabs(x)/10000;\r
+       if (fabs(x - y) > e)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+#define asserteq(x, y) assert(feq(x, y))\r
+#define assertaeq(x, y)        asserta(feq(x, y))\r
+\r
+// Set the first n elements of array a to zero bytes. The arguments are\r
+// parenthesized so that expressions such as zero(a, n+1) expand correctly.\r
+#define        zero(a, n)      memset((a), 0, (n)*sizeof((a)[0]))\r
+\r
+void InitRand();\r
+unsigned randu32();\r
+void Split(const string &Str, vector<string> &Fields, char Sep = 0);\r
+double Pct(double x, double y);\r
+double GetMemUseBytes();\r
+const char *MemBytesToStr(double Bytes);\r
+const char *IntToStr(unsigned i);\r
+const char *FloatToStr(double d);\r
+const char *SecsToStr(double Secs);\r
+void Logu(unsigned u, unsigned w, unsigned prefixspaces = 2);\r
+void Logf(float x, unsigned w, unsigned prefixspaces = 2);\r
+const char *SecsToHHMMSS(int Secs);\r
+\r
+void MyCmdLine(int argc, char **argv);\r
+void CmdLineErr(const char *Format, ...);\r
+void Help();\r
+void GetCmdLine(string &s);\r
+\r
+#define FLAG_OPT(LongName)                                             extern bool opt_##LongName; extern bool optset_##LongName;\r
+#define TOG_OPT(LongName, Default)                             extern bool opt_##LongName; extern bool optset_##LongName;\r
+#define INT_OPT(LongName, Default, Min, Max)   extern int opt_##LongName; extern bool optset_##LongName;\r
+#define UNS_OPT(LongName, Default, Min, Max)   extern unsigned opt_##LongName; extern bool optset_##LongName;\r
+#define FLT_OPT(LongName, Default, Min, Max)   extern double opt_##LongName; extern bool optset_##LongName;\r
+#define STR_OPT(LongName, Default)                             extern string opt_##LongName; extern bool optset_##LongName;\r
+#define ENUM_OPT(LongName, Default, Values)            extern int opt_##LongName; extern bool optset_##LongName;\r
+#include "myopts.h"\r
+#undef FLAG_OPT\r
+#undef TOG_OPT\r
+#undef INT_OPT\r
+#undef UNS_OPT\r
+#undef FLT_OPT\r
+#undef STR_OPT\r
+#undef ENUM_OPT\r
+\r
+extern const char *SVN_VERSION;\r
+extern const char *SVN_MODS;\r
+extern bool opt_quiet;
+extern bool opt_version;
+extern FILE *g_fLog;
+\r
+#endif // myutils_h\r
diff --git a/uchime_src/orf.h b/uchime_src/orf.h
new file mode 100644 (file)
index 0000000..90b29d1
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef orf_h\r
+#define orf_h\r
+\r
+#include "alpha.h"\r
+\r
+// Data for one ORF: a nucleotide sequence, an amino acid sequence, and\r
+// helpers that convert between nucleotide and amino acid positions.\r
+// NOTE(review): the field meanings below are inferred from the names only;\r
+// the member functions are defined outside this header -- confirm there.\r
+struct ORFData\r
+       {\r
+       const byte *NucSeq;\r
+       const byte *AminoSeq;\r
+       int Frame;\r
+       // presumably the lengths of NucSeq and AminoSeq -- TODO confirm\r
+       unsigned NucL;\r
+       unsigned AminoL;\r
+       // presumably the nucleotide range covered by this ORF -- TODO confirm\r
+       unsigned NucLo;\r
+       unsigned NucHi;\r
+       // presumably links ORFs into a list -- TODO confirm\r
+       ORFData *Next;\r
+\r
+       unsigned GetNucPosFirstBase() const;\r
+       unsigned GetAAPos(unsigned NucPos) const;\r
+       unsigned GetCodex(unsigned NucPos) const;\r
+       unsigned GetNucLo(unsigned AALo, unsigned AAHi) const;\r
+       unsigned GetNucHi(unsigned AALo, unsigned AAHi) const;\r
+       unsigned GetAALo(unsigned NucLo, unsigned NucHi) const;\r
+       unsigned GetAAHi(unsigned NucLo, unsigned NucHi) const;\r
+       unsigned GetNucPosFirstBaseInCodon(unsigned AAPos) const;\r
+       unsigned GetNucPosLastBaseInCodon(unsigned AAPos) const;\r
+       unsigned RoundToCodonLo(unsigned NucPos) const;\r
+       unsigned RoundToCodonHi(unsigned NucPos) const;\r
+       void LogMe() const;\r
+       void LogMe2() const;\r
+       };\r
+\r
+const byte ORFEND = '.';\r
+\r
+void GetORFs(const byte *NucSeq, unsigned NucL, vector<ORFData> &ORFs,\r
+  unsigned ORFStyle, int FindFrame, int Sign);\r
+\r
+#endif // orf_h\r
diff --git a/uchime_src/out.h b/uchime_src/out.h
new file mode 100644 (file)
index 0000000..4ca50c7
--- /dev/null
@@ -0,0 +1,134 @@
+#ifndef out_h\r
+#define out_h\r
+\r
+#include "seq.h"\r
+#include "hsp.h"\r
+#include "orf.h"\r
+#include "path.h"\r
+#include <float.h>\r
+\r
+// One alignment between two sequences, together with its scores.\r
+struct AlnData\r
+       {\r
+/***\r
+SA.Seq and SB.Seq align.\r
+Reverse strand stuff for nucleotides is handled like this:\r
+       SA.RevComp must be false.\r
+       If SB.RevComp is true, then SA.Seq is r.c.'d relative to the sequence in\r
+       the input file (query or db). If so, coordinates in HSP refer to SB.Seq\r
+       so are also r.c.'d relative to the original sequence.\r
+NOTE(review): "then SA.Seq is r.c.'d" above looks like it should read SB.Seq,\r
+given that SA.RevComp must be false -- confirm.\r
+***/\r
+       SeqData SA;\r
+       SeqData SB;\r
+       HSPData HSP;\r
+       const char *Path;\r
+       char IdDesc[256];\r
+\r
+       float FractId;\r
+       float RawScore;\r
+       float BitScore;\r
+       float Evalue;\r
+\r
+       // Write a one-line summary to the log: the HSP followed by both labels.\r
+       void LogMe() const\r
+               {\r
+               Log("AD: ");\r
+               HSP.LogMe();\r
+               Log(" %s,%s\n", SA.Label, SB.Label);\r
+               }\r
+       };\r
+\r
+bool OnDerepHit(const SeqData &SA, const SeqData &SB);\r
+\r
+bool OnLocalUngappedHit(const SeqData &SA, const SeqData &SB,\r
+  const HSPData &HSP, float &Evalue, float &FractId);\r
+\r
+bool OnLocalGappedHit(const SeqData &SA, const SeqData &SB,\r
+  const HSPData &HSP, const PathData &PD, float &Evalue, float &FractId);\r
+\r
+bool OnGlobalHit(const SeqData &SA, const SeqData &SB, const PathData &PD,\r
+  float &FractId);\r
+\r
+void OnReject(const SeqData &SA, const SeqData &SB, double FractId,\r
+  const char *Path);\r
+\r
+void OnNotMatched(const char *Label, unsigned L);\r
+void OnNewCluster(unsigned ClusterIndex, const char *Label, unsigned L);\r
+void OnNewLibCluster(unsigned ClusterIndex, const char *Label, unsigned L);\r
+void OnLibCluster(unsigned ClusterIndex, unsigned Size, double AvgId,\r
+  const char *Label);\r
+void OnNewCluster(unsigned ClusterIndex, unsigned Size, double AvgId,\r
+  const char *Label);\r
+void OnChainCov(const SeqData &NucleoSD, const SeqData &TargetSD,\r
+  float Score, float ChainCov);\r
+\r
+void SetUserFieldIndexes(const string &s);\r
+\r
+void BlastOut(FILE *f, const AlnData &AD);\r
+void Blast6Out(FILE *f, const AlnData &AD);\r
+void FastaPairOut(FILE *f, const AlnData &AD);\r
+void UserOut(FILE *f, const AlnData &AD);\r
+\r
+void BlastOutORF(FILE *f, const AlnData &AD);\r
+\r
+void OpenOutputFiles();\r
+void CloseOutputFiles();\r
+void SetLibSeedCount(unsigned DBSeqCount);\r
+const char *UserFieldIndexToStr(unsigned i);\r
+\r
+extern float **g_SubstMx;\r
+\r
+static char g_IdChar = '|';\r
+static char g_DiffChar = ' ';\r
+\r
+static inline char GetSymN(byte Letter1, byte Letter2)\r
+       {\r
+       Letter1 = toupper(Letter1);\r
+       Letter2 = toupper(Letter2);\r
+       if (Letter1 == Letter2)\r
+               return g_IdChar;\r
+       return g_DiffChar;\r
+       }\r
+\r
+static inline char GetSymA(byte Letter1, byte Letter2)\r
+       {\r
+       Letter1 = toupper(Letter1);\r
+       Letter2 = toupper(Letter2);\r
+       if (Letter1 == Letter2)\r
+               return '|';\r
+\r
+       float Score = g_SubstMx[Letter1][Letter2];\r
+       if (Score >= 2.0f)\r
+               return ':';\r
+       if (Score > 0.0f)\r
+               return '.';\r
+       return ' ';\r
+       }\r
+\r
+// Annotation character for a letter pair: uses GetSymN when Nucleo is true\r
+// and GetSymA otherwise.\r
+static inline char GetSym(byte Letter1, byte Letter2, bool Nucleo)\r
+       {\r
+       return Nucleo ? GetSymN(Letter1, Letter2) : GetSymA(Letter1, Letter2);\r
+       }\r
+\r
+static unsigned GetNDig(unsigned n)\r
+       {\r
+       if (n < 10)\r
+               return 1;\r
+       if (n < 100)\r
+               return 2;\r
+       if (n < 1000)\r
+               return 3;\r
+       if (n < 10000)\r
+               return 4;\r
+       if (n < 100000)\r
+               return 5;\r
+       if (n < 1000000)\r
+               return 6;\r
+       return 10;\r
+       }\r
+\r
+extern unsigned *g_UserFieldIndexes;\r
+extern unsigned g_UserFieldCount;\r
+\r
+#endif // out_h\r
diff --git a/uchime_src/path.cpp b/uchime_src/path.cpp
new file mode 100644 (file)
index 0000000..9340344
--- /dev/null
@@ -0,0 +1,151 @@
+#include "myutils.h"\r
+#include "path.h"\r
+#include "timing.h"\r
+\r
+#define TRACE  0\r
+\r
+const unsigned PathMagic = 0x9A783A16;\r
+\r
+// One slot of the path buffer pool.\r
+struct PathBuffer\r
+       {\r
+       unsigned Magic;         // set to PathMagic when the slot is created; checked by AllocBuffer\r
+       char *Buffer;           // the memory itself, or 0 if none has been allocated yet\r
+       unsigned Size;          // size of Buffer in bytes\r
+       bool InUse;             // true while the buffer is handed out\r
+       };\r
+\r
+// The pool: an array of g_PathBufferSize slot pointers.\r
+static PathBuffer **g_PathBuffers;\r
+static unsigned g_PathBufferSize;\r
+\r
+// Hand out a buffer of at least Size bytes from the pool and mark it in use.\r
+// Returns 0 when Size is 0. Buffers are given back with FreeBuffer().\r
+static char *AllocBuffer(unsigned Size)\r
+       {\r
+       if (Size == 0)\r
+               return 0;\r
+\r
+// Is a free buffer that is big enough?\r
+// A free slot is used when its buffer is large enough, or when it has no\r
+// buffer yet (one of Size + 1024 bytes is then allocated). Free slots whose\r
+// buffer is too small are skipped.\r
+       for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+               {\r
+               PathBuffer *PB = g_PathBuffers[i];\r
+               asserta(PB->Magic == PathMagic);\r
+               if (!PB->InUse)\r
+                       {\r
+                       if (PB->Size >= Size)\r
+                               {\r
+                               PB->InUse = true;\r
+                               return PB->Buffer;\r
+                               }\r
+                       if (PB->Buffer == 0)\r
+                               {\r
+                               unsigned Size2 = Size + 1024;\r
+                               PB->Buffer = MYALLOC(char, Size2, Path);\r
+                               PB->Size = Size2;\r
+                               PB->InUse = true;\r
+                               return PB->Buffer;\r
+                               }\r
+                       }\r
+               }\r
+\r
+// No available buffer, must expand g_PathBuffers[]\r
+// The table grows by 1024 slots; existing slot pointers are carried over.\r
+       unsigned NewPathBufferSize = g_PathBufferSize + 1024;\r
+       PathBuffer **NewPathBuffers = MYALLOC(PathBuffer *, NewPathBufferSize, Path);\r
+       \r
+       for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+               NewPathBuffers[i] = g_PathBuffers[i];\r
+\r
+       // The new slots start out empty and not in use.\r
+       for (unsigned i = g_PathBufferSize; i < NewPathBufferSize; ++i)\r
+               {\r
+               PathBuffer *PB = MYALLOC(PathBuffer, 1, Path);\r
+               PB->Magic = PathMagic;\r
+               PB->Buffer = 0;\r
+               PB->Size = 0;\r
+               PB->InUse = false;\r
+               NewPathBuffers[i] = PB;\r
+               }\r
+\r
+       // The first of the new slots serves this request.\r
+       PathBuffer *PB = NewPathBuffers[g_PathBufferSize];\r
+\r
+       MYFREE(g_PathBuffers, g_PathBufferSize, Path);\r
+       g_PathBuffers = NewPathBuffers;\r
+       g_PathBufferSize = NewPathBufferSize;\r
+\r
+       asserta(!PB->InUse && PB->Buffer == 0);\r
+\r
+       unsigned Size2 = Size + 1024;\r
+       PB->Buffer = MYALLOC(char, Size2, Path);\r
+       PB->Size = Size2;\r
+       PB->InUse = true;\r
+       return PB->Buffer;\r
+       }\r
+\r
+static void FreeBuffer(char *Buffer)\r
+       {\r
+       if (Buffer == 0)\r
+               return;\r
+\r
+       for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+               {\r
+               PathBuffer *PB = g_PathBuffers[i];\r
+               if (PB->Buffer == Buffer)\r
+                       {\r
+                       asserta(PB->InUse);\r
+                       PB->InUse = false;\r
+                       return;\r
+                       }\r
+               }\r
+\r
+       Die("FreeBuffer, not found");\r
+       }\r
+\r
+// Make sure the path has room for MaxLen characters.\r
+// Nothing happens when MaxLen is smaller than the current Bytes. Otherwise the\r
+// current buffer (if any) goes back to the pool and a buffer of MaxLen + 1\r
+// bytes is taken from it; Start is reset to 0, so any previous path is dropped.\r
+void PathData::Alloc(unsigned MaxLen)\r
+       {\r
+       if (MaxLen < Bytes)\r
+               return;\r
+\r
+       StartTimer(PathAlloc);\r
+       if (Bytes > 0)\r
+               {\r
+               FreeBuffer(Front);\r
+               }\r
+\r
+       Bytes = MaxLen + 1;\r
+       Front = AllocBuffer(Bytes);\r
+       // Back points at the last byte of the Bytes-sized region.\r
+       Back = Front + Bytes - 1;\r
+       Start = 0;\r
+       EndTimer(PathAlloc);\r
+       }\r
+\r
+// Give this path's buffer back to the pool and reset the object to the\r
+// unallocated state. Safe to call more than once.\r
+void PathData::Free()\r
+       {\r
+       FreeBuffer(Front);\r
+       Front = 0;\r
+       Start = 0;\r
+       Back = 0;\r
+       // Bytes must be cleared as well: Alloc() returns early when MaxLen < Bytes,\r
+       // so a stale size would leave Front null after a later, smaller Alloc().\r
+       Bytes = 0;\r
+       }\r
+\r
+// Make this path a copy of rhs.\r
+//\r
+// The whole rhs buffer (rhs.Bytes bytes starting at rhs.Front) is copied and\r
+// Start is placed at the same offset as in rhs. strcpy() from rhs.Front is not\r
+// enough: it stops at the first NUL byte, while the path string begins at\r
+// rhs.Start, which need not be equal to rhs.Front. An empty or unallocated\r
+// rhs gives an empty path, and copying a path onto itself does nothing.\r
+void PathData::Copy(const PathData &rhs)\r
+       {\r
+       if (&rhs == this)\r
+               return;\r
+\r
+       if (rhs.Front == 0 || rhs.Start == 0)\r
+               {\r
+               Start = 0;\r
+               return;\r
+               }\r
+\r
+       // After Alloc(rhs.Bytes) this buffer holds more than rhs.Bytes bytes,\r
+       // so the copy below always fits.\r
+       Alloc(rhs.Bytes);\r
+       memcpy(Front, rhs.Front, rhs.Bytes);\r
+       Start = Front + (rhs.Start - rhs.Front);\r
+       }\r
+\r
+// Set this path to a copy of the NUL-terminated string PathStr, which must\r
+// not be null. The copy is stored at the front of the buffer.\r
+void PathData::FromStr(const char *PathStr)\r
+       {\r
+       asserta(PathStr != 0);\r
+       // Room for the characters plus the terminating NUL.\r
+       const unsigned BytesWithNul = (unsigned) strlen(PathStr) + 1;\r
+       Alloc(BytesWithNul);\r
+       strcpy(Front, PathStr);\r
+       Start = Front;\r
+       }\r
+\r
+void LogPathStats()\r
+       {\r
+       Log("\n");\r
+       unsigned Bytes = 0;\r
+       for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+               {\r
+               const PathBuffer *PB = g_PathBuffers[i];\r
+               Bytes += PB->Size;\r
+               }\r
+       Log("%u paths allocated, total memory %u bytes\n", g_PathBufferSize, Bytes);\r
+       }\r
diff --git a/uchime_src/path.h b/uchime_src/path.h
new file mode 100644 (file)
index 0000000..f63be7e
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef path_h\r
+#define path_h\r
+\r
+// A path string held in a buffer that is managed through Alloc() and Free().\r
+struct PathData\r
+       {\r
+private:\r
+       // Declared private so that objects cannot be copied or assigned;\r
+       // use Copy() instead.\r
+       PathData(PathData &);\r
+       PathData &operator=(PathData &);\r
+\r
+public:\r
+       char *Start;            // beginning of the path string, or 0 when the path is empty\r
+       char *Front;            // first byte of the buffer, or 0 when there is none\r
+       char *Back;             // set by Alloc() to Front + Bytes - 1\r
+       unsigned Bytes;         // size set by Alloc()\r
+\r
+public:\r
+       // A new object has no buffer and is empty.\r
+       PathData()\r
+               {\r
+               Clear(true);\r
+               }\r
+       ~PathData()\r
+               {\r
+               Free();\r
+               }\r
+       void Free();\r
+       void Alloc(unsigned MaxLen);\r
+       // Make the path empty. With ctor == true the fields are only zeroed\r
+       // (nothing is allocated yet); otherwise the buffer is released with Free().\r
+       void Clear(bool ctor = false)\r
+               {\r
+               Start = 0;\r
+               if (ctor)\r
+                       {\r
+                       Front = 0;\r
+                       Back = 0;\r
+                       Bytes = 0;\r
+                       }\r
+               else\r
+                       Free();\r
+               }\r
+       void Copy(const PathData &rhs);\r
+       void FromStr(const char *PathStr);\r
+       // Reverse the path string in place; the path must not be empty.\r
+       void Reverse()\r
+               {\r
+               asserta(Start != 0);\r
+               unsigned L = (unsigned) strlen(Start);\r
+               // Swap characters from both ends towards the middle.\r
+               for (unsigned k = 0; k < L/2; ++k)\r
+                       {\r
+                       char c = Start[k];\r
+                       Start[k] = Start[L-k-1];\r
+                       Start[L-k-1] = c;\r
+                       }\r
+               }\r
+       // Mark the path as empty without releasing the buffer.\r
+       void SetEmpty()\r
+               {\r
+               Start = 0;\r
+               }\r
+\r
+       bool IsEmpty() const\r
+               {\r
+               return Start == 0;\r
+               }\r
+       };\r
+\r
+#endif // path_h\r
diff --git a/uchime_src/searchchime.cpp b/uchime_src/searchchime.cpp
new file mode 100644 (file)
index 0000000..c00a9c4
--- /dev/null
@@ -0,0 +1,304 @@
+#include "myutils.h"\r
+#include "ultra.h"\r
+#include "chime.h"\r
+#include "uc.h"\r
+#include "dp.h"\r
+#include <set>\r
+#include <algorithm>\r
+\r
+// Set to 1 to compile in the per-column trace logging at the end of\r
+// GetSmoothedIdVec.\r
+#define TRACE  0\r
+\r
+// Defined in another translation unit; not referenced by the code in this file.\r
+extern FILE *g_fUChime;\r
+\r
+// Defined elsewhere. SearchChime uses it to fill Parents with the indexes of\r
+// candidate parent sequences for the query.\r
+void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,\r
+  vector<unsigned> &Parents);\r
+\r
+// Defined elsewhere. SearchChime calls it with a query (QSD), two parents\r
+// (ASD, BSD) and the query-to-parent paths, and reads the result from Hit.\r
+void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
+  const string &PathQA, const string &PathQB, ChimeHit2 &Hit);\r
+\r
+// Defined elsewhere. SearchChime multiplies the result by 100 to obtain a\r
+// percent identity for the alignment described by Path.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo);\r
+\r
+static void GetSmoothedIdVec(const SeqData &QSD, const SeqData &PSD, const string &Path,\r
+  vector<unsigned> &IdVec, unsigned d)\r
+       {\r
+       IdVec.clear();\r
+       const unsigned ColCount = SIZE(Path);\r
+\r
+       const byte *Q = QSD.Seq;\r
+       const byte *P = PSD.Seq;\r
+\r
+       const unsigned QL = QSD.L;\r
+       const unsigned PL = PSD.L;\r
+\r
+       if (QL <= d)\r
+               {\r
+               IdVec.resize(QSD.L, 0);\r
+               return;\r
+               }\r
+\r
+       unsigned QPos = 0;\r
+       unsigned PPos = 0;\r
+\r
+       vector<bool> SameVec;\r
+       SameVec.reserve(QL);\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char c = Path[Col];\r
+\r
+               bool Same = false;\r
+               if (c == 'M')\r
+                       {\r
+                       byte q = Q[QPos];\r
+                       byte p = P[PPos];\r
+                       Same = (toupper(q) == toupper(p));\r
+                       }\r
+\r
+               if (c == 'M' || c == 'D')\r
+                       {\r
+                       ++QPos;\r
+                       SameVec.push_back(Same);\r
+                       }\r
+\r
+               if (c == 'M' || c == 'I')\r
+                       ++PPos;\r
+               }\r
+\r
+       asserta(SIZE(SameVec) == QL);\r
+\r
+       unsigned n = 0;\r
+       for (unsigned QPos = 0; QPos < d; ++QPos)\r
+               {\r
+               if (SameVec[QPos])\r
+                       ++n;\r
+               IdVec.push_back(n);\r
+               }\r
+\r
+       for (unsigned QPos = d; QPos < QL; ++QPos)\r
+               {\r
+               if (SameVec[QPos])\r
+                       ++n;\r
+               IdVec.push_back(n);\r
+               if (SameVec[QPos-d])\r
+                       --n;\r
+               }\r
+       asserta(SIZE(IdVec) == QL);\r
+\r
+#if    TRACE\r
+       {\r
+       Log("\n");\r
+       Log("GetSmoothedIdVec\n");\r
+       unsigned QPos = 0;\r
+       unsigned PPos = 0;\r
+       Log("Q P  Same       Id\n");\r
+       Log("- -  ----  -------\n");\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char c = Path[Col];\r
+\r
+               bool Same = false;\r
+               if (c == 'M')\r
+                       {\r
+                       byte q = Q[QPos];\r
+                       byte p = P[PPos];\r
+                       Same = (toupper(q) == toupper(p));\r
+                       Log("%c %c  %4c  %7d\n", q, p, tof(Same), IdVec[QPos]);\r
+                       }\r
+\r
+               if (c == 'M' || c == 'D')\r
+                       ++QPos;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PPos;\r
+               }\r
+       }\r
+#endif\r
+       }\r
+\r
+// Searches for a chimeric model of the query QSD.\r
+// Steps, as implemented below:\r
+//   1. Collect candidate parents (GetCandidateParents); give up with fewer\r
+//      than two.\r
+//   2. Globally align the query to every candidate and keep the path; for\r
+//      each query position remember the best smoothed identity over all\r
+//      parents (MaxIdVec).\r
+//   3. Greedily pick up to opt_maxp "best" parents, each time taking the\r
+//      parent that attains the maximum at the most remaining positions.\r
+//   4. Run AlignChime on every pair of best parents; Hit receives the pair\r
+//      with the highest Score.\r
+// Returns true if at least one pair satisfied ChimeHit2::Accept().\r
+bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
+  const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
+  float MinFractId, ChimeHit2 &Hit)\r
+       {\r
+       Hit.Clear();\r
+       Hit.QLabel = QSD.Label;\r
+\r
+       if (opt_verbose)\r
+               {\r
+               Log("\n");\r
+               Log("SearchChime()\n");\r
+               Log("Query>%s\n", QSD.Label);\r
+               }\r
+\r
+       vector<unsigned> Parents;\r
+       GetCandidateParents(U, QSD, QAb, Parents);\r
+\r
+       // A chimera needs two parents.\r
+       unsigned ParentCount = SIZE(Parents);\r
+       if (ParentCount <= 1)\r
+               {\r
+               if (opt_verbose)\r
+                       Log("%u candidate parents, done.\n", ParentCount);\r
+               return false;\r
+               }\r
+\r
+       if (opt_fastalign)\r
+               HF.SetA(QSD);\r
+       // NOTE(review): ptrHF is null when opt_fastalign is off, yet it is\r
+       // dereferenced unconditionally in the GlobalAlign call below. Confirm\r
+       // how GlobalAlign receives this argument and whether it tolerates that.\r
+       HSPFinder *ptrHF = (opt_fastalign ? &HF : 0);\r
+\r
+       // NOTE(review): the chunk information and the two ChunkIndexTo* vectors\r
+       // are computed here but not read again in this function.\r
+       unsigned ChunkLength;\r
+       vector<unsigned> ChunkLos;\r
+       GetChunkInfo(QSD.L, ChunkLength, ChunkLos);\r
+       const unsigned ChunkCount = SIZE(ChunkLos);\r
+\r
+       vector<unsigned> ChunkIndexToBestId(ChunkCount, 0);\r
+       vector<unsigned> ChunkIndexToBestParentIndex(ChunkCount, UINT_MAX);\r
+\r
+       // PSDs and Paths are indexed by ParentIndex; an empty path marks a\r
+       // parent that was rejected in the loop below.\r
+       vector<SeqData> PSDs;\r
+       vector<string> Paths;\r
+       double TopPctId = 0.0;\r
+       unsigned TopParentIndex = UINT_MAX;\r
+       unsigned QL = QSD.L;\r
+       vector<unsigned> MaxIdVec(QL, 0);\r
+       for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
+               {\r
+               unsigned ParentSeqIndex = Parents[ParentIndex];\r
+\r
+               SeqData PSD;\r
+               //PSD.Label = U.GetSeedLabel(ParentSeqIndex);\r
+               //PSD.Seq = U.GetSeedSeq(ParentSeqIndex);\r
+               //PSD.L = U.GetSeedLength(ParentSeqIndex);\r
+               //PSD.Index = ParentSeqIndex;\r
+               U.GetSeqData(ParentSeqIndex, PSD);\r
+               PSDs.push_back(PSD);\r
+\r
+               if (opt_fastalign)\r
+                       HF.SetB(PSD);\r
+\r
+               PathData PD;\r
+\r
+               float HSPId;\r
+               bool Found = GlobalAlign(QSD, PSD, AP, AH, *ptrHF, MinFractId, HSPId, PD);\r
+               if (!Found)\r
+                       {\r
+                       Paths.push_back("");                            \r
+                       continue;\r
+                       }\r
+\r
+               // With opt_selfid set, a parent identical to the query is skipped.\r
+               double PctId = 100.0*GetFractIdGivenPath(QSD.Seq, PSD.Seq, PD.Start, true);\r
+               if (opt_selfid && PctId == 100.0)\r
+                       {\r
+                       Paths.push_back("");                            \r
+                       continue;\r
+                       }\r
+\r
+               if (PctId > TopPctId)\r
+                       {\r
+                       TopParentIndex = ParentIndex;\r
+                       TopPctId = PctId;\r
+                       if (TopPctId >= 100.0 - opt_mindiv)\r
+                               {\r
+                               // NOTE(review): the early return sits inside the\r
+                               // opt_verbose block, so the search only stops here\r
+                               // when verbose logging is on. The log text suggests\r
+                               // the return was meant to be unconditional -- confirm\r
+                               // before changing, since it affects what is left in Hit.\r
+                               if (opt_verbose)\r
+                                       {\r
+                                       Log("  %.1f%%  >%s\n", TopPctId, PSD.Label);\r
+                                       Log("  Top hit exceeds ctl threshold, done.\n");\r
+                                       return false;\r
+                                       }\r
+                               }\r
+                       }\r
+\r
+               string Path = PD.Start;\r
+               Paths.push_back(Path);\r
+\r
+               vector<unsigned> IdVec;\r
+               GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
+\r
+               // Track the best smoothed identity seen at each query position.\r
+               for (unsigned QPos = 0; QPos < QL; ++QPos)\r
+                       if (IdVec[QPos] > MaxIdVec[QPos])\r
+                               MaxIdVec[QPos] = IdVec[QPos];\r
+               }\r
+\r
+       // Greedy selection: in each round choose the parent whose smoothed\r
+       // identity equals the maximum at the most query positions.\r
+       vector<unsigned> BestParents;\r
+       for (unsigned k = 0; k < opt_maxp; ++k)\r
+               {\r
+               unsigned BestParent = UINT_MAX;\r
+               unsigned BestCov = 0;\r
+               for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
+                       {\r
+                       const SeqData &PSD = PSDs[ParentIndex];\r
+                       const string &Path = Paths[ParentIndex];\r
+                       if (Path == "")\r
+                               continue;\r
+\r
+                       vector<unsigned> IdVec;\r
+                       GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
+\r
+                       unsigned Cov = 0;\r
+                       for (unsigned QPos = 0; QPos < QL; ++QPos)\r
+                               if (IdVec[QPos] == MaxIdVec[QPos])\r
+                                       ++Cov;\r
+\r
+                       if (Cov > BestCov)\r
+                               {\r
+                               BestParent = ParentIndex;\r
+                               BestCov = Cov;\r
+                               }\r
+                       }\r
+\r
+               // No parent covers any remaining position.\r
+               if (BestParent == UINT_MAX)\r
+                       break;\r
+\r
+               BestParents.push_back(BestParent);\r
+               vector<unsigned> IdVec;\r
+\r
+               // Positions won by the chosen parent are set to UINT_MAX so they\r
+               // no longer count towards coverage in later rounds.\r
+               const SeqData &PSD = PSDs[BestParent];\r
+               const string &Path = Paths[BestParent];\r
+               GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
+               for (unsigned QPos = 0; QPos < QL; ++QPos)\r
+                       if (IdVec[QPos] == MaxIdVec[QPos])\r
+                               MaxIdVec[QPos] = UINT_MAX;\r
+               }\r
+\r
+       unsigned BestParentCount = SIZE(BestParents);\r
+\r
+       if (opt_verbose)\r
+               {\r
+               Log("%u/%u best parents\n", BestParentCount, ParentCount);\r
+               for (unsigned k = 0; k < BestParentCount; ++k)\r
+                       {\r
+                       unsigned i = BestParents[k];\r
+                       Log(" %s\n", PSDs[i].Label);\r
+                       }\r
+               }\r
+\r
+       // Evaluate every unordered pair of best parents.\r
+       bool Found = false;\r
+       for (unsigned k1 = 0; k1 < BestParentCount; ++k1)\r
+               {\r
+               unsigned i1 = BestParents[k1];\r
+               asserta(i1 < ParentCount);\r
+\r
+               const SeqData &PSD1 = PSDs[i1];\r
+               const string &Path1 = Paths[i1];\r
+\r
+               for (unsigned k2 = k1 + 1; k2 < BestParentCount; ++k2)\r
+                       {\r
+                       unsigned i2 = BestParents[k2];\r
+                       asserta(i2 < ParentCount);\r
+                       asserta(i2 != i1);\r
+\r
+                       const SeqData &PSD2 = PSDs[i2];\r
+                       const string &Path2 = Paths[i2];\r
+\r
+                       ChimeHit2 Hit2;\r
+                       AlignChime(QSD, PSD1, PSD2, Path1, Path2, Hit2);\r
+                       Hit2.PctIdQT = TopPctId;\r
+\r
+                       if (Hit2.Accept())\r
+                               Found = true;\r
+\r
+                       // Hit keeps the highest-scoring pair, accepted or not.\r
+                       if (Hit2.Score > Hit.Score)\r
+                               Hit = Hit2;\r
+\r
+                       if (opt_verbose)\r
+                               Hit2.LogMe();\r
+                       }\r
+               }\r
+\r
+       return Found;\r
+       }\r
diff --git a/uchime_src/seq.h b/uchime_src/seq.h
new file mode 100644 (file)
index 0000000..9014641
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef seq_h\r
+#define seq_h\r
+\r
+struct ORFData;\r
+\r
+struct SeqData\r
+       {\r
+       const char *Label;\r
+       const byte *Seq;\r
+       unsigned L;\r
+       unsigned Index;\r
+\r
+// RevComp means that SeqData.Seq is reverse-complemented relative\r
+// to the sequence in the input file (query or db). Coordinates in\r
+// a hit (e.g., AlnData) will be relative to SeqData.Seq, so both\r
+// the sequence and the coordinates should be r.c.'d for output.\r
+       bool RevComp;\r
+       bool Nucleo;\r
+       const ORFData *ORFParent;\r
+\r
+       SeqData()\r
+               {\r
+               Clear();\r
+               }\r
+\r
+       void Clear()\r
+               {\r
+               Label = 0;\r
+               Seq = 0;\r
+               L = 0;\r
+               Index = UINT_MAX;\r
+               RevComp = false;\r
+               Nucleo = false;\r
+               ORFParent = 0;\r
+               }\r
+       };\r
+\r
+#endif // seq_h\r
diff --git a/uchime_src/seqdb.cpp b/uchime_src/seqdb.cpp
new file mode 100644 (file)
index 0000000..03de189
--- /dev/null
@@ -0,0 +1,289 @@
+#include "myutils.h"\r
+#include "seqdb.h"\r
+#include "alpha.h"\r
+#include "timing.h"\r
+#include "sfasta.h"\r
+#include "seq.h"\r
+\r
+void SeqToFasta(FILE *f, const char *Label, const byte *Seq, unsigned L)\r
+       {\r
+       const unsigned ROWLEN = 80;\r
+       if (Label != 0)\r
+               fprintf(f, ">%s\n", Label);\r
+       unsigned BlockCount = (L + ROWLEN - 1)/ROWLEN;\r
+       for (unsigned BlockIndex = 0; BlockIndex < BlockCount; ++BlockIndex)\r
+               {\r
+               unsigned From = BlockIndex*ROWLEN;\r
+               unsigned To = From + ROWLEN;\r
+               if (To >= L)\r
+                       To = L;\r
+               for (unsigned Pos = From; Pos < To; ++Pos)\r
+                       fputc(Seq[Pos], f);\r
+               fputc('\n', f);\r
+               }\r
+       }\r
+\r
+// Destruction releases everything the database owns.\r
+SeqDB::~SeqDB()\r
+       {\r
+       Clear(false);\r
+       }\r
+\r
+// Construction only initializes the fields; there is nothing to free yet,\r
+// hence Clear(true).\r
+SeqDB::SeqDB()\r
+       {\r
+       Clear(true);\r
+       }\r
+\r
+void SeqDB::Clear(bool ctor)\r
+       {\r
+       if (!ctor)\r
+               {\r
+               for (unsigned i = 0; i < m_SeqCount; ++i)\r
+                       {\r
+                       unsigned n = strlen(m_Labels[i]);\r
+                       MYFREE(m_Labels[i], n, SeqDB);\r
+                       MYFREE(m_Seqs[i], m_SeqLengths[i], SeqDB);\r
+                       }\r
+               MYFREE(m_Labels, m_Size, SeqDB);\r
+               MYFREE(m_Seqs, m_Size, SeqDB);\r
+               MYFREE(m_SeqLengths, m_Size, SeqDB);\r
+               }\r
+\r
+       m_FileName.clear();\r
+       m_SeqCount = 0;\r
+       m_Size = 0;\r
+\r
+       m_Labels = 0;\r
+       m_Seqs = 0;\r
+       m_SeqLengths = 0;\r
+\r
+       m_Aligned = false;\r
+       m_IsNucleo = false;\r
+       m_IsNucleoSet = false;\r
+       }\r
+\r
+// Empties the database and records the alphabet type up front, so that\r
+// IsNucleo() can be called without running SetIsNucleo().\r
+void SeqDB::InitEmpty(bool Nucleo)\r
+       {\r
+       Clear();\r
+\r
+       m_IsNucleoSet = true;\r
+       m_IsNucleo = Nucleo;\r
+       }\r
+\r
+void SeqDB::FromFasta(const string &FileName, bool AllowGaps)\r
+       {\r
+       Clear();\r
+       m_FileName = FileName;\r
+       SFasta SF;\r
+\r
+       SF.Open(FileName);\r
+       SF.m_AllowGaps = AllowGaps;\r
+\r
+       ProgressStep(0, 1000, "Reading %s", FileName.c_str());\r
+       for (;;)\r
+               {\r
+               unsigned QueryPctDoneX10 = SF.GetPctDoneX10();\r
+               ProgressStep(QueryPctDoneX10, 1000, "Reading %s", FileName.c_str());\r
+               const byte *Seq = SF.GetNextSeq();\r
+               if (Seq == 0)\r
+                       break;\r
+\r
+               const char *Label = SF.GetLabel();\r
+               unsigned L = SF.GetSeqLength();\r
+               AddSeq(Label, Seq, L);\r
+               }\r
+       ProgressStep(999, 1000, "Reading %s", FileName.c_str());\r
+\r
+       SetIsNucleo();\r
+\r
+       Progress("%s sequences\n", IntToStr(GetSeqCount()));\r
+       }\r
+\r
+// Writes every sequence of the database, in index order, to a newly\r
+// created FASTA file.\r
+void SeqDB::ToFasta(const string &FileName) const\r
+       {\r
+       FILE *f = CreateStdioFile(FileName);\r
+\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned SeqIndex = 0;\r
+       while (SeqIndex < SeqCount)\r
+               {\r
+               ToFasta(f, SeqIndex);\r
+               ++SeqIndex;\r
+               }\r
+\r
+       CloseStdioFile(f);\r
+       }\r
+\r
+void SeqDB::SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel) const\r
+       {\r
+       if (WithLabel)\r
+               fprintf(f, ">%s\n", GetLabel(SeqIndex));\r
+\r
+       const unsigned ROWLEN = 80;\r
+\r
+       unsigned L = GetSeqLength(SeqIndex);\r
+       const byte *Seq = GetSeq(SeqIndex);\r
+       unsigned BlockCount = (L + ROWLEN - 1)/ROWLEN;\r
+       for (unsigned BlockIndex = 0; BlockIndex < BlockCount; ++BlockIndex)\r
+               {\r
+               unsigned From = BlockIndex*ROWLEN;\r
+               unsigned To = From + ROWLEN;\r
+               if (To >= L)\r
+                       To = L;\r
+               for (unsigned Pos = From; Pos < To; ++Pos)\r
+                       fputc(Seq[Pos], f);\r
+               fputc('\n', f);\r
+               }\r
+       }\r
+\r
+// Writes sequence SeqIndex to f as a complete FASTA record (label line\r
+// followed by the sequence rows).\r
+void SeqDB::ToFasta(FILE *f, unsigned SeqIndex) const\r
+       {\r
+       asserta(SeqIndex < m_SeqCount);\r
+       // WithLabel = true makes SeqToFasta emit the ">label" line itself.\r
+       SeqToFasta(f, SeqIndex, true);\r
+       }\r
+\r
+unsigned SeqDB::GetMaxLabelLength() const\r
+       {\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned MaxL = 0;\r
+       for (unsigned Index = 0; Index < SeqCount; ++Index)\r
+               {\r
+               unsigned L = (unsigned) strlen(m_Labels[Index]);\r
+               if (L > MaxL)\r
+                       MaxL = L;\r
+               }\r
+       return MaxL;\r
+       }\r
+\r
+unsigned SeqDB::GetMaxSeqLength() const\r
+       {\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned MaxL = 0;\r
+       for (unsigned Index = 0; Index < SeqCount; ++Index)\r
+               {\r
+               unsigned L = m_SeqLengths[Index];\r
+               if (L > MaxL)\r
+                       MaxL = L;\r
+               }\r
+       return MaxL;\r
+       }\r
+\r
+void SeqDB::LogMe() const\r
+       {\r
+       Log("\n");\r
+       const unsigned SeqCount = GetSeqCount();\r
+       Log("SeqDB %u seqs, aligned=%c\n", SeqCount, tof(m_Aligned));\r
+       if (SeqCount == 0)\r
+               return;\r
+\r
+       Log("Index             Label  Length  Seq\n");\r
+       Log("-----  ----------------  ------  ---\n");\r
+       for (unsigned Index = 0; Index < SeqCount; ++Index)\r
+               {\r
+               Log("%5u", Index);\r
+               Log("  %16.16s", m_Labels[Index]);\r
+               unsigned L = m_SeqLengths[Index];\r
+               Log("  %6u", L);\r
+               Log("  %*.*s", L, L, m_Seqs[Index]);\r
+               Log("\n");\r
+               }\r
+       }\r
+\r
+void SeqDB::GetSeqData(unsigned Id, SeqData &Buffer) const\r
+       {\r
+       asserta(Id < m_SeqCount);\r
+       Buffer.Seq = m_Seqs[Id];\r
+       Buffer.Label = m_Labels[Id];\r
+       Buffer.L = m_SeqLengths[Id];\r
+       Buffer.Index = Id;\r
+       Buffer.ORFParent = 0;\r
+       Buffer.RevComp = false;\r
+       Buffer.Nucleo = IsNucleo();\r
+       }\r
+\r
+void SeqDB::SetIsNucleo()\r
+       {\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned N = 0;\r
+       for (unsigned i = 0; i < 100; ++i)\r
+               {\r
+               unsigned SeqIndex = unsigned(rand()%SeqCount);\r
+               const byte *Seq = GetSeq(SeqIndex);\r
+               unsigned L = GetSeqLength(SeqIndex);\r
+               const unsigned Pos = unsigned(rand()%L);\r
+               byte c = Seq[Pos];\r
+\r
+               if (g_IsNucleoChar[c])\r
+                       ++N;\r
+               }\r
+       m_IsNucleo = (N > 80);\r
+       m_IsNucleoSet = true;\r
+       }\r
+\r
+unsigned SeqDB::GetTotalLength() const\r
+       {\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned TotalLength = 0;\r
+       for (unsigned Id = 0; Id < SeqCount; ++Id)\r
+               TotalLength += GetSeqLength(Id);\r
+       return TotalLength;\r
+       }\r
+\r
+unsigned SeqDB::AddSeq(const char *Label, const byte *Seq, unsigned L)\r
+       {\r
+       StartTimer(AddSeq);\r
+       if (m_SeqCount >= m_Size)\r
+               {\r
+               unsigned NewSize = unsigned(m_Size*1.5) + 1024;\r
+               char **NewLabels = MYALLOC(char *, NewSize, SeqDB);\r
+               byte **NewSeqs = MYALLOC(byte *, NewSize, SeqDB);\r
+               unsigned *NewSeqLengths = MYALLOC(unsigned, NewSize, SeqDB);\r
+\r
+               for (unsigned i = 0; i < m_SeqCount; ++i)\r
+                       {\r
+                       NewLabels[i] = m_Labels[i];\r
+                       NewSeqs[i] = m_Seqs[i];\r
+                       NewSeqLengths[i] = m_SeqLengths[i];\r
+                       }\r
+\r
+               MYFREE(m_Labels, m_SeqCount, SeqDB);\r
+               MYFREE(m_Seqs, m_SeqCount, SeqDB);\r
+               MYFREE(m_SeqLengths, m_SeqCount, SeqDB);\r
+\r
+               m_Labels = NewLabels;\r
+               m_Seqs = NewSeqs;\r
+               m_SeqLengths = NewSeqLengths;\r
+               m_Size = NewSize;\r
+               }\r
+\r
+       unsigned Index = m_SeqCount++;\r
+       m_Seqs[Index] = MYALLOC(byte, L, SeqDB);\r
+       memcpy(m_Seqs[Index], Seq, L);\r
+\r
+       unsigned n = strlen(Label) + 1;\r
+       m_Labels[Index] = MYALLOC(char, n, SeqDB);\r
+       memcpy(m_Labels[Index], Label, n);\r
+\r
+       if (Index == 0)\r
+               m_Aligned = true;\r
+       else\r
+               m_Aligned = (m_Aligned && L == m_SeqLengths[0]);\r
+\r
+       m_SeqLengths[Index] = L;\r
+\r
+       EndTimer(AddSeq);\r
+       return Index;\r
+       }\r
+\r
+unsigned SeqDB::GetIndex(const char *Label) const\r
+       {\r
+       for (unsigned i = 0; i < m_SeqCount; ++i)\r
+               if (strcmp(Label, m_Labels[i]) == 0)\r
+                       return i;\r
+       Die("SeqDB::GetIndex(%s), not found", Label);\r
+       return UINT_MAX;\r
+       }\r
+\r
+void SeqDB::MakeLabelToIndex(map<string, unsigned> &LabelToIndex)\r
+       {\r
+       LabelToIndex.clear();\r
+       for (unsigned i = 0; i < m_SeqCount; ++i)\r
+               {\r
+               const string &Label = string(GetLabel(i));\r
+               if (LabelToIndex.find(Label) != LabelToIndex.end())\r
+                       Die("Duplicate label: %s", Label.c_str());\r
+               LabelToIndex[Label] = i;\r
+               }\r
+       }\r
diff --git a/uchime_src/seqdb.h b/uchime_src/seqdb.h
new file mode 100644 (file)
index 0000000..e4af984
--- /dev/null
@@ -0,0 +1,108 @@
+#ifndef seqdb_h\r
+#define seqdb_h\r
+\r
+#include <vector>\r
+#include <map>\r
+\r
+struct SeqData;\r
+\r
+using namespace std;\r
+\r
+// In-memory collection of labelled sequences. The database keeps its own\r
+// copies of the labels and letters added through AddSeq() and releases\r
+// them in Clear() / the destructor. Copying is disabled: the copy\r
+// constructor and assignment operator are declared private.\r
+struct SeqDB\r
+       {\r
+private:\r
+       SeqDB(const SeqDB &rhs);\r
+       SeqDB &operator=(const SeqDB &rhs);\r
+\r
+public:\r
+// m_Labels, m_Seqs and m_SeqLengths are parallel tables indexed by\r
+// sequence index. m_SeqCount entries are in use; m_Size is the number of\r
+// entries the tables can hold before AddSeq() has to grow them.\r
+       string m_FileName;\r
+       char **m_Labels;\r
+       byte **m_Seqs;\r
+       unsigned *m_SeqLengths;\r
+       unsigned m_SeqCount;\r
+       unsigned m_Size;\r
+\r
+// m_Aligned: true while all sequences added so far have the same length.\r
+// m_IsNucleo is only meaningful once m_IsNucleoSet is true.\r
+       bool m_Aligned;\r
+       bool m_IsNucleo;\r
+       bool m_IsNucleoSet;\r
+\r
+public:\r
+       SeqDB();\r
+       ~SeqDB();\r
+       void Clear(bool ctor = false);\r
+       void InitEmpty(bool Nucleo);\r
+\r
+// Stores copies of Label and Seq; returns the index of the new entry.\r
+       unsigned AddSeq(const char *Label, const byte *Seq, unsigned L);\r
+\r
+// The three accessors below assert that SeqIndex is in range.\r
+       byte *GetSeq(unsigned SeqIndex) const\r
+               {\r
+               asserta(SeqIndex < m_SeqCount);\r
+               return m_Seqs[SeqIndex];\r
+               }\r
+\r
+       const char *GetLabel(unsigned SeqIndex) const\r
+               {\r
+               asserta(SeqIndex < m_SeqCount);\r
+               return m_Labels[SeqIndex];\r
+               }\r
+\r
+       unsigned GetSeqLength(unsigned SeqIndex) const\r
+               {\r
+               asserta(SeqIndex < m_SeqCount);\r
+               return m_SeqLengths[SeqIndex];\r
+               }\r
+\r
+       unsigned GetSeqCount() const\r
+               {\r
+               return m_SeqCount;\r
+               }\r
+\r
+// Number of unordered pairs of distinct sequences: n*(n-1)/2.\r
+       unsigned GetPairCount() const\r
+               {\r
+               unsigned SeqCount = GetSeqCount();\r
+               return (SeqCount*(SeqCount - 1))/2;\r
+               }\r
+\r
+// Index of an unordered pair in a triangular layout: with i the larger\r
+// and j the smaller index the result is i*(i-1)/2 + j. The order of the\r
+// two arguments does not matter.\r
+       unsigned GetPairIndex(unsigned SeqIndex1, unsigned SeqIndex2) const\r
+               {\r
+               if (SeqIndex1 > SeqIndex2)\r
+                       return (SeqIndex1*(SeqIndex1 - 1))/2 + SeqIndex2;\r
+               return (SeqIndex2*(SeqIndex2 - 1))/2 + SeqIndex1;\r
+               }\r
+\r
+// Common sequence length of an aligned database. Calls Die() if the\r
+// database is not aligned or is empty.\r
+       unsigned GetColCount() const\r
+               {\r
+               if (!m_Aligned)\r
+                       Die("SeqDB::GetColCount, not aligned");\r
+               if (m_SeqCount == 0)\r
+                       Die("SeqDB::GetColCount, empty");\r
+               return m_SeqLengths[0];\r
+               }\r
+\r
+// Asserts that the alphabet type has been set (by InitEmpty() or\r
+// SetIsNucleo()) before returning it.\r
+       bool IsNucleo() const\r
+               {\r
+               asserta(m_IsNucleoSet);\r
+               return m_IsNucleo;\r
+               }\r
+\r
+// Fills Buffer with pointers into this database; nothing is copied.\r
+       void GetSeqData(unsigned Id, SeqData &Buffer) const;\r
+\r
+       unsigned GetMaxLabelLength() const;\r
+       unsigned GetMaxSeqLength() const;\r
+       void SetIsNucleo();\r
+       unsigned GetIndex(const char *Label) const;\r
+       void MakeLabelToIndex(map<string, unsigned> &LabelToIndex);\r
+\r
+       void LogMe() const;\r
+       void FromFasta(const string &FileName, bool AllowGaps = false);\r
+\r
+// ToFasta(FileName) writes the whole database; ToFasta(f, SeqIndex) writes\r
+// one record including its label line; SeqToFasta writes the sequence rows\r
+// and the label line only on request.\r
+       void ToFasta(const string &FileName) const;\r
+       void ToFasta(FILE *f, unsigned SeqIndex) const;\r
+       void SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel = false) const;\r
+\r
+       unsigned GetTotalLength() const;\r
+       };\r
+\r
+bool isgap(byte c);\r
+\r
+#endif\r
diff --git a/uchime_src/setnucmx.cpp b/uchime_src/setnucmx.cpp
new file mode 100644 (file)
index 0000000..030ff5a
--- /dev/null
@@ -0,0 +1,77 @@
+#include "myutils.h"
+#include "mx.h"
+
+// Substitution matrix object; allocated as 256 x 256 by SetNucSubstMx().
+Mx<float> g_SubstMxf;
+// Raw row pointers of g_SubstMxf (from GetData()), set by SetNucSubstMx().
+float **g_SubstMx;
+
+// Letters that receive match/mismatch scores, in upper and lower case.
+static const char Alphabet[] = "ACGTU";
+
+void SetNucSubstMx(double Match, double Mismatch)\r
+       {\r
+       static bool Done = false;\r
+       if (Done)\r
+               return;\r
+       Done = true;\r
+\r
+       if (Match <= 0.0)\r
+               Die("Match score should be +ve");\r
+       if (Mismatch >= 0.0)\r
+               Die("Mismatch score should be -ve");\r
+\r
+       unsigned N = unsigned(strlen(Alphabet));\r
+\r
+       g_SubstMxf.Alloc("NUCMX", 256, 256);\r
+       strcpy(g_SubstMxf.m_Alpha, "ACGT");\r
+       g_SubstMxf.Init(0);\r
+       g_SubstMx = g_SubstMxf.GetData();\r
+       for (unsigned i = 0; i < N; ++i)\r
+               {\r
+               for (unsigned j = 0; j < N; ++j)\r
+                       {\r
+                       float v = float(i == j ? Match : Mismatch);\r
+\r
+                       byte ui = (byte) toupper(Alphabet[i]);\r
+                       byte uj = (byte) toupper(Alphabet[j]);\r
+                       byte li = (byte) tolower(ui);\r
+                       byte lj = (byte) tolower(uj);\r
+                       ui = (byte) toupper(ui);\r
+                       uj = (byte) toupper(uj);\r
+\r
+                       g_SubstMx[ui][uj] = v;\r
+                       g_SubstMx[uj][ui] = v;\r
+\r
+                       g_SubstMx[ui][lj] = v;\r
+                       g_SubstMx[uj][li] = v;\r
+\r
+                       g_SubstMx[li][uj] = v;\r
+                       g_SubstMx[lj][ui] = v;\r
+\r
+                       g_SubstMx[li][lj] = v;\r
+                       g_SubstMx[lj][li] = v;\r
+                       }\r
+               }\r
+\r
+       for (unsigned j = 0; j < N; ++j)\r
+               {\r
+               float v = 0.0f;\r
+\r
+               byte ui = (byte) 'N';\r
+               byte uj = (byte) toupper(Alphabet[j]);\r
+               byte li = (byte) 'n';\r
+               byte lj = (byte) tolower(uj);\r
+               ui = (byte) toupper(ui);\r
+               uj = (byte) toupper(uj);\r
+\r
+               g_SubstMx[ui][uj] = v;\r
+               g_SubstMx[uj][ui] = v;\r
+\r
+               g_SubstMx[ui][lj] = v;\r
+               g_SubstMx[uj][li] = v;\r
+\r
+               g_SubstMx[li][uj] = v;\r
+               g_SubstMx[lj][ui] = v;\r
+\r
+               g_SubstMx[li][lj] = v;\r
+               g_SubstMx[lj][li] = v;\r
+               }\r
+       }\r
diff --git a/uchime_src/sfasta.cpp b/uchime_src/sfasta.cpp
new file mode 100644 (file)
index 0000000..918d4f8
--- /dev/null
@@ -0,0 +1,467 @@
+#include "sfasta.h"\r
+#include "orf.h"\r
+#include "alpha.h"\r
+#include "timing.h"\r
+\r
+// True when c is one of the two gap symbols, '-' or '.'.\r
+static inline bool isgap(byte c)\r
+       {\r
+       switch (c)\r
+               {\r
+       case '-':\r
+       case '.':\r
+               return true;\r
+       default:\r
+               return false;\r
+               }\r
+       }\r
+\r
+// Number of byte elements SFasta::Open() allocates for the read cache\r
+// (16*1024*1024). FillCache() dies with "Sequence too long" when a\r
+// non-final cache fill contains only a single record.\r
+const unsigned BufferSize = 16*1024*1024;\r
+\r
+static unsigned GetMaxPoly(const byte *Seq, unsigned L)\r
+       {\r
+       byte CurrChar = Seq[0];\r
+       unsigned Start = 0;\r
+       unsigned MaxLen = 1;\r
+       for (unsigned i = 1; i < L; ++i)\r
+               {\r
+               char c = Seq[i];\r
+               if (c != CurrChar || i+1 == L)\r
+                       {\r
+                       unsigned Len = i - Start;\r
+                       if (Len > MaxLen)\r
+                               MaxLen = Len;\r
+                       CurrChar = c;\r
+                       Start = i;\r
+                       }\r
+               }\r
+       return MaxLen;\r
+       }\r
+\r
+SFasta::SFasta()\r
+       {\r
+       m_FileName = "";\r
+       m_File = 0;\r
+       m_Buffer = 0;\r
+       m_BufferSize = 0;\r
+       m_BufferOffset = 0;\r
+       m_BufferBytes = 0;\r
+       m_FilePos = 0;\r
+       m_FileSize = 0;\r
+       m_Label = 0;\r
+       m_SeqLength = 0;\r
+       m_TooShortCount = 0;\r
+       m_TooLongCount = 0;\r
+       m_ShortestLength = 0;\r
+       m_LongestLength = 0;\r
+       m_IsNucleo = false;\r
+       m_IsNucleoSet = false;\r
+       }\r
+\r
+// Releases the cache buffer and closes the file by delegating to Clear().\r
+SFasta::~SFasta()\r
+       {\r
+       Clear();\r
+       }\r
+\r
+void SFasta::Clear()\r
+       {\r
+       MYFREE(m_Buffer, m_BufferSize, SFasta);\r
+       if (m_File != 0)\r
+               CloseStdioFile(m_File);\r
+\r
+       m_FileName = "";\r
+       m_File = 0;\r
+       m_Buffer = 0;\r
+       m_BufferSize = 0;\r
+       m_BufferOffset = 0;\r
+       m_BufferBytes = 0;\r
+       m_FilePos = 0;\r
+       m_FileSize = 0;\r
+       m_Label = 0;\r
+       m_SeqLength = 0;\r
+       m_SeqIndex = UINT_MAX;\r
+       m_AllowGaps = false;\r
+       m_IsNucleo = false;\r
+       m_IsNucleoSet = false;\r
+       m_TooShortCount = 0;\r
+       m_TooLongCount = 0;\r
+       m_ShortestLength = 0;\r
+       m_LongestLength = 0;\r
+       m_TooPolyCount = 0;\r
+       }\r
+\r
+void SFasta::LogMe() const\r
+       {\r
+       Log("\n");\r
+       Log("SFasta::LogMe()\n");\r
+       Log("FileName=%s\n", m_FileName.c_str());\r
+       Log("FileSize=%u\n", (unsigned) m_FileSize);\r
+       Log("FilePos=%u\n", (unsigned) m_FilePos);\r
+       Log("BufferSize=%u\n", m_BufferSize);\r
+       Log("BufferPos=%u\n", m_BufferOffset);\r
+       Log("BufferBytes=%u\n", m_BufferBytes);\r
+       if (m_Label == 0)\r
+               Log("Label=NULL\n");\r
+       else\r
+               Log("Label=%s\n", m_Label);\r
+       Log("SeqLength=%u\n", m_SeqLength);\r
+       }\r
+\r
+const byte *SFasta::GetNextSeq()\r
+       {\r
+       for (;;)\r
+               {\r
+               const byte *Seq = GetNextSeqLo();\r
+               if (Seq == 0)\r
+                       {\r
+                       if (m_TooShortCount > 0)\r
+                               Warning("%u short sequences (--minlen %u, shortest %u) discarded from %s",\r
+                                 m_TooShortCount, opt_minlen, m_ShortestLength, m_FileName.c_str());\r
+                       if (m_TooLongCount > 0)\r
+                               Warning("%u long sequences (--maxlen %u, longest %u) discarded from %s",\r
+                                 m_TooLongCount, opt_maxlen, m_LongestLength, m_FileName.c_str());\r
+                       if (m_TooPolyCount > 0)\r
+                               Warning("%u sequences with long homopolymers discarded (--maxpoly %u)",\r
+                                 m_TooPolyCount, opt_maxpoly);\r
+                       return 0;\r
+                       }\r
+               if (m_SeqLength < opt_minlen)\r
+                       {\r
+                       ++m_TooShortCount;\r
+                       if (m_ShortestLength == 0 || m_SeqLength < m_ShortestLength)\r
+                               m_ShortestLength = m_SeqLength;\r
+                       continue;\r
+                       }\r
+               if (m_SeqLength > opt_maxlen && opt_maxlen != 0)\r
+                       {\r
+                       if (m_LongestLength == 0 || m_SeqLength > m_LongestLength)\r
+                               m_LongestLength = m_SeqLength;\r
+                       ++m_TooLongCount;\r
+                       continue;\r
+                       }\r
+               return Seq;\r
+               }\r
+       }\r
+\r
+// Parses the next FASTA record from the cache, refilling the cache from\r
+// the file when it is exhausted. Returns a pointer to the sequence\r
+// letters, which are compacted in place inside m_Buffer (m_SeqLength\r
+// bytes, no terminator is written), or 0 at end of file.\r
+// Sets m_Label, m_SeqLength and m_SeqIndex for the record.\r
+const byte *SFasta::GetNextSeqLo()\r
+       {\r
+// End of cache?\r
+       if (m_BufferOffset == m_BufferBytes)\r
+               {\r
+       // End of file?\r
+               if (m_FilePos == m_FileSize)\r
+                       return 0;\r
+               FillCache();\r
+               }\r
+\r
+       StartTimer(SF_GetNextSeq);\r
+       asserta(m_Buffer[m_BufferOffset] == '>');\r
+       m_Label = (char *) (m_Buffer + m_BufferOffset + 1);\r
+\r
+// Scan to the end of the header line. Only the first m_BufferBytes bytes\r
+// of the buffer belong to the current fill, so never look beyond them.\r
+       byte *ptr = 0;\r
+       for (unsigned i = m_BufferOffset; i < m_BufferBytes; ++i)\r
+               {\r
+               const byte c = m_Buffer[i];\r
+               if (c == '\n' || c == '\r')\r
+                       {\r
+                       ptr = m_Buffer + i;\r
+                       break;\r
+                       }\r
+               }\r
+       asserta(ptr != 0);\r
+\r
+// Terminate the label. Both loops stop at or before ptr because the\r
+// end-of-line byte found above is itself white space.\r
+       if (opt_trunclabels)\r
+               {\r
+               for (char *p = m_Label; *p; ++p)\r
+                       {\r
+               // Cast: isspace() is undefined for negative char values.\r
+                       if (isspace((unsigned char) *p))\r
+                               {\r
+                               *p = 0;\r
+                               break;\r
+                               }\r
+                       }\r
+               }\r
+       else\r
+               {\r
+               for (char *p = m_Label; *p; ++p)\r
+                       {\r
+                       if (*p == '\t')\r
+                               *p = ' ';\r
+                       else if (*p == '\r' || *p == '\n')\r
+                               {\r
+                               *p = 0;\r
+                               break;\r
+                               }\r
+                       }\r
+               }\r
+\r
+// ptr points to end-of-line.\r
+// Move to start of sequence data.\r
+       byte *Seq = ++ptr;\r
+\r
+// Delete white space in-place\r
+       byte *To = ptr;\r
+       m_BufferOffset = (unsigned) (ptr - m_Buffer);\r
+\r
+// The sequence data starts immediately after the header's end-of-line, so\r
+// its first byte is at the start of a line. Tracking this with a flag,\r
+// instead of re-reading the previous byte, matters because that byte may\r
+// have been overwritten with the label terminator; a record with an empty\r
+// sequence would otherwise swallow the following record.\r
+       bool AtLineStart = true;\r
+       while (m_BufferOffset < m_BufferBytes)\r
+               {\r
+               const byte c = m_Buffer[m_BufferOffset];\r
+               if (c == '>' && AtLineStart)\r
+                       break;\r
+               AtLineStart = (c == '\n' || c == '\r');\r
+               ++m_BufferOffset;\r
+\r
+               if (isalpha(c) || (isgap(c) && m_AllowGaps))\r
+                       {\r
+                       *To++ = c;\r
+                       continue;\r
+                       }\r
+               if (c == '\n' || c == '\r')\r
+                       continue;\r
+\r
+       // Anything else is dropped; warn once per process.\r
+               const char *Label = (m_Label == 0 ? "" : m_Label);\r
+               static bool WarningDone = false;\r
+               if (!WarningDone)\r
+                       {\r
+                       if (isgap(c))\r
+                               Warning("Ignoring gaps in FASTA file '%s'",\r
+                                 m_FileName.c_str());\r
+                       else if (isprint(c))\r
+                               Warning("Invalid FASTA file '%s', non-letter '%c' in sequence >%s",\r
+                                 m_FileName.c_str(), c, Label);\r
+                       else\r
+                               Warning("Invalid FASTA file '%s', non-printing byte (hex %02x) in sequence >%s",\r
+                                 m_FileName.c_str(), c, Label);\r
+                       WarningDone = true;\r
+                       }\r
+               }\r
+       m_SeqLength = unsigned(To - Seq);\r
+\r
+       if (m_SeqIndex == UINT_MAX)\r
+               m_SeqIndex = 0;\r
+       else\r
+               ++m_SeqIndex;\r
+\r
+       EndTimer(SF_GetNextSeq);\r
+       return Seq;\r
+       }\r
+\r
+// Resets the reader, opens FileName, allocates a cache of BufferSize\r
+// elements and records the file size. No data is read here; the first\r
+// call to GetNextSeq() fills the cache.\r
+void SFasta::Open(const string &FileName)\r
+       {\r
+       Clear();\r
+       m_FileName = FileName;\r
+       m_File = OpenStdioFile(FileName);\r
+       m_BufferSize = BufferSize;\r
+       //m_Buffer = myalloc<byte>(m_BufferSize);\r
+       m_Buffer = MYALLOC(byte, m_BufferSize, SFasta);\r
+       m_FileSize = GetStdioFileSize(m_File);\r
+       }\r
+\r
+// Marks the cache as empty and moves the read position back to the start\r
+// of the file, so the next GetNextSeq() re-reads from the first record.\r
+// NOTE(review): m_SeqIndex and the too-short/too-long statistics are not\r
+// reset here, so indexes keep counting across a rewind - confirm that\r
+// this is intended.\r
+void SFasta::Rewind()\r
+       {\r
+       m_BufferOffset = 0;\r
+       m_BufferBytes = 0;\r
+       m_FilePos = 0;\r
+       }\r
+\r
+bool SFasta::SetIsNucleo()\r
+       {\r
+       if (m_FilePos != 0)\r
+               Die("SFasta::IsNucleo, not at BOF");\r
+\r
+       unsigned LetterCount = 0;\r
+       unsigned NucleoLetterCount = 0;\r
+       for (;;)\r
+               {\r
+               const byte *Seq = GetNextSeq();\r
+               if (Seq == 0)\r
+                       break;\r
+               unsigned L = GetSeqLength();\r
+               for (unsigned i = 0; i < L; ++i)\r
+                       if (g_IsNucleoChar[Seq[i]])\r
+                               ++NucleoLetterCount;\r
+               LetterCount += L;\r
+               if (LetterCount > 256)\r
+                       break;\r
+               }\r
+       Rewind();\r
+       if (LetterCount == 0)\r
+               {\r
+               m_IsNucleoSet = true;\r
+               m_IsNucleo = true;\r
+               return true;\r
+               }\r
+\r
+// Nucleo if more than 90% nucleo letters AGCTUN\r
+       m_IsNucleo = double(NucleoLetterCount)/LetterCount > 0.9;\r
+       m_IsNucleoSet = true;\r
+       return m_IsNucleo;\r
+       }\r
+\r
+void SFasta::FillCache()\r
+       {\r
+       StartTimer(SF_FillCache);\r
+       asserta(m_FilePos < m_FileSize);\r
+\r
+// off_t may be larger type than unsigned, e.g. 64- vs. 32-bit.\r
+       off_t otBytesToRead = m_FileSize - m_FilePos;\r
+\r
+       bool FinalBuffer = true;\r
+       if (otBytesToRead > (off_t) m_BufferSize)\r
+               {\r
+               FinalBuffer = false;\r
+               otBytesToRead = m_BufferSize;\r
+               }\r
+\r
+       unsigned BytesToRead = unsigned(otBytesToRead);\r
+       asserta(BytesToRead > 0);\r
+       asserta(BytesToRead <= m_BufferSize);\r
+\r
+       SetStdioFilePos(m_File, m_FilePos);\r
+       ReadStdioFile(m_File, m_Buffer, BytesToRead);\r
+       if (m_Buffer[0] != '>')\r
+               {\r
+               if (m_FilePos == 0)\r
+                       Die("Input is not FASTA file");\r
+               else\r
+                       Die("SFasta::FillCache() failed, expected '>'");\r
+               }\r
+\r
+       m_BufferOffset = 0;\r
+\r
+// If last buffer in file, done\r
+       if (FinalBuffer)\r
+               {\r
+               m_BufferBytes = BytesToRead;\r
+               m_FilePos += BytesToRead;\r
+               EndTimer(SF_FillCache);\r
+               return;\r
+               }\r
+\r
+// If not last buffer, truncate any partial sequence\r
+// at end of buffer. Search backwards to find last '>'.\r
+       byte *ptr = m_Buffer + BytesToRead - 1;\r
+       while (ptr > m_Buffer)\r
+               {\r
+               if (ptr[0] == '>' && (ptr[-1] == '\n' || ptr[-1] == '\r'))\r
+                       break;\r
+               --ptr;\r
+               }\r
+\r
+       if (ptr == m_Buffer)\r
+               {\r
+               LogMe();\r
+               if (*ptr != '>')\r
+                       {\r
+       // No '>' found.\r
+       // This might techincally be legal FASTA if the entire\r
+       // buffer is white space, but strange if not the last buffer\r
+       // in the file, so quit anyway.\r
+                       Die("Failed to find '>' (pos=%u, bytes=%u)",\r
+                         (unsigned) m_FilePos, BytesToRead);\r
+                       }\r
+               else\r
+                       {\r
+       // Entire buffer is one sequence which may be truncated.\r
+                       Die("Sequence too long (pos=%u, bytes=%u)",\r
+                         (unsigned) m_FilePos, BytesToRead);\r
+                       }\r
+               }\r
+\r
+       asserta(*ptr == '>');\r
+\r
+       m_BufferBytes = unsigned(ptr - m_Buffer);\r
+       m_FilePos += m_BufferBytes;\r
+\r
+       EndTimer(SF_FillCache);\r
+       }\r
+\r
+unsigned SFasta::GetPctDoneX10() const\r
+       {\r
+       if (m_FilePos == 0 || m_FileSize == 0)\r
+               return 0;\r
+\r
+       assert(m_FilePos >= (off_t) m_BufferBytes);\r
+       off_t BufferStart = m_FilePos - m_BufferBytes;\r
+       off_t BufferPos = BufferStart + m_BufferOffset;\r
+\r
+       unsigned iPctX10 = unsigned(10.0*double(BufferPos)*100.0/double(m_FileSize));\r
+       if (iPctX10 == 0)\r
+               return 1;\r
+       if (iPctX10 >= 999)\r
+               return 998;\r
+       return iPctX10;\r
+       }\r
+\r
+double SFasta::GetPctDone() const\r
+       {\r
+       if (m_FilePos == 0 || m_FileSize == 0)\r
+               return 0;\r
+\r
+       assert(m_FilePos >= (off_t) m_BufferBytes);\r
+       off_t BufferStart = m_FilePos - m_BufferBytes;\r
+       off_t BufferPos = BufferStart + m_BufferOffset;\r
+\r
+       return double(BufferPos)*100.0/double(m_FileSize);\r
+       }\r
+\r
+bool SFasta::GetNextSD(SeqData &SD)\r
+       {\r
+       SD.Seq = GetNextSeq();\r
+       if (SD.Seq == 0)\r
+               return false;\r
+\r
+       SD.Label = GetLabel();\r
+       SD.L = GetSeqLength();\r
+       SD.Index = GetSeqIndex();\r
+       SD.ORFParent = 0;\r
+       SD.Nucleo = GetIsNucleo();\r
+       SD.RevComp = false;\r
+\r
+       return true;\r
+       }\r
+\r
+#if    TEST\r
+void TestSFasta()\r
+       {\r
+       SFasta SF;\r
+       SF.Open(opt_input);\r
+\r
+       if (opt_verbose)\r
+               {\r
+               Log("  Index   Length  Label\n");\r
+               Log("-------  -------  -----\n");\r
+               }\r
+\r
+       unsigned Index = 0;\r
+       unsigned SeqCount = 0;\r
+       double LetterCount = 0.0;\r
+       ProgressStep(0, 1000, "Reading");\r
+       for (;;)\r
+               {\r
+               const byte *Seq = SF.GetNextSeq();\r
+               if (Seq == 0)\r
+                       break;\r
+               ProgressStep(SF.GetPctDoneX10(), 1000, "Reading");\r
+               const char *Label = SF.GetLabel();\r
+               unsigned L = SF.GetSeqLength();\r
+               ++SeqCount;\r
+               LetterCount += L;\r
+\r
+               if (opt_verbose)\r
+                       {\r
+                       Log(">%7u  %7u  '%s'\n", Index, L, Label);\r
+                       Log("+%7.7s  %7.7s  \"%*.*s\"\n", "", "", L, L, Seq);\r
+                       }\r
+\r
+               ++Index;\r
+               }\r
+       ProgressStep(999, 1000, "Reading");\r
+\r
+       Progress("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount));\r
+       Log("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount));\r
+       }\r
+#endif // TEST\r
diff --git a/uchime_src/sfasta.h b/uchime_src/sfasta.h
new file mode 100644 (file)
index 0000000..ed2f2ff
--- /dev/null
@@ -0,0 +1,93 @@
+#ifndef sfasta_h\r
+#define sfasta_h\r
+\r
+#include "myutils.h"\r
+#include "seq.h"\r
+\r
+typedef void (*ON_START_XSEQ)(const SeqData &SD);\r
+typedef void (*ON_END_XSEQ)(const SeqData &SD);\r
+\r
+// Sequential reader for FASTA file format.\r
+// Serves sequences in file order to save memory.\r
+// Caches biggish chunks to compromise memory vs. speed.\r
+class SFasta\r
+       {\r
+public:\r
+// Name passed to Open(); used in warnings and log output.\r
+       string m_FileName;\r
+\r
+// Open file handle, 0 when no file is open.\r
+       FILE *m_File;\r
+\r
+// When true, gap characters ('-' and '.') are kept in sequences;\r
+// otherwise they are dropped while parsing.\r
+       bool m_AllowGaps;\r
+\r
+// Size of the file, recorded by Open().\r
+       off_t m_FileSize;\r
+\r
+// Position to start next read\r
+       off_t m_FilePos;\r
+\r
+// Cached data.\r
+       byte *m_Buffer;\r
+\r
+// Bytes allocated to m_Buffer\r
+       unsigned m_BufferSize;\r
+\r
+// Current position in buffer, normally points to '>'\r
+       unsigned m_BufferOffset;\r
+\r
+// File data in buffer <= m_BufferSize\r
+       unsigned m_BufferBytes;\r
+\r
+// Current label\r
+// Points into m_Buffer, not a separate buffer.\r
+       char *m_Label;\r
+\r
+// Current sequence length\r
+       unsigned m_SeqLength;\r
+\r
+// Current seq index\r
+       unsigned m_SeqIndex;\r
+\r
+// Statistics on sequences discarded by GetNextSeq(): the shortest and\r
+// longest rejected lengths and how many were too short or too long.\r
+// NOTE(review): nothing in sfasta.cpp increments m_TooPolyCount.\r
+       unsigned m_ShortestLength;\r
+       unsigned m_LongestLength;\r
+       unsigned m_TooShortCount;\r
+       unsigned m_TooLongCount;\r
+       unsigned m_TooPolyCount;\r
+\r
+private:\r
+// m_IsNucleo is only meaningful once SetIsNucleo() has set m_IsNucleoSet.\r
+       bool m_IsNucleoSet;\r
+       bool m_IsNucleo;\r
+\r
+public:\r
+       SFasta();\r
+       ~SFasta();\r
+\r
+// Release the buffer, close the file and reset all members.\r
+       void Clear();\r
+\r
+// Open a file for sequential reading; calls Clear() first.\r
+       void Open(const string &FileName);\r
+\r
+// Restart reading from the beginning of the file.\r
+       void Rewind();\r
+\r
+// Sample the start of the file to decide nucleotide vs. other, then rewind.\r
+       bool SetIsNucleo();\r
+\r
+// Result of SetIsNucleo(); asserts that it has been called.\r
+       bool GetIsNucleo() const { asserta(m_IsNucleoSet); return m_IsNucleo; };\r
+\r
+// Get next sequence.\r
+// Returns zero on end-of-file\r
+       const byte *GetNextSeq();\r
+\r
+// Get next sequence as SeqData object, return false on end-of-file.\r
+       bool GetNextSD(SeqData &SD);\r
+\r
+// Length of most recent sequence returned by GetNextSeq().\r
+       unsigned GetSeqLength() const { return m_SeqLength; }\r
+\r
+// Label of most recent sequence returned by GetNextSeq().\r
+       const char *GetLabel() const { return m_Label; }\r
+\r
+// Index of most recent sequence returned by GetNextSeq().\r
+       unsigned GetSeqIndex() const { return m_SeqIndex; }\r
+\r
+// Progress through the file, in tenths of a percent and in percent.\r
+       unsigned GetPctDoneX10() const;\r
+       double GetPctDone() const;\r
+\r
+// Write the reader state to the log.\r
+       void LogMe() const;\r
+\r
+private:\r
+// Load the next chunk of whole records into m_Buffer.\r
+       void FillCache();\r
+\r
+// Parse the next record without applying the length filters.\r
+       const byte *GetNextSeqLo();\r
+       };\r
+\r
+#endif // sfasta_h\r
diff --git a/uchime_src/svnmods.h b/uchime_src/svnmods.h
new file mode 100644 (file)
index 0000000..c68513e
--- /dev/null
@@ -0,0 +1,15 @@
+"Path: .\n"
+"URL: file:///public/svn/usearch\n"
+"Repository Root: file:///public/svn/usearch\n"
+"Repository UUID: 58640331-1837-4c17-bc3e-636dc59aced1\n"
+"Revision: 34\n"
+"Node Kind: directory\n"
+"Schedule: normal\n"
+"Last Changed Author: bob\n"
+"Last Changed Rev: 34\n"
+"Last Changed Date: 2011-05-01 08:29:04 -0700 (Sun, 01 May 2011)\n"
+"\n"
+"?       mk\n"
+"!       svnmods.h\n"
+"M       ungappedblastid.cpp\n"
+"M       chaindisjointhits.cpp\n"
diff --git a/uchime_src/svnversion.h b/uchime_src/svnversion.h
new file mode 100644 (file)
index 0000000..2a64d50
--- /dev/null
@@ -0,0 +1 @@
+"40"
diff --git a/uchime_src/timers.h b/uchime_src/timers.h
new file mode 100644 (file)
index 0000000..81cf7d1
--- /dev/null
@@ -0,0 +1,173 @@
+T(MxBase_Alloc)\r
+T(MxBase_FreeData)\r
+T(MxBase_AllocData)\r
+T(SortSeqIndexes)\r
+T(Alloc_Vectors)\r
+T(MainLoop_NotNW)\r
+T(WriteOutput)\r
+T(NWB)\r
+T(ReadAllStdioFile)\r
+T(Windex_Init)\r
+T(Windex_SetSeqIndex)\r
+T(SeqToWords)\r
+T(SeqToWordsStep)\r
+T(SeqToShortWords)\r
+T(SeqToShortWordsA)\r
+T(SeqToShortWordsB)\r
+T(GetFractIdB)\r
+T(Windex_UniqueWordsAlloc)\r
+T(Windex_UniqueWords)\r
+T(GetPctId)\r
+T(Windex_Reset)\r
+T(GetSig)\r
+T(NWEditDist)\r
+T(EditDist_Myers)\r
+T(EditDist_BlockTarget)\r
+T(NWBand)\r
+T(WordCounting)\r
+T(NWAff)\r
+T(NWAffBand)\r
+T(NWSimple)\r
+T(NWSimpleB)\r
+T(BandWrap)\r
+T(IncIdCounts)\r
+T(GetBestDiagB)\r
+T(GetBestDiagB1)\r
+T(GetBestDiagB2)\r
+T(ClusterInit)\r
+T(ClusterPrep)\r
+T(HotSort1)\r
+T(HotSort2)\r
+T(SortA)\r
+T(SortB)\r
+T(CountSort)\r
+T(AddWords)\r
+T(ClusterWindex)\r
+T(MainInit)\r
+T(Output)\r
+T(WindexTail)\r
+T(WindexExit)\r
+T(Sort)\r
+T(U_AllocSeqLength)\r
+T(U_AllocSeedCount)\r
+T(U_AddSeed)\r
+T(AddSeq)\r
+T(U_SetWordCounts)\r
+T(U_SetWordCountsHash)\r
+T(U_SetWordScores)\r
+T(U_SetHotHits)\r
+T(U_SetHotHitsHash)\r
+T(U_SetHotHitsScores)\r
+T(U_Search)\r
+T(U_SearchExact)\r
+T(WF_SeqToWords)\r
+T(WF_SeqToWordsA)\r
+T(WF_SeqToWordsB)\r
+T(WF_AllocLA)\r
+T(WF_AllocLB)\r
+T(WF_AllocDiags)\r
+T(WF_SetA)\r
+T(WF_SetA_Nb)\r
+T(WF_SetAZero)\r
+T(WF_SetA2)\r
+T(WF_SetB)\r
+T(WF_GetCommonWordCount)\r
+T(WF_GetBestDiag)\r
+T(GetFractIdGivenPath)\r
+T(WX_GetUniqueWords)\r
+T(CompressPath)\r
+T(GetHSPs1)\r
+T(GetHSPs2)\r
+T(AlignHSPs)\r
+T(WF_ResolveHSPs)\r
+T(WX_SetExcludes)\r
+T(ViterbiFast)\r
+T(ViterbiFastBand)\r
+T(ViterbiFastBand0)\r
+T(ViterbiFastBand1)\r
+T(ViterbiFastBand2)\r
+T(ViterbiFastBand3)\r
+T(ViterbiFastBand4)\r
+T(TraceBackBit)\r
+T(TraceBackBitSW)\r
+T(SF_GetNextSeq)\r
+T(SF_FillCache)\r
+T(OnGlobalAccept)\r
+T(UngappedBlast)\r
+T(UngappedBlastId)\r
+T(UngappedBlast2Hit)\r
+T(LogHSPs)\r
+T(BlastOutput)\r
+T(BlastLeft)\r
+T(BlastRight)\r
+T(Blast1)\r
+T(Blast2)\r
+T(Blast3)\r
+T(Blast4)\r
+T(GetBestSeg)\r
+T(SWLinearDP)\r
+T(SWLinearTB)\r
+T(SWLinearDP2)\r
+T(SWLinearTB2)\r
+T(Chain)\r
+T(XlatSeq)\r
+T(XlatSeqToLetters)\r
+T(XDropFwdSimple)\r
+T(XDropFwdFast)\r
+T(XDropFwdFastTB)\r
+T(XDropBwd)\r
+T(SWSimple)\r
+T(PathAlloc)\r
+T(SubPath)\r
+T(SWUngapped)\r
+T(SWFast)\r
+T(SWFastNTB)\r
+T(SWAT_CacheQuery)\r
+T(SWAT_AlignTarget)\r
+T(SWAT_CacheQueryNW)\r
+T(SWAT_AlignTargetNW)\r
+T(SeqDB_FromFasta)\r
+T(LocalUngappedHitToAD)\r
+T(LocalGappedHitToAD)\r
+T(GlobalHitToAD)\r
+T(ResolveOverlaps)\r
+T(GetORFs)\r
+T(ChainCov_AddHit)\r
+T(ChainCov_EndQuery)\r
+T(ChainCov_DoTarget)\r
+T(BuildNb)\r
+T(MakeIntSubstMx)\r
+T(UngappedExtendLeft)\r
+T(UngappedExtendRight)\r
+T(AlignSP)\r
+T(AlignHSP)\r
+\r
+// Background\r
+T(Bg_SearchLoop)\r
+T(Bg_MainInit)\r
+T(Bg_MainTerm)\r
+T(Bg_Other)\r
+T(Bg_1)\r
+T(Bg_2)\r
+T(Bg_3)\r
+T(Bg_4)\r
+T(Bg_5)\r
+T(Bg_6)\r
+T(Bg_7)\r
+T(Bg_8)\r
+T(Bg_9)\r
+T(Bg_XFrame2)\r
+T(Bg_Usearch1)\r
+T(Bg_Usearch2)\r
+T(Bg_Usearch3)\r
+T(Bg_Usearch4)\r
+T(Bg_Hot)\r
+\r
+// For Timer2\r
+T(Search_2)\r
+T(Search_Loop_2)\r
+T(Search_InnerLoop_2)\r
+T(OnHit_2)\r
+T(UngappedBlast_2)\r
+T(MainInit_2)\r
+T(MainTerm_2)\r
diff --git a/uchime_src/timing.h b/uchime_src/timing.h
new file mode 100644 (file)
index 0000000..0a80aee
--- /dev/null
@@ -0,0 +1,238 @@
+#define TIMING 0
+#ifndef timing_h
+#define timing_h
+
+#define BG_TIMING      0
+
+#if !TIMING
+#undef BG_TIMING
+#define BG_TIMING      0
+#endif
+
+#if    UCHIMES
+#undef TIMING
+#define TIMING 0
+#endif
+
+#if TIMING
+
+enum TIMER
+       {
+       TIMER_None,
+#define T(x)   TIMER_##x,
+#include "timers.h"
+#undef T
+       };
+
+const unsigned TimerCount =
+       1       // TIMER_None
+#define T(x)   +1
+#include "timers.h"
+#undef T
+       ;
+
+enum COUNTER
+       {
+#define C(x)   COUNTER_##x,
+#include "counters.h"
+#undef C
+       };
+
+enum ALLOCER
+       {
+#define A(x)   ALLOCER_##x,
+#include "allocs.h"
+#undef A
+       };
+
+const unsigned CounterCount =
+#define C(x)   +1
+#include "counters.h"
+#undef C
+       ;
+
+const unsigned AllocerCount =
+#define A(x)   +1
+#include "allocs.h"
+#undef A
+       ;
+
+#ifdef _MSC_VER
+
+typedef unsigned __int64 TICKS;
+
+#pragma warning(disable:4035)
+// Reads the CPU time-stamp counter. The two emitted bytes (0x0f 0x31) are
+// the RDTSC opcode; its EDX:EAX result is left in place as the return
+// value, which is why the missing-return warning (4035) is disabled above.
+inline TICKS GetClockTicks()
+       {
+       _asm
+               {
+               _emit   0x0f
+               _emit   0x31
+               }
+       }
+
+#else  // ifdef _MSC_VER
+
+typedef uint64_t TICKS;
+// Reads the CPU time-stamp counter with RDTSC and combines the two 32-bit
+// halves (hi from EDX, lo from EAX) into one 64-bit tick count.
+__inline__ uint64_t GetClockTicks()
+       {
+       uint32_t lo, hi;
+       /* We cannot use "=A", since this would use %rax on x86_64 */
+       __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+       return (uint64_t)hi << 32 | lo;
+       }
+
+#endif // ifdef _MSC_VER
+
+//void AddTicks(const string &Name, TICKS Ticks1, TICKS Ticks2);
+//void AddBytes(const string &Name, double Bytes);
+//#define SubBytes(Name, Bytes)        AddBytes(Name, -double(Bytes))
+
+const char *TimerToStr(TIMER t);
+
+extern TICKS g_BeginTicks[TimerCount];
+extern double g_TotalTicks[TimerCount];
+extern double g_TotalCounts[TimerCount];
+extern double g_Counters[CounterCount];
+extern unsigned g_AllocNewCount[AllocerCount];
+extern unsigned g_AllocFreeCount[AllocerCount];
+extern double g_AllocNewBytes[AllocerCount];
+extern double g_AllocFreeBytes[AllocerCount];
+extern double g_AllocNetBytes[AllocerCount];
+extern double g_AllocPeakBytes[AllocerCount];
+extern bool g_Timer2[TimerCount];
+extern TIMER g_CurrTimer;
+#if    BG_TIMING
+extern TIMER g_BackgroundTimer;
+#endif
+
+// Instrumented allocation helpers (TIMING builds): allocate or free an
+// array of N elements and charge the bytes to the ALLOCER_<Name> bucket.
+// Arguments are parenthesized so that expressions such as n+3 yield the
+// intended byte count.
+#define MYALLOC(Type, N, Name)         ((Type *) MyAlloc_((N)*sizeof(Type), ALLOCER_##Name, __FILE__, __LINE__))
+#define MYFREE(Array, N, Name)         MyFree_((Array), (N)*sizeof((Array)[0]), ALLOCER_##Name, __FILE__, __LINE__)
+
+inline void *MyAlloc_(unsigned Bytes, unsigned a, const char *FileName, int Line)
+       {
+       ++g_AllocNewCount[a];
+       g_AllocNewBytes[a] += Bytes;
+       g_AllocNetBytes[a] += Bytes;
+       if (g_AllocNetBytes[a] > g_AllocPeakBytes[a])
+               g_AllocPeakBytes[a] = g_AllocNetBytes[a];
+       return mymalloc(Bytes);
+       }
+
+// Releases p through myfree2() after recording the free in allocator
+// bucket a (call count, freed bytes, net bytes). FileName and Line are
+// accepted but not used.
+inline void MyFree_(void *p, unsigned Bytes, unsigned a, const char *FileName, int Line)
+       {
+       g_AllocFreeCount[a] += 1;
+       g_AllocNetBytes[a] -= Bytes;
+       g_AllocFreeBytes[a] += Bytes;
+       myfree2(p, Bytes);
+       }
+
+#if    BG_TIMING
+inline void SetBackgroundTimer_(TIMER Timer)
+       {
+       TICKS Now = GetClockTicks();
+       if (g_BeginTicks[g_BackgroundTimer] != 0)
+               {
+               ++g_TotalCounts[g_BackgroundTimer];
+               g_TotalTicks[g_BackgroundTimer] += double(Now - g_BeginTicks[g_BackgroundTimer]);
+               }
+       g_BackgroundTimer = Timer;
+       g_BeginTicks[Timer] = Now;
+       }
+#else
+#define SetBackgroundTimer_(Timer)     /* empty */
+#endif
+
+inline void StartTimer_(TIMER Timer)
+       {
+       if (g_CurrTimer != TIMER_None)
+               Die("StartTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
+
+       TICKS Now = GetClockTicks();
+#if    BG_TIMING
+       if (g_BeginTicks[g_BackgroundTimer] != 0)
+               {
+               ++g_TotalCounts[g_BackgroundTimer];
+               g_TotalTicks[g_BackgroundTimer] += double(Now - g_BeginTicks[g_BackgroundTimer]);
+               }
+#endif
+       g_BeginTicks[Timer] = Now;
+       g_CurrTimer = Timer;
+       }
+
+inline void PauseTimer_(TIMER Timer)
+       {
+       if (Timer != g_CurrTimer)
+               Die("PauseTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
+
+       TICKS Now = GetClockTicks();
+       g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]);
+       g_BeginTicks[Timer] = Now;
+       g_CurrTimer = TIMER_None;
+       }
+
+inline void EndTimer_(TIMER Timer)
+       {
+       if (Timer != g_CurrTimer)
+               Die("EndTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
+
+       TICKS Now = GetClockTicks();
+#if    BG_TIMING
+       g_BeginTicks[g_BackgroundTimer] = Now;
+#endif
+       g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]);
+       ++g_TotalCounts[Timer];
+       g_CurrTimer = TIMER_None;
+       }
+
+// Starts Timer independently of g_CurrTimer and flags it in g_Timer2.
+inline void StartTimer2_(TIMER Timer)
+       {
+       g_Timer2[Timer] = true;
+       const TICKS Now = GetClockTicks();
+       g_BeginTicks[Timer] = Now;
+       }
+
+// Adds the ticks elapsed since StartTimer2_(Timer) to the timer's total
+// and counts one completed call; g_CurrTimer is not involved.
+inline void EndTimer2_(TIMER Timer)
+       {
+       const TICKS Now = GetClockTicks();
+       g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]);
+       g_TotalCounts[Timer] += 1;
+       }
+
+#define AddCounter(x, N)       g_Counters[COUNTER_##x] += N
+#define IncCounter(x)          ++(g_Counters[COUNTER_##x])
+#define StartTimer(x)          StartTimer_(TIMER_##x)
+#define PauseTimer(x)          PauseTimer_(TIMER_##x)
+#define EndTimer(x)                    EndTimer_(TIMER_##x)
+#define StartTimer2(x)         StartTimer2_(TIMER_##x)
+#define EndTimer2(x)           EndTimer2_(TIMER_##x)
+
+#if    BG_TIMING
+#define SetBackgroundTimer(x)  SetBackgroundTimer_(TIMER_##x)
+#else
+#define SetBackgroundTimer(x)  /* empty */
+#endif
+
+#else  // if TIMING
+
+#define AddCounter(x, N)       /* empty */
+#define IncCounter(x)          /* empty */
+#define StartTimer(x)          /* empty */
+#define PauseTimer(x)          /* empty */
+#define EndTimer(x)                    /* empty */
+#define StartTimer2(x)         /* empty */
+#define PauseTimer2(x)         /* empty */
+#define EndTimer2(x)           /* empty */
+#define SetBackgroundTimer(x)  /* empty */
+#define MYALLOC(Type, N, Name)         myalloc(Type, N)
+#define MYFREE(Array, N, Name)         myfree(Array)
+
+#endif // if TIMING
+
+void LogMemStats();
+void LogTickStats();
+void LogStats();
+void LogAllocs();
+
+#define AddBytes(x, n) /* empty */
+#define SubBytes(x, n) /* empty */
+
+#endif // if timing_h
diff --git a/uchime_src/tracebackbit.cpp b/uchime_src/tracebackbit.cpp
new file mode 100644 (file)
index 0000000..94159cd
--- /dev/null
@@ -0,0 +1,180 @@
+#include "dp.h"
+
+#define TRACE  0
+
+Mx<byte> g_Mx_TBBit;
+byte **g_TBBit;
+float *g_DPRow1;
+float *g_DPRow2;
+static float *g_DPBuffer1;
+static float *g_DPBuffer2;
+
+static unsigned g_CacheLB;
+
+void AllocBit(unsigned LA, unsigned LB)
+       {
+       g_Mx_TBBit.Alloc("TBBit", LA+1, LB+1);
+       g_TBBit = g_Mx_TBBit.GetData();
+       if (LB > g_CacheLB)
+               {
+               MYFREE(g_DPBuffer1, g_CacheLB, AllocBit);
+               MYFREE(g_DPBuffer2, g_CacheLB, AllocBit);
+
+               g_CacheLB = LB + 128;
+
+       // Allow use of [-1]
+               //g_DPBuffer1 = myalloc<float>(g_CacheLB+3);
+               //g_DPBuffer2 = myalloc<float>(g_CacheLB+3);
+               g_DPBuffer1 = MYALLOC(float, g_CacheLB+3, AllocBit);
+               g_DPBuffer2 = MYALLOC(float, g_CacheLB+3, AllocBit);
+               g_DPRow1 = g_DPBuffer1 + 1;
+               g_DPRow2 = g_DPBuffer2 + 1;
+               }
+       }
+
+// Reconstructs a global alignment path from the traceback bit matrix
+// g_TBBit. Starting at cell (LA, LB) in state State ('M', 'D' or 'I'), it
+// walks back to (0, 0), writing one state letter per step backwards from
+// PD.Back; on return PD.Start points to the first letter of the path.
+// Dies on an unknown state letter.
+// NOTE(review): PathData is declared elsewhere; this assumes PD.Alloc()
+// provides room for LA+LB letters plus a terminator before PD.Back.
+void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD)
+       {
+       PD.Alloc(LA+LB);
+
+       StartTimer(TraceBackBit);
+// The path is built back to front; start by writing the terminator.
+       char *PathPtr = PD.Back;
+       *PathPtr = 0;
+
+       byte **TB = g_TBBit;
+
+#if    TRACE
+       Log("\n");
+       Log("TraceBackBit\n");
+#endif
+
+       size_t i = LA;
+       size_t j = LB;
+       for (;;)
+               {
+#if    TRACE
+               Log("i=%3d  j=%3d  state=%c\n", (int) i, (int) j, State);
+#endif
+               if (i == 0 && j == 0)
+                       break;
+
+               --PathPtr;
+               *PathPtr = State;
+
+       // The bits of the cell being left select the predecessor state.
+               byte t;
+               switch (State)
+                       {
+       // M consumes one position of each sequence.
+               case 'M':
+                       asserta(i > 0 && j > 0);
+                       t = TB[i-1][j-1];
+                       if (t & TRACEBITS_DM)
+                               State = 'D';
+                       else if (t & TRACEBITS_IM)
+                               State = 'I';
+                       else
+                               State = 'M';
+                       --i;
+                       --j;
+                       break;
+       // D moves back along i only.
+               case 'D':
+                       asserta(i > 0);
+                       t = TB[i-1][j];
+                       if (t & TRACEBITS_MD)
+                               State = 'M';
+                       else
+                               State = 'D';
+                       --i;
+                       break;
+
+       // I moves back along j only.
+               case 'I':
+                       asserta(j > 0);
+                       t = TB[i][j-1];
+                       if (t & TRACEBITS_MI)
+                               State = 'M';
+                       else
+                               State = 'I';
+                       --j;
+                       break;
+
+               default:
+                       Die("TraceBackBit, invalid state %c", State);
+                       }
+               }
+       PD.Start = PathPtr;
+       EndTimer(TraceBackBit);
+       }
+
+// Reconstructs a local alignment path from the traceback bit matrix
+// g_TBBit. Starting in state 'M' at cell (Besti, Bestj), it walks back
+// until an M cell carries the TRACEBITS_SM bit, which is the only exit
+// from the loop. On return PD.Start points to the first letter of the
+// path and Leni/Lenj hold the number of positions covered in each
+// sequence. Dies on an unknown state letter.
+// NOTE(review): PathData is declared elsewhere; this assumes PD.Alloc()
+// provides room for LA+LB letters plus a terminator before PD.Back.
+void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj,
+  unsigned &Leni, unsigned &Lenj, PathData &PD)
+       {
+       PD.Alloc(LA+LB);
+
+       StartTimer(TraceBackBitSW);
+// The path is built back to front; start by writing the terminator.
+       char *PathPtr = PD.Back;
+       *PathPtr = 0;
+
+       byte **TB = g_TBBit;
+
+#if    TRACE
+       Log("\n");
+       Log("TraceBackBitSW\n");
+#endif
+
+       unsigned i = Besti;
+       unsigned j = Bestj;
+       char State = 'M';
+       for (;;)
+               {
+#if    TRACE
+               Log("i=%3d  j=%3d  state=%c\n", (int) i, (int) j, State);
+#endif
+               --PathPtr;
+               *PathPtr = State;
+
+       // The bits of the cell being left select the predecessor state.
+               byte t;
+               switch (State)
+                       {
+               case 'M':
+                       asserta(i > 0 && j > 0);
+                       t = TB[i-1][j-1];
+                       if (t & TRACEBITS_DM)
+                               State = 'D';
+                       else if (t & TRACEBITS_IM)
+                               State = 'I';
+                       else if (t & TRACEBITS_SM)
+                               {
+                       // Start of the local alignment reached.
+                               Leni = Besti - i + 1;
+                               Lenj = Bestj - j + 1;
+                               PD.Start = PathPtr;
+                               EndTimer(TraceBackBitSW);
+                               return;
+                               }
+                       else
+                               State = 'M';
+                       --i;
+                       --j;
+                       break;
+       // D moves back along i only.
+               case 'D':
+                       asserta(i > 0);
+                       t = TB[i-1][j];
+                       if (t & TRACEBITS_MD)
+                               State = 'M';
+                       else
+                               State = 'D';
+                       --i;
+                       break;
+
+       // I moves back along j only.
+               case 'I':
+                       asserta(j > 0);
+                       t = TB[i][j-1];
+                       if (t & TRACEBITS_MI)
+                               State = 'M';
+                       else
+                               State = 'I';
+                       --j;
+                       break;
+
+               default:
+                       Die("TraceBackBitSW, invalid state %c", State);
+                       }
+               }
+       }
diff --git a/uchime_src/uc.h b/uchime_src/uc.h
new file mode 100644 (file)
index 0000000..93ef892
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef uc_h\r
+#define uc_h\r
+\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+#include "path.h"\r
+\r
+struct AlnData;\r
+\r
+// UCFile bundles a FILE handle, a raw byte buffer and a set of per-field\r
+// vectors together with the routines declared below for reading, writing\r
+// and converting them. Only declarations are visible in this header; the\r
+// definitions live in another translation unit.\r
+// NOTE(review): the vectors look like parallel arrays indexed by record\r
+// number (see GetRecordCount) -- confirm against the implementation.\r
+class UCFile\r
+       {\r
+public:\r
+       FILE *m_File;\r
+       byte *m_Data;\r
+       vector<char> m_RecTypes;\r
+       vector<float> m_PctIds;\r
+       // The const char * vectors hold non-owning C-string pointers as far as\r
+       // this header shows; presumably they point into m_Data -- TODO confirm.\r
+       vector<const char *> m_Labels;\r
+       vector<const char *> m_SeedLabels;\r
+       vector<unsigned> m_SeedIndexes;\r
+       vector<const char *> m_CompressedPaths;\r
+       vector<unsigned> m_SeqLengths;\r
+       vector<unsigned> m_SortOrder;\r
+       vector<char> m_Strands;\r
+       vector<unsigned> m_Los;\r
+       vector<unsigned> m_SeedLos;\r
+\r
+public:\r
+       UCFile();\r
+       // ctor defaults to false; presumably passed as true only from the\r
+       // constructor -- TODO confirm.\r
+       void Clear(bool ctor = false);\r
+       void Close();\r
+       void FromFile(const string &FileName);\r
+       void FromClstr(const string &FileName);\r
+       void ToFile(const string &FileName);\r
+       unsigned GetRecordCount() const;\r
+       void LogMe() const;\r
+       void ToClstr(const string &FileName);\r
+       void ToFasta(const string &FileName, const SeqDB &Input, bool Reformat);\r
+       void Create(const string &FileName);\r
+       void Sort();\r
+       void Flush() const;\r
+\r
+       // Record writers. All are const except WriteHit(const AlnData &).\r
+       void WriteNotMatched(unsigned L, const char *Label) const;\r
+       void WriteLibSeed(unsigned SeedIndex, unsigned L, const char *Label) const;\r
+       void WriteNewSeed(unsigned SeedIndex, unsigned L, const char *Label) const;\r
+       void WriteHit(const SeqData &SA, const SeqData &SB, double FractId,\r
+         const PathData &PD) const;\r
+       void WriteReject(const SeqData &SA, const SeqData &SB, double FractId,\r
+         const char *Path) const;\r
+       void WriteHit(unsigned SeedIndex, unsigned L, double PctId,\r
+         const char *CompressedPath, char Strand, unsigned Lo, unsigned SeedLo,\r
+         const char *Label, const char *SeedLabel) const;\r
+       // AlnData is only forward-declared above, so it can be used here by\r
+       // reference without including its definition.\r
+       void WriteHit(const AlnData &AD);\r
+       void WriteLibCluster(unsigned SeedIndex, unsigned Size, double AvgId,\r
+         const char *Label) const;\r
+       void WriteNewCluster(unsigned SeedIndex, unsigned Size, double AvgId,\r
+         const char *Label) const;\r
+       void WriteSeqX(FILE *f, const byte *Seq, unsigned L, const char *CompressedPath) const;\r
+       };\r
+\r
+#endif // uc_h\r
diff --git a/uchime_src/uchime_main.cpp b/uchime_src/uchime_main.cpp
new file mode 100644 (file)
index 0000000..b9d69ad
--- /dev/null
@@ -0,0 +1,212 @@
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "seqdb.h"\r
+#include "dp.h"\r
+#include "ultra.h"\r
+#include "hspfinder.h"\r
+#include <algorithm>\r
+#include <set>\r
+\r
+bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
+  const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
+  float MinFractId, ChimeHit2 &Hit);\r
+\r
+// Output streams. main() opens them only when opt_uchimeout /\r
+// opt_uchimealns are non-empty; otherwise they keep their zero-initialized\r
+// (null) value.\r
+FILE *g_fUChime;\r
+FILE *g_fUChimeAlns;\r
+// Values currently being ordered: set by SortDescending(), read by\r
+// CmpDescVecFloat().\r
+const vector<float> *g_SortVecFloat;\r
+// Set in main() to true when opt_db is empty (no reference database given).\r
+bool g_UchimeDeNovo = false;\r
+\r
+void Usage()\r
+       {\r
+       printf("\n");\r
+       printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);\r
+       printf("http://www.drive5.com/uchime\n");\r
+       printf("\n");\r
+       printf("This software is donated to the public domain\n");\r
+       printf("\n");\r
+\r
+       printf(\r
+#include "help.h"\r
+               );\r
+       }\r
+\r
+// Stub: not supported in this program; reports that through Die().\r
+void SetBLOSUM62()\r
+       {\r
+       Die("SetBLOSUM62 not implemented");\r
+       }\r
+\r
+// Stub: both parameters are deliberately unnamed and unused; the call only\r
+// reports "not implemented" through Die().\r
+void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)\r
+       {\r
+       Die("ReadSubstMx not implemented");\r
+       }\r
+\r
+// Intentionally a no-op in this program.\r
+void LogAllocs()\r
+       {\r
+       /*empty*/\r
+       }\r
+\r
+// Comparator for sort(): index i goes before index j when the value it\r
+// selects in *g_SortVecFloat is strictly larger.\r
+static bool CmpDescVecFloat(unsigned i, unsigned j)\r
+       {\r
+       const vector<float> &Values = *g_SortVecFloat;\r
+       const float Vi = Values[i];\r
+       const float Vj = Values[j];\r
+       return Vj < Vi;\r
+       }\r
+\r
+// Overwrites v with the sequence 0, 1, ..., N-1.\r
+void Range(vector<unsigned> &v, unsigned N)\r
+       {\r
+       v.resize(N);\r
+       unsigned Next = 0;\r
+       for (vector<unsigned>::iterator p = v.begin(); p != v.end(); ++p)\r
+               *p = Next++;\r
+       }\r
+\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order)\r
+       {\r
+       StartTimer(Sort);\r
+       const unsigned N = SIZE(Values);\r
+       Range(Order, N);\r
+       g_SortVecFloat = &Values;\r
+       sort(Order.begin(), Order.end(), CmpDescVecFloat);\r
+       EndTimer(Sort);\r
+       }\r
+\r
+float GetAbFromLabel(const string &Label)\r
+       {\r
+       vector<string> Fields;\r
+       Split(Label, Fields, '/');\r
+       const unsigned N = SIZE(Fields);\r
+       for (unsigned i = 0; i < N; ++i)\r
+               {\r
+               const string &Field = Fields[i];\r
+               if (Field.substr(0, 3) == "ab=")\r
+                       {\r
+                       string a = Field.substr(3, string::npos);\r
+                       return (float) atof(a.c_str());\r
+                       }\r
+               }\r
+       if (g_UchimeDeNovo)\r
+               Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());\r
+       return 0.0;\r
+       }\r
+\r
+// Program entry point. Parses the command line, loads the query sequences\r
+// (and the reference database unless running de novo), then runs\r
+// SearchChime on every query and reports each result via WriteChimeHit.\r
+// In de novo mode (opt_db empty) queries are visited in order of decreasing\r
+// abundance, and every query that is not reported as a hit is added to the\r
+// database used for the queries that follow.\r
+// Returns 0; fatal conditions are reported through Die().\r
+int main(int argc, char *argv[])\r
+       {\r
+       MyCmdLine(argc, argv);\r
+\r
+       if (argc < 2)\r
+               {\r
+               Usage();\r
+               return 0;\r
+               }\r
+\r
+       if (opt_version)\r
+               {\r
+               printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+               return 0;\r
+               }\r
+\r
+       printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+       printf("by Robert C. Edgar\n");\r
+       printf("http://drive5.com/uchime\n");\r
+       printf("This code is donated to the public domain.\n");\r
+       printf("\n");\r
+       if (!optset_w)\r
+               opt_w = 8;\r
+       \r
+       float MinFractId = 0.95f;\r
+       if (optset_id)\r
+               MinFractId = (float) opt_id;\r
+\r
+       Log("%8.2f  minh\n", opt_minh);\r
+       Log("%8.2f  xn\n", opt_xn);\r
+       Log("%8.2f  dn\n", opt_dn);\r
+       Log("%8.2f  xa\n", opt_xa);\r
+       Log("%8.2f  mindiv\n", opt_mindiv);\r
+       Log("%8u  maxp\n", opt_maxp);\r
+\r
+       // --uchime is accepted as an alternative spelling of --input.\r
+       if (opt_input == "" && opt_uchime != "")\r
+               opt_input = opt_uchime;\r
+\r
+       if (opt_input == "")\r
+               Die("Missing --input");\r
+\r
+       g_UchimeDeNovo = (opt_db == "");\r
+\r
+       if (opt_uchimeout != "")\r
+               g_fUChime = CreateStdioFile(opt_uchimeout);\r
+\r
+       if (opt_uchimealns != "")\r
+               g_fUChimeAlns = CreateStdioFile(opt_uchimealns);\r
+\r
+       SeqDB Input;\r
+       SeqDB DB;\r
+\r
+       Input.FromFasta(opt_input);\r
+       if (!Input.IsNucleo())\r
+               Die("Input contains amino acid sequences");\r
+\r
+       // Default visiting order is input order; de novo mode replaces it below.\r
+       const unsigned QuerySeqCount = Input.GetSeqCount();\r
+       vector<unsigned> Order;\r
+       Range(Order, QuerySeqCount);\r
+\r
+       if (g_UchimeDeNovo)\r
+               {\r
+               vector<float> Abs;\r
+               for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+                       {\r
+                       const char *Label = Input.GetLabel(i);\r
+                       float Ab = GetAbFromLabel(Label);\r
+                       Abs.push_back(Ab);\r
+                       }\r
+               SortDescending(Abs, Order);\r
+               // The database starts empty, so its alphabet is set by hand.\r
+               DB.m_IsNucleoSet = true;\r
+               DB.m_IsNucleo = true;\r
+               }\r
+       else\r
+               {\r
+               DB.FromFasta(opt_db);\r
+               if (!DB.IsNucleo())\r
+                       Die("Database contains amino acid sequences");\r
+               }\r
+\r
+       unsigned HitCount = 0;\r
+       for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+               {\r
+               unsigned QuerySeqIndex = Order[i];\r
+\r
+               SeqData QSD;\r
+               Input.GetSeqData(QuerySeqIndex, QSD);\r
+\r
+               float QAb = -1.0;\r
+               if (g_UchimeDeNovo)\r
+                       QAb = GetAbFromLabel(QSD.Label);\r
+\r
+               ChimeHit2 Hit;\r
+               // NOTE(review): these three references are bound to a null pointer,\r
+               // which is undefined behavior even if SearchChime never uses them.\r
+               // Fixing it needs real AlnParams/AlnHeuristics/HSPFinder objects,\r
+               // whose construction is not visible from this file -- left as is.\r
+               AlnParams &AP = *(AlnParams *) 0;\r
+               AlnHeuristics &AH = *(AlnHeuristics *) 0;\r
+               HSPFinder &HF = *(HSPFinder *) 0;\r
+               bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);\r
+               if (Found)\r
+                       ++HitCount;\r
+               else\r
+                       {\r
+                       if (g_UchimeDeNovo)\r
+                               DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);\r
+                       }\r
+\r
+               WriteChimeHit(g_fUChime, Hit);\r
+\r
+               // i+1 queries are done at this point; show the same count that the\r
+               // percentage is computed from.\r
+               ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i+1, Pct(HitCount, i+1));\r
+               }\r
+\r
+       Log("\n");\r
+       Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
+         opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r
+\r
+       CloseStdioFile(g_fUChime);\r
+       CloseStdioFile(g_fUChimeAlns);\r
+\r
+       ProgressExit();\r
+       return 0;\r
+       }\r
diff --git a/uchime_src/ultra.h b/uchime_src/ultra.h
new file mode 100644 (file)
index 0000000..e0a432f
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef ultra_h
+#define ultra_h
+
+#include "seqdb.h"
+// Textual aliases: code written against the names "Ultra" and
+// "GetSeedLabel" is compiled as SeqDB and GetLabel respectively.
+// These are preprocessor substitutions, so they rewrite every later
+// occurrence of the two identifiers in any file that includes this header.
+#define Ultra SeqDB
+#define GetSeedLabel GetLabel
+
+#endif // ultra_h
diff --git a/uchime_src/usort.cpp b/uchime_src/usort.cpp
new file mode 100644 (file)
index 0000000..922dcb4
--- /dev/null
@@ -0,0 +1,86 @@
+#if    UCHIMES\r
+\r
+#include "myutils.h"\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+#include "alpha.h"\r
+\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order);\r
+\r
+// Per-query lookup table, allocated on the first SetQuery() call:\r
+// g_QueryHasWord[w] is 1 for every word w that occurs in the current query.\r
+static byte *g_QueryHasWord;\r
+// Number of entries in g_QueryHasWord; SetQuery() computes it as 4^opt_w\r
+// (for opt_w >= 1).\r
+static unsigned g_WordCount;\r
+\r
+unsigned GetWord(const byte *Seq)\r
+       {\r
+       unsigned Word = 0;\r
+       const byte *Front = Seq;\r
+       for (unsigned i = 0; i < opt_w; ++i)\r
+               {\r
+               unsigned Letter = g_CharToLetterNucleo[*Front++];\r
+               Word = (Word*4) + Letter;\r
+               }\r
+       return Word;\r
+       }\r
+\r
+static void SetQuery(const SeqData &Query)\r
+       {\r
+       if (g_QueryHasWord == 0)\r
+               {\r
+               g_WordCount = 4;\r
+               for (unsigned i = 1; i < opt_w; ++i)\r
+                       g_WordCount *= 4;\r
+\r
+               g_QueryHasWord = myalloc(byte, g_WordCount);\r
+               }\r
+\r
+       memset(g_QueryHasWord, 0, g_WordCount);\r
+\r
+       if (Query.L <= opt_w)\r
+               return;\r
+\r
+       const unsigned L = Query.L - opt_w + 1;\r
+       const byte *Seq = Query.Seq;\r
+       for (unsigned i = 0; i < L; ++i)\r
+               {\r
+               unsigned Word = GetWord(Seq++);\r
+               g_QueryHasWord[Word] = 1;\r
+               }\r
+       }\r
+\r
+static unsigned GetUniqueWordsInCommon(const SeqData &Target)\r
+       {\r
+       if (Target.L <= opt_w)\r
+               return 0;\r
+\r
+       unsigned Count = 0;\r
+       const unsigned L = Target.L - opt_w + 1;\r
+       const byte *Seq = Target.Seq;\r
+       for (unsigned i = 0; i < L; ++i)\r
+               {\r
+               unsigned Word = GetWord(Seq++);\r
+               if (g_QueryHasWord[Word])\r
+                       ++Count;\r
+               }\r
+       return Count;\r
+       }\r
+\r
+void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts, \r
+  vector<unsigned> &Order)\r
+       {\r
+       WordCounts.clear();\r
+       Order.clear();\r
+\r
+       SetQuery(Query);\r
+\r
+       const unsigned SeqCount = DB.GetSeqCount();\r
+       for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)\r
+               {\r
+               SeqData Target;\r
+               DB.GetSeqData(SeqIndex, Target);\r
+               float WordCount = (float) GetUniqueWordsInCommon(Target);\r
+               WordCounts.push_back(WordCount);\r
+               }\r
+       SortDescending(WordCounts, Order);\r
+       }\r
+\r
+#endif // UCHIMES\r
diff --git a/uchime_src/viterbifast.cpp b/uchime_src/viterbifast.cpp
new file mode 100644 (file)
index 0000000..2b20174
--- /dev/null
@@ -0,0 +1,378 @@
+#include "dp.h"
+#include "out.h"
+#include "evalue.h"
+
+#define CMP_SIMPLE     0
+\r
+#if    SAVE_FAST
+// SAVE_FAST debugging state: full copies of the M/D/I score matrices and of
+// the matching traceback characters, filled in through the SAVE_* helpers.
+static Mx<float> g_MxDPM;
+static Mx<float> g_MxDPD;
+static Mx<float> g_MxDPI;
+
+static Mx<char> g_MxTBM;
+static Mx<char> g_MxTBD;
+static Mx<char> g_MxTBI;
+
+// Pointers obtained from GetData() on the matrices above (see AllocSave).
+static float **g_DPM;
+static float **g_DPD;
+static float **g_DPI;
+
+static char **g_TBM;
+static char **g_TBD;
+static char **g_TBI;
+
+#if    CMP_SIMPLE
+// CMP_SIMPLE only: reference matrices fetched with GetSimpleDPMxs() in
+// AllocSave, plus the GetData() pointers used for element-wise comparison.
+static Mx<float> *g_DPMSimpleMx;
+static Mx<float> *g_DPDSimpleMx;
+static Mx<float> *g_DPISimpleMx;
+static float **g_DPMSimple;
+static float **g_DPDSimple;
+static float **g_DPISimple;
+
+#define cmpm(i, j, x)  { if (!feq(x, g_DPMSimple[i][j])) \
+                                                       { \
+                                                       Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
+                                                         __FILE__, __LINE__, x, i, j, g_DPMSimple[i][j]); \
+                                                       } \
+                                               }
+
+#define cmpd(i, j, x)  { if (!feq(x, g_DPDSimple[i][j])) \
+                                                       { \
+                                                       Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
+                                                         __FILE__, __LINE__, x, i, j, g_DPDSimple[i][j]); \
+                                                       } \
+                                               }
+
+#define cmpi(i, j, x)  { if (!feq(x, g_DPISimple[i][j])) \
+                                                       { \
+                                                       Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
+                                                         __FILE__, __LINE__, x, i, j, g_DPISimple[i][j]); \
+                                                       } \
+                                               }
+
+#else
+
+#define cmpm(i, j, x)  /* empty */
+#define cmpd(i, j, x)  /* empty */
+#define cmpi(i, j, x)  /* empty */
+
+#endif
+
+static void AllocSave(unsigned LA, unsigned LB)
+       {
+#if    CMP_SIMPLE
+       GetSimpleDPMxs(&g_DPMSimpleMx, &g_DPDSimpleMx, &g_DPISimpleMx);
+       g_DPMSimple = g_DPMSimpleMx->GetData();
+       g_DPDSimple = g_DPDSimpleMx->GetData();
+       g_DPISimple = g_DPISimpleMx->GetData();
+#endif
+       g_MxDPM.Alloc("FastM", LA+1, LB+1);\r
+       g_MxDPD.Alloc("FastD", LA+1, LB+1);\r
+       g_MxDPI.Alloc("FastI", LA+1, LB+1);\r
+\r
+       g_MxTBM.Alloc("FastTBM", LA+1, LB+1);\r
+       g_MxTBD.Alloc("FastTBD", LA+1, LB+1);\r
+       g_MxTBI.Alloc("FastTBI", LA+1, LB+1);\r
+\r
+       g_DPM = g_MxDPM.GetData();\r
+       g_DPD = g_MxDPD.GetData();\r
+       g_DPI = g_MxDPI.GetData();\r
+\r
+       g_TBM = g_MxTBM.GetData();\r
+       g_TBD = g_MxTBD.GetData();\r
+       g_TBI = g_MxTBI.GetData();\r
+       }
+
+// Records x as the M score of cell (i, j); in CMP_SIMPLE builds interior
+// cells (i > 0 and j > 0) are also checked against the reference matrix.
+static void SAVE_DPM(unsigned i, unsigned j, float x)
+       {
+       float *Row = g_DPM[i];
+       Row[j] = x;
+#if    CMP_SIMPLE
+       if (i == 0 || j == 0)
+               return;
+       asserta(feq(x, g_DPMSimple[i][j]));
+#endif
+       }
+
+// Records x as the D score of cell (i, j); in CMP_SIMPLE builds interior
+// cells (i > 0 and j > 0) are also checked against the reference matrix.
+static void SAVE_DPD(unsigned i, unsigned j, float x)
+       {
+       float *Row = g_DPD[i];
+       Row[j] = x;
+#if    CMP_SIMPLE
+       if (i == 0 || j == 0)
+               return;
+       asserta(feq(x, g_DPDSimple[i][j]));
+#endif
+       }
+
+// Records x as the I score of cell (i, j); in CMP_SIMPLE builds interior
+// cells (i > 0 and j > 0) are also checked against the reference matrix.
+static void SAVE_DPI(unsigned i, unsigned j, float x)
+       {
+       float *Row = g_DPI[i];
+       Row[j] = x;
+#if    CMP_SIMPLE
+       if (i == 0 || j == 0)
+               return;
+       asserta(feq(x, g_DPISimple[i][j]));
+#endif
+       }
+
+// Records traceback character x for the M state of cell (i, j).
+static void SAVE_TBM(unsigned i, unsigned j, char x)
+       {
+       char *Row = g_TBM[i];
+       Row[j] = x;
+       }
+
+// Records traceback character x for the D state of cell (i, j).
+static void SAVE_TBD(unsigned i, unsigned j, char x)
+       {
+       char *Row = g_TBD[i];
+       Row[j] = x;
+       }
+
+// Records traceback character x for the I state of cell (i, j).
+static void SAVE_TBI(unsigned i, unsigned j, char x)
+       {
+       char *Row = g_TBI[i];
+       Row[j] = x;
+       }
+
+// Hands out the addresses of the SAVE_FAST score matrices (M, D and I) through
+// the three output pointers. The matrices are file-scope statics, so the
+// addresses stay valid for the life of the program.
+void GetFastMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I)
+       {
+       *M = &g_MxDPM;
+       *D = &g_MxDPD;
+       *I = &g_MxDPI;
+       }
+
+#else  // SAVE_FAST
+
+// Without SAVE_FAST every debugging hook used by ViterbiFast expands to
+// nothing, so the calls in the DP loops cost nothing.
+#define        SAVE_DPM(i, j, x)       /* empty */
+#define        SAVE_DPD(i, j, x)       /* empty */
+#define        SAVE_DPI(i, j, x)       /* empty */
+
+#define        SAVE_TBM(i, j, x)       /* empty */
+#define        SAVE_TBD(i, j, x)       /* empty */
+#define        SAVE_TBI(i, j, x)       /* empty */
+
+#define AllocSave(LA, LB)      /* empty */
+
+#define cmpm(i, j, x)  /* empty */
+#define cmpd(i, j, x)  /* empty */
+#define cmpi(i, j, x)  /* empty */
+
+#endif // SAVE_FAST
+
+float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB,
+  const AlnParams &AP, PathData &PD)
+       {
+       if (LA*LB > 100*1000*1000)
+               Die("ViterbiFast, too long LA=%u, LB=%u", LA, LB);
+
+       AllocBit(LA, LB);
+       AllocSave(LA, LB);
+       
+       StartTimer(ViterbiFast);
+
+       const float * const *Mx = AP.SubstMx;
+       float OpenA = AP.LOpenA;
+       float ExtA = AP.LExtA;
+
+       byte **TB = g_TBBit;
+       float *Mrow = g_DPRow1;
+       float *Drow = g_DPRow2;
+
+// Use Mrow[-1], so...
+       Mrow[-1] = MINUS_INFINITY;
+       for (unsigned j = 0; j <= LB; ++j)
+               {
+               Mrow[j] = MINUS_INFINITY;
+               SAVE_DPM(0, j, MINUS_INFINITY);
+               SAVE_TBM(0, j, '?');
+
+               Drow[j] = MINUS_INFINITY;
+               SAVE_DPD(0, j, MINUS_INFINITY);
+               SAVE_TBD(0, j, '?');
+               }
+       
+// Main loop
+       float M0 = float (0);
+       SAVE_DPM(0, 0, 0);
+       for (unsigned i = 0; i < LA; ++i)
+               {
+               byte a = A[i];
+               const float *MxRow = Mx[a];
+               float OpenB = AP.LOpenB;
+               float ExtB = AP.LExtB;
+               float I0 = MINUS_INFINITY;
+
+               SAVE_TBM(i, 0, '?');
+
+               SAVE_DPI(i, 0, MINUS_INFINITY);
+               SAVE_DPI(i, 1, MINUS_INFINITY);
+
+               SAVE_TBI(i, 0, '?');
+               SAVE_TBI(i, 1, '?');
+               
+               byte *TBrow = TB[i];
+               for (unsigned j = 0; j < LB; ++j)
+                       {
+                       byte b = B[j];
+                       byte TraceBits = 0;
+                       float SavedM0 = M0;
+
+               // MATCH
+                       {
+               // M0 = DPM[i][j]
+               // I0 = DPI[i][j]
+               // Drow[j] = DPD[i][j]
+                       cmpm(i, j, M0);
+                       cmpd(i, j, Drow[j]);
+                       cmpi(i, j, I0);
+
+                       float xM = M0;
+                       SAVE_TBM(i+1, j+1, 'M');
+                       if (Drow[j] > xM)
+                               {
+                               xM = Drow[j];
+                               TraceBits = TRACEBITS_DM;
+                               SAVE_TBM(i+1, j+1, 'D');
+                               }
+                       if (I0 > xM)
+                               {
+                               xM = I0;
+                               TraceBits = TRACEBITS_IM;
+                               SAVE_TBM(i+1, j+1, 'I');
+                               }
+                       M0 = Mrow[j];
+                       cmpm(i, j+1, M0);
+
+                       Mrow[j] = xM + MxRow[b];
+               // Mrow[j] = DPM[i+1][j+1])
+                       SAVE_DPM(i+1, j+1, Mrow[j]);
+                       }
+                       
+               // DELETE
+                       {
+               // SavedM0 = DPM[i][j]
+               // Drow[j] = DPD[i][j]
+                       cmpm(i, j, SavedM0);
+                       cmpd(i, j, Drow[j]);
+
+                       float md = SavedM0 + OpenB;
+                       Drow[j] += ExtB;
+                       SAVE_TBD(i+1, j, 'D');
+                       if (md >= Drow[j])
+                               {
+                               Drow[j] = md;
+                               TraceBits |= TRACEBITS_MD;
+                               SAVE_TBD(i+1, j, 'M');
+                               }
+               // Drow[j] = DPD[i+1][j]
+                       SAVE_DPD(i+1, j, Drow[j]);
+                       }
+                       
+               // INSERT
+                       {
+               // SavedM0 = DPM[i][j]
+               // I0 = DPI[i][j]
+                       cmpm(i, j, SavedM0);
+                       cmpi(i, j, I0);
+                       
+                       float mi = SavedM0 + OpenA;
+                       I0 += ExtA;
+                       SAVE_TBI(i, j+1, 'I');
+                       if (mi >= I0)
+                               {
+                               I0 = mi;
+                               TraceBits |= TRACEBITS_MI;
+                               SAVE_TBI(i, j+1, 'M');
+                               }
+               // I0 = DPI[i][j+1]
+                       SAVE_DPI(i, j+1, I0);
+                       }
+                       
+                       OpenB = AP.OpenB;
+                       ExtB = AP.ExtB;
+                       
+                       TBrow[j] = TraceBits;
+                       }
+               
+       // Special case for end of Drow[]
+               {
+       // M0 = DPM[i][LB]
+       // Drow[LB] = DPD[i][LB]
+               
+               TBrow[LB] = 0;
+               float md = M0 + AP.ROpenB;
+               Drow[LB] += AP.RExtB;
+               SAVE_TBD(i+1, LB, 'D');
+               if (md >= Drow[LB])
+                       {
+                       Drow[LB] = md;
+                       TBrow[LB] = TRACEBITS_MD;
+                       SAVE_TBD(i+1, LB, 'M');
+                       }
+       // Drow[LB] = DPD[i+1][LB]
+               SAVE_DPD(i+1, LB, Drow[LB]);
+               }
+               
+               SAVE_DPM(i+1, 0, MINUS_INFINITY);
+               M0 = MINUS_INFINITY;
+
+               OpenA = AP.OpenA;
+               ExtA = AP.ExtA;
+               }
+       
+       SAVE_TBM(LA, 0, '?');
+
+// Special case for last row of DPI
+       byte *TBrow = TB[LA];
+       float I1 = MINUS_INFINITY;
+
+       SAVE_DPI(LA, 0, MINUS_INFINITY);
+       SAVE_TBI(LA, 0, '?');
+
+       SAVE_DPI(LA, 1, MINUS_INFINITY);
+       SAVE_TBI(LA, 1, '?');
+
+       for (unsigned j = 1; j < LB; ++j)
+               {
+       // Mrow[j-1] = DPM[LA][j]
+       // I1 = DPI[LA][j]
+               
+               TBrow[j] = 0;
+               float mi = Mrow[int(j)-1] + AP.ROpenA;
+               I1 += AP.RExtA;
+               SAVE_TBI(LA, j+1, 'I');
+               if (mi > I1)
+                       {
+                       I1 = mi;
+                       TBrow[j] = TRACEBITS_MI;
+                       SAVE_TBI(LA, j+1, 'M');
+                       }
+               SAVE_DPI(LA, j+1, I1);
+               }
+       
+       float FinalM = Mrow[LB-1];
+       float FinalD = Drow[LB];
+       float FinalI = I1;
+// FinalM = DPM[LA][LB]
+// FinalD = DPD[LA][LB]
+// FinalI = DPI[LA][LB]
+       
+       float Score = FinalM;
+       byte State = 'M';
+       if (FinalD > Score)
+               {
+               Score = FinalD;
+               State = 'D';
+               }
+       if (FinalI > Score)
+               {
+               Score = FinalI;
+               State = 'I';
+               }
+
+       EndTimer(ViterbiFast);
+       TraceBackBit(LA, LB, State, PD);
+
+#if    SAVE_FAST
+       g_MxDPM.LogMe();
+       g_MxDPD.LogMe();
+       g_MxDPI.LogMe();
+
+       g_MxTBM.LogMe();
+       g_MxTBD.LogMe();
+       g_MxTBI.LogMe();
+#endif
+
+       return Score;
+       }
diff --git a/uchime_src/windex.h b/uchime_src/windex.h
new file mode 100644 (file)
index 0000000..0b324ca
--- /dev/null
@@ -0,0 +1,71 @@
+#ifndef windex_h\r
+#define windex_h\r
+\r
+class SFasta;\r
+struct SeqDB;\r
+\r
+// Fixed-width integer aliases used by the Windex declarations below.\r
+typedef uint32 word_t;\r
+typedef uint16 wordcount_t;\r
+typedef uint32 arrsize_t;\r
+typedef uint16 seqcountperword_t;\r
+typedef uint32 seqindex_t;\r
+typedef uint16 commonwordcount_t;\r
+\r
+// Marker values for the index file format. WindexFileHdr has fields for the\r
+// first two; where Magic3/Magic4 are used is not visible in this header.\r
+const uint32 WindexFileHdr_Magic1 = 0x312DE41;\r
+const uint32 WindexFileHdr_Magic2 = 0x312DE42;\r
+const uint32 WindexFileHdr_Magic3 = 0x312DE43;\r
+const uint32 WindexFileHdr_Magic4 = 0x312DE44;\r
+\r
+// Header record of an index file: four 32-bit fields, with a magic value at\r
+// each end.\r
+// NOTE(review): presumably Magic1/Magic2 are expected to hold\r
+// WindexFileHdr_Magic1/_Magic2 -- confirm against the reader/writer code.\r
+struct WindexFileHdr\r
+       {\r
+       uint32 Magic1;\r
+       uint32 IsNucleo;\r
+       uint32 WordLength;\r
+       uint32 Magic2;\r
+       };\r
+\r
+// Word index over a set of sequences. Only the interface is declared here;\r
+// the member functions are defined in another translation unit.\r
+// NOTE(review): the pointer members look like arrays with one entry per\r
+// word (m_WordCount entries) -- confirm sizes and ownership against the\r
+// implementation.\r
+class Windex\r
+       {\r
+public:\r
+       bool m_Nucleo;\r
+       bool m_RedAlpha;\r
+       unsigned m_WordLength;\r
+       unsigned m_AlphaSize;\r
+       unsigned m_WordCount;\r
+       unsigned m_Hi;\r
+       unsigned m_CapacityInc;\r
+       arrsize_t *m_Capacities;\r
+       arrsize_t *m_Sizes;\r
+       float *m_WordScores;\r
+       seqindex_t **m_SeedIndexes;\r
+       byte *m_UniqueCounts;\r
+       // One entry per possible byte value.\r
+       unsigned m_CharToLetter[256];\r
+\r
+public:\r
+       Windex();\r
+       void ToFile(const string &FileName) const;\r
+       void FromFile(const string &FileName);\r
+       void FromSFasta(SFasta &SF);\r
+       void FromSeqDB(const SeqDB &DB);\r
+       // ctor defaults to false; presumably passed as true only from the\r
+       // constructor -- TODO confirm.\r
+       void Clear(bool ctor = false);\r
+       void AddWords(unsigned SeqIndex, const word_t *Words, unsigned N);\r
+       void Init(bool Nucleo, unsigned WordLength);\r
+       void Init2(bool Nucleo, unsigned TableSize);\r
+       void InitRed(unsigned WordLength);\r
+       void InitWordScores(const float *const *SubstMx);\r
+       void Reset();\r
+       void LogMe() const;\r
+       unsigned LogMemSize() const;\r
+       // TopWords defaults to 10.\r
+       void LogWordStats(unsigned TopWords = 10) const;\r
+       const char *WordToStr(word_t Word) const;\r
+       word_t SeqToWord(const byte *Seq) const;\r
+       unsigned SeqToWords(const byte *Seq, unsigned L, word_t *Words) const;\r
+       unsigned SeqToWordsStep(unsigned Step, const byte *Seq, unsigned L, word_t *Words) const;\r
+       unsigned WordsToCounts(const word_t *Words, unsigned N,\r
+         word_t *UniqueWords, seqcountperword_t *Counts) const;\r
+       unsigned GetUniqueWords(const word_t *Words, unsigned N,\r
+         word_t *UniqueWords) const;\r
+       void LogSizeHisto() const;\r
+       };\r
+\r
+#endif // windex_h\r
diff --git a/uchime_src/writechhit.cpp b/uchime_src/writechhit.cpp
new file mode 100644 (file)
index 0000000..ea67061
--- /dev/null
@@ -0,0 +1,329 @@
+#include "myutils.h"\r
+#include "chime.h"\r
+\r
+// Writes the tab-separated header line naming columns 1..16 of the chime\r
+// hit table to f; does nothing when f is null. The line begins with a tab,\r
+// so column 0 is left without a name.\r
+void WriteChimeFileHdr(FILE *f)\r
+       {\r
+       static const char *const Pieces[] =\r
+               {\r
+               "\tQuery",              // 1\r
+               "\tA",                  // 2\r
+               "\tB",                  // 3\r
+               "\tIdQM",               // 4\r
+               "\tIdQA",               // 5\r
+               "\tIdQB",               // 6\r
+               "\tIdAB",               // 7\r
+               "\tIdQT",               // 8\r
+               "\tLY",                 // 9\r
+               "\tLN",                 // 10\r
+               "\tLA",                 // 11\r
+               "\tRY",                 // 12\r
+               "\tRN",                 // 13\r
+               "\tRA",                 // 14\r
+               "\tDiv",                // 15\r
+               "\tY",                  // 16\r
+               "\n"\r
+               };\r
+       const unsigned PieceCount = sizeof(Pieces)/sizeof(Pieces[0]);\r
+\r
+       if (f != 0)\r
+               for (unsigned i = 0; i < PieceCount; ++i)\r
+                       fputs(Pieces[i], f);\r
+       }\r
+\r
+// Writes one tab-separated row of the chime hit table for Hit to f; does\r
+// nothing when f is null. Column 0 is the score and column 1 the query\r
+// label. When Hit.Div <= 0.0 the score is written as 0.0000, columns 2..15\r
+// as "*" and column 16 as N; otherwise columns 2..16 carry the two parent\r
+// labels, five identities, six diff counts, Div and the character returned\r
+// by yon(Hit.Accept()).\r
+void WriteChimeHit(FILE *f, const ChimeHit2 &Hit)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+\r
+       const char *Query = Hit.QLabel.c_str();\r
+\r
+       if (Hit.Div <= 0.0)\r
+               {\r
+       // Placeholder row\r
+               fputs("0.0000", f);                     // 0\r
+               fputc('\t', f);\r
+               fputs(Query, f);                        // 1\r
+               for (unsigned Field = 2; Field <= 15; ++Field)\r
+                       fputs("\t*", f);                // 2 .. 15\r
+               fputs("\tN", f);                        // 16\r
+               fputc('\n', f);\r
+               return;\r
+               }\r
+\r
+// Fields grouped by the format they are printed with\r
+       const char *const Labels[] =\r
+               {\r
+               Query,                                  // 1\r
+               Hit.ALabel.c_str(),                     // 2\r
+               Hit.BLabel.c_str()                      // 3\r
+               };\r
+       const double PctIds[] =\r
+               {\r
+               Hit.PctIdQM,                            // 4\r
+               Hit.PctIdQA,                            // 5\r
+               Hit.PctIdQB,                            // 6\r
+               Hit.PctIdAB,                            // 7\r
+               Hit.PctIdQT                             // 8\r
+               };\r
+       const unsigned DiffCounts[] =\r
+               {\r
+               Hit.CS_LY,                              // 9\r
+               Hit.CS_LN,                              // 10\r
+               Hit.CS_LA,                              // 11\r
+               Hit.CS_RY,                              // 12\r
+               Hit.CS_RN,                              // 13\r
+               Hit.CS_RA                               // 14\r
+               };\r
+\r
+       fprintf(f, "%.4f", Hit.Score);                  // 0\r
+       for (unsigned i = 0; i < 3; ++i)\r
+               fprintf(f, "\t%s", Labels[i]);\r
+       for (unsigned i = 0; i < 5; ++i)\r
+               fprintf(f, "\t%.1f", PctIds[i]);\r
+       for (unsigned i = 0; i < 6; ++i)\r
+               fprintf(f, "\t%u", DiffCounts[i]);\r
+       fprintf(f, "\t%.2f", Hit.Div);                  // 15\r
+       fprintf(f, "\t%c\n", yon(Hit.Accept()));        // 16\r
+       }\r
+\r
+// Returns how many of the L positions starting at Seq are not gaps\r
+// according to isgap().\r
+unsigned GetUngappedLength(const byte *Seq, unsigned L)\r
+       {\r
+       unsigned Residues = 0;\r
+       const byte *const End = Seq + L;\r
+       for (const byte *Ptr = Seq; Ptr != End; ++Ptr)\r
+               {\r
+               if (isgap(*Ptr))\r
+                       continue;\r
+               ++Residues;\r
+               }\r
+       return Residues;\r
+       }\r
+\r
+// Writes a human-readable alignment report for one chimera hit to f.\r
+//\r
+// Nothing is written when f is null, when Hit.Div <= 0.0, or when the\r
+// query row Hit.Q3 has no non-gap column. Hit.Q3, Hit.A3 and Hit.B3 are\r
+// the aligned rows and must have equal length (checked with asserta).\r
+//\r
+// Layout of the report:\r
+//   - a separator and one label line each for the query and both parents,\r
+//     with their ungapped lengths;\r
+//   - the alignment between the first and last non-gap query column, in\r
+//     rows of at most 80 columns, each printed as six lines: A, Q, B,\r
+//     Diffs, Votes, Model;\r
+//   - two summary lines with identities, diff counts and the score.\r
+void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+\r
+       if (Hit.Div <= 0.0)\r
+               return;\r
+\r
+       const string &Q3 = Hit.Q3;\r
+       const string &A3 = Hit.A3;\r
+       const string &B3 = Hit.B3;\r
+\r
+       const byte *Q3Seq = (const byte *) Q3.c_str();\r
+       const byte *A3Seq = (const byte *) A3.c_str();\r
+       const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+// Aligned\r
+       unsigned ColCount = SIZE(Q3);\r
+       asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+       unsigned LQ = GetUngappedLength(Q3Seq, ColCount);\r
+       unsigned LA = GetUngappedLength(A3Seq, ColCount);\r
+       unsigned LB = GetUngappedLength(B3Seq, ColCount);\r
+\r
+// A query row made only of gaps (or an empty alignment) would leave\r
+// FromCol/ToCol at UINT_MAX below and index far outside the rows.\r
+       if (LQ == 0)\r
+               return;\r
+\r
+       fprintf(f, "\n");\r
+       fprintf(f, "------------------------------------------------------------------------\n");\r
+       fprintf(f, "Query   (%5u nt) %s\n", LQ, Hit.QLabel.c_str());\r
+       fprintf(f, "ParentA (%5u nt) %s\n", LA, Hit.ALabel.c_str());\r
+       fprintf(f, "ParentB (%5u nt) %s\n", LB, Hit.BLabel.c_str());\r
+\r
+// Strip terminal gaps in query\r
+// FromCol/ToCol become the first/last column holding a query residue.\r
+       unsigned FromCol = UINT_MAX;\r
+       unsigned ToCol = UINT_MAX;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               if (!isgap(Q3Seq[Col]))\r
+                       {\r
+                       if (FromCol == UINT_MAX)\r
+                               FromCol = Col;\r
+                       ToCol = Col;\r
+                       }\r
+               }\r
+\r
+// Ungapped positions reached so far in each row. The parents may have\r
+// residues in the skipped leading columns, so count those first.\r
+       unsigned QPos = 0;\r
+       unsigned APos = 0;\r
+       unsigned BPos = 0;\r
+       for (unsigned Col = 0; Col < FromCol; ++Col)\r
+               {\r
+               if (!isgap(A3Seq[Col]))\r
+                       ++APos;\r
+               if (!isgap(B3Seq[Col]))\r
+                       ++BPos;\r
+               }\r
+\r
+// Print the range FromCol..ToCol in rows of up to 80 columns\r
+       unsigned Range = ToCol - FromCol + 1;\r
+       unsigned RowCount = (Range + 79)/80;\r
+       unsigned RowFromCol = FromCol;\r
+       for (unsigned RowIndex = 0; RowIndex < RowCount; ++RowIndex)\r
+               {\r
+               fprintf(f, "\n");\r
+               unsigned RowToCol = RowFromCol + 79;\r
+               if (RowToCol > ToCol)\r
+                       RowToCol = ToCol;\r
+\r
+       // A row\r
+       // Letters that differ from the query are shown in lower case.\r
+               fprintf(f, "A %5u ", APos + 1);\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char a = A3Seq[Col];\r
+                       if (a != q)\r
+                               // tolower needs a value representable as unsigned char\r
+                               a = (char) tolower((unsigned char) a);\r
+                       fprintf(f, "%c", a);\r
+                       if (!isgap(a))\r
+                               ++APos;\r
+                       }\r
+               fprintf(f, " %u\n", APos);\r
+\r
+       // Q row\r
+               fprintf(f, "Q %5u ", QPos + 1);\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       fprintf(f, "%c", q);\r
+                       if (!isgap(q))\r
+                               ++QPos;\r
+                       }\r
+               fprintf(f, " %u\n", QPos);\r
+\r
+       // B row\r
+       // Letters that differ from the query are shown in lower case.\r
+               fprintf(f, "B %5u ", BPos + 1);\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char b = B3Seq[Col];\r
+                       if (b != q)\r
+                               // tolower needs a value representable as unsigned char\r
+                               b = (char) tolower((unsigned char) b);\r
+                       fprintf(f, "%c", b);\r
+                       if (!isgap(b))\r
+                               ++BPos;\r
+                       }\r
+               fprintf(f, " %u\n", BPos);\r
+\r
+       // Diffs\r
+       // Blank for gap columns, for columns where all three agree and for\r
+       // columns inside ColXLo..ColXHi. Left of ColXLo: A = query matches\r
+       // A only, b = query matches B only. Right of ColXHi: B = query\r
+       // matches B only, a = query matches A only. On either side:\r
+       // N = parents agree but query differs, ? = all three differ.\r
+               fprintf(f, "Diffs   ");\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char a = A3Seq[Col];\r
+                       char b = B3Seq[Col];\r
+\r
+                       char c = ' ';\r
+                       if (isgap(q) || isgap(a) || isgap(b))\r
+                               c = ' ';\r
+                       else if (Col < Hit.ColXLo)\r
+                               {\r
+                               if (q == a && q == b)\r
+                                       c = ' ';\r
+                               else if (q == a && q != b)\r
+                                       c = 'A';\r
+                               else if (q == b && q != a)\r
+                                       c = 'b';\r
+                               else if (a == b && q != a)\r
+                                       c = 'N';\r
+                               else\r
+                                       c = '?';\r
+                               }\r
+                       else if (Col > Hit.ColXHi)\r
+                               {\r
+                               if (q == a && q == b)\r
+                                       c = ' ';\r
+                               else if (q == b && q != a)\r
+                                       c = 'B';\r
+                               else if (q == a && q != b)\r
+                                       c = 'a';\r
+                               else if (a == b && q != a)\r
+                                       c = 'N';\r
+                               else\r
+                                       c = '?';\r
+                               }\r
+\r
+                       fprintf(f, "%c", c);\r
+                       }\r
+               fprintf(f, "\n");\r
+\r
+       // SNPs\r
+       // Same column classes as Diffs, but columns next to a gap column are\r
+       // also left blank. + = query matches only the parent shown in the\r
+       // Model line for that side, ! = query matches only the other parent,\r
+       // 0 = any other disagreement.\r
+               fprintf(f, "Votes   ");\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char a = A3Seq[Col];\r
+                       char b = B3Seq[Col];\r
+\r
+                       bool PrevGap = Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1]));\r
+                       bool NextGap = Col+1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1]));\r
+\r
+                       char c = ' ';\r
+                       if (isgap(q) || isgap(a) || isgap(b) || PrevGap || NextGap)\r
+                               c = ' ';\r
+                       else if (Col < Hit.ColXLo)\r
+                               {\r
+                               if (q == a && q == b)\r
+                                       c = ' ';\r
+                               else if (q == a && q != b)\r
+                                       c = '+';\r
+                               else if (q == b && q != a)\r
+                                       c = '!';\r
+                               else\r
+                                       c = '0';\r
+                               }\r
+                       else if (Col > Hit.ColXHi)\r
+                               {\r
+                               if (q == a && q == b)\r
+                                       c = ' ';\r
+                               else if (q == b && q != a)\r
+                                       c = '+';\r
+                               else if (q == a && q != b)\r
+                                       c = '!';\r
+                               else\r
+                                       c = '0';\r
+                               }\r
+\r
+                       fprintf(f, "%c", c);\r
+                       }\r
+               fprintf(f, "\n");\r
+\r
+       // LR row\r
+       // A left of ColXLo, x from ColXLo to ColXHi inclusive, B to the right.\r
+               fprintf(f, "Model   ");\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       if (Col < Hit.ColXLo)\r
+                               fprintf(f, "A");\r
+                       else if (Col >= Hit.ColXLo && Col <= Hit.ColXHi)\r
+                               fprintf(f, "x");\r
+                       else\r
+                               fprintf(f, "B");\r
+                       }\r
+\r
+               fprintf(f, "\n");\r
+\r
+               RowFromCol += 80;\r
+               }\r
+       fprintf(f, "\n");\r
+\r
+// Summary: divergence is recomputed here from the identities as the\r
+// relative gain of the model over the closer parent, in percent.\r
+       double PctIdBestP = max(Hit.PctIdQA, Hit.PctIdQB);\r
+       double Div = (Hit.PctIdQM - PctIdBestP)*100.0/PctIdBestP;\r
+\r
+       unsigned LTot = Hit.CS_LY + Hit.CS_LN + Hit.CS_LA;\r
+       unsigned RTot = Hit.CS_RY + Hit.CS_RN + Hit.CS_RA;\r
+\r
+       double PctL = Pct(Hit.CS_LY, LTot);\r
+       double PctR = Pct(Hit.CS_RY, RTot);\r
+\r
+       fprintf(f,\r
+         "Ids.  QA %.1f%%, QB %.1f%%, AB %.1f%%, QModel %.1f%%, Div. %+.1f%%\n",\r
+         Hit.PctIdQA,\r
+         Hit.PctIdQB,\r
+         Hit.PctIdAB,\r
+         Hit.PctIdQM,\r
+         Div);\r
+\r
+       fprintf(f,\r
+         "Diffs Left %u: N %u, A %u, Y %u (%.1f%%); Right %u: N %u, A %u, Y %u (%.1f%%), Score %.4f\n",\r
+         LTot, Hit.CS_LN, Hit.CS_LA, Hit.CS_LY, PctL,\r
+         RTot, Hit.CS_RN, Hit.CS_RA, Hit.CS_RY, PctR,\r
+         Hit.Score);\r
+       }\r