A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */; };
A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
- A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3655137DAB8300332B0C /* addtargets2.cpp */; };
- A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3656137DAB8300332B0C /* alignchime.cpp */; };
- A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3657137DAB8300332B0C /* alignchimel.cpp */; };
- A74D368A137DAB8400332B0C /* alnparams.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365A137DAB8300332B0C /* alnparams.cpp */; };
- A74D368B137DAB8400332B0C /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365C137DAB8300332B0C /* alpha.cpp */; };
- A74D368C137DAB8400332B0C /* alpha2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365E137DAB8300332B0C /* alpha2.cpp */; };
- A74D368D137DAB8400332B0C /* fractid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3664137DAB8300332B0C /* fractid.cpp */; };
- A74D368E137DAB8400332B0C /* getparents.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3665137DAB8300332B0C /* getparents.cpp */; };
- A74D368F137DAB8400332B0C /* globalalign2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3666137DAB8300332B0C /* globalalign2.cpp */; };
- A74D3690137DAB8400332B0C /* make3way.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366A137DAB8300332B0C /* make3way.cpp */; };
- A74D3691137DAB8400332B0C /* mx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366B137DAB8300332B0C /* mx.cpp */; };
- A74D3692137DAB8400332B0C /* myutils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366E137DAB8300332B0C /* myutils.cpp */; };
- A74D3693137DAB8400332B0C /* path.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3672137DAB8300332B0C /* path.cpp */; };
- A74D3694137DAB8400332B0C /* searchchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3674137DAB8300332B0C /* searchchime.cpp */; };
- A74D3695137DAB8400332B0C /* seqdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3676137DAB8300332B0C /* seqdb.cpp */; };
- A74D3696137DAB8400332B0C /* setnucmx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3678137DAB8300332B0C /* setnucmx.cpp */; };
- A74D3697137DAB8400332B0C /* sfasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3679137DAB8300332B0C /* sfasta.cpp */; };
- A74D3698137DAB8400332B0C /* tracebackbit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D367F137DAB8300332B0C /* tracebackbit.cpp */; };
- A74D3699137DAB8400332B0C /* uchime_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3681137DAB8300332B0C /* uchime_main.cpp */; };
- A74D369A137DAB8400332B0C /* usort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3683137DAB8300332B0C /* usort.cpp */; };
- A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3684137DAB8300332B0C /* viterbifast.cpp */; };
- A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3686137DAB8300332B0C /* writechhit.cpp */; };
A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurmetastats.cpp; sourceTree = "<group>"; };
A74A9A9D148E881E00AB5E3E /* spline.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = spline.h; sourceTree = "<group>"; };
A74A9A9E148E881E00AB5E3E /* spline.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spline.cpp; sourceTree = "<group>"; };
- A74D3655137DAB8300332B0C /* addtargets2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = addtargets2.cpp; sourceTree = "<group>"; };
- A74D3656137DAB8300332B0C /* alignchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchime.cpp; sourceTree = "<group>"; };
- A74D3657137DAB8300332B0C /* alignchimel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchimel.cpp; sourceTree = "<group>"; };
- A74D3658137DAB8300332B0C /* allocs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = allocs.h; sourceTree = "<group>"; };
- A74D3659137DAB8300332B0C /* alnheuristics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alnheuristics.h; sourceTree = "<group>"; };
- A74D365A137DAB8300332B0C /* alnparams.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alnparams.cpp; sourceTree = "<group>"; };
- A74D365B137DAB8300332B0C /* alnparams.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alnparams.h; sourceTree = "<group>"; };
- A74D365C137DAB8300332B0C /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = "<group>"; };
- A74D365D137DAB8300332B0C /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = "<group>"; };
- A74D365E137DAB8300332B0C /* alpha2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha2.cpp; sourceTree = "<group>"; };
- A74D365F137DAB8300332B0C /* chainer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chainer.h; sourceTree = "<group>"; };
- A74D3660137DAB8300332B0C /* chime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chime.h; sourceTree = "<group>"; };
- A74D3661137DAB8300332B0C /* diagbox.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = diagbox.h; sourceTree = "<group>"; };
- A74D3662137DAB8300332B0C /* dp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dp.h; sourceTree = "<group>"; };
- A74D3663137DAB8300332B0C /* evalue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = evalue.h; sourceTree = "<group>"; };
- A74D3664137DAB8300332B0C /* fractid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fractid.cpp; sourceTree = "<group>"; };
- A74D3665137DAB8300332B0C /* getparents.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getparents.cpp; sourceTree = "<group>"; };
- A74D3666137DAB8300332B0C /* globalalign2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = globalalign2.cpp; sourceTree = "<group>"; };
- A74D3667137DAB8300332B0C /* help.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = help.h; sourceTree = "<group>"; };
- A74D3668137DAB8300332B0C /* hsp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hsp.h; sourceTree = "<group>"; };
- A74D3669137DAB8300332B0C /* hspfinder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hspfinder.h; sourceTree = "<group>"; };
- A74D366A137DAB8300332B0C /* make3way.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = make3way.cpp; sourceTree = "<group>"; };
- A74D366B137DAB8300332B0C /* mx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mx.cpp; sourceTree = "<group>"; };
- A74D366C137DAB8300332B0C /* mx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mx.h; sourceTree = "<group>"; };
- A74D366D137DAB8300332B0C /* myopts.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myopts.h; sourceTree = "<group>"; };
- A74D366E137DAB8300332B0C /* myutils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myutils.cpp; sourceTree = "<group>"; };
- A74D366F137DAB8300332B0C /* myutils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myutils.h; sourceTree = "<group>"; };
- A74D3670137DAB8300332B0C /* orf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = orf.h; sourceTree = "<group>"; };
- A74D3671137DAB8300332B0C /* out.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = out.h; sourceTree = "<group>"; };
- A74D3672137DAB8300332B0C /* path.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = path.cpp; sourceTree = "<group>"; };
- A74D3673137DAB8300332B0C /* path.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = path.h; sourceTree = "<group>"; };
- A74D3674137DAB8300332B0C /* searchchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = searchchime.cpp; sourceTree = "<group>"; };
- A74D3675137DAB8300332B0C /* seq.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seq.h; sourceTree = "<group>"; };
- A74D3676137DAB8300332B0C /* seqdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqdb.cpp; sourceTree = "<group>"; };
- A74D3677137DAB8300332B0C /* seqdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqdb.h; sourceTree = "<group>"; };
- A74D3678137DAB8300332B0C /* setnucmx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = setnucmx.cpp; sourceTree = "<group>"; };
- A74D3679137DAB8300332B0C /* sfasta.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sfasta.cpp; sourceTree = "<group>"; };
- A74D367A137DAB8300332B0C /* sfasta.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sfasta.h; sourceTree = "<group>"; };
- A74D367B137DAB8300332B0C /* svnmods.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = svnmods.h; sourceTree = "<group>"; };
- A74D367C137DAB8300332B0C /* svnversion.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = svnversion.h; sourceTree = "<group>"; };
- A74D367D137DAB8300332B0C /* timers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = timers.h; sourceTree = "<group>"; };
- A74D367E137DAB8300332B0C /* timing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = timing.h; sourceTree = "<group>"; };
- A74D367F137DAB8300332B0C /* tracebackbit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tracebackbit.cpp; sourceTree = "<group>"; };
- A74D3680137DAB8300332B0C /* uc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uc.h; sourceTree = "<group>"; };
- A74D3681137DAB8300332B0C /* uchime_main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = uchime_main.cpp; sourceTree = "<group>"; };
- A74D3682137DAB8300332B0C /* ultra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ultra.h; sourceTree = "<group>"; };
- A74D3683137DAB8300332B0C /* usort.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = usort.cpp; sourceTree = "<group>"; };
- A74D3684137DAB8300332B0C /* viterbifast.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = viterbifast.cpp; sourceTree = "<group>"; };
- A74D3685137DAB8300332B0C /* windex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = windex.h; sourceTree = "<group>"; };
- A74D3686137DAB8300332B0C /* writechhit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = writechhit.cpp; sourceTree = "<group>"; };
A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = "<group>"; };
name = Products;
sourceTree = "<group>";
};
- A74D3644137DA7CE00332B0C /* uchime */ = {
- isa = PBXGroup;
- children = (
- A74D3655137DAB8300332B0C /* addtargets2.cpp */,
- A74D3656137DAB8300332B0C /* alignchime.cpp */,
- A74D3657137DAB8300332B0C /* alignchimel.cpp */,
- A74D3658137DAB8300332B0C /* allocs.h */,
- A74D3659137DAB8300332B0C /* alnheuristics.h */,
- A74D365A137DAB8300332B0C /* alnparams.cpp */,
- A74D365B137DAB8300332B0C /* alnparams.h */,
- A74D365C137DAB8300332B0C /* alpha.cpp */,
- A74D365D137DAB8300332B0C /* alpha.h */,
- A74D365E137DAB8300332B0C /* alpha2.cpp */,
- A74D365F137DAB8300332B0C /* chainer.h */,
- A74D3660137DAB8300332B0C /* chime.h */,
- A74D3661137DAB8300332B0C /* diagbox.h */,
- A74D3662137DAB8300332B0C /* dp.h */,
- A74D3663137DAB8300332B0C /* evalue.h */,
- A74D3664137DAB8300332B0C /* fractid.cpp */,
- A74D3665137DAB8300332B0C /* getparents.cpp */,
- A74D3666137DAB8300332B0C /* globalalign2.cpp */,
- A74D3667137DAB8300332B0C /* help.h */,
- A74D3668137DAB8300332B0C /* hsp.h */,
- A74D3669137DAB8300332B0C /* hspfinder.h */,
- A74D366A137DAB8300332B0C /* make3way.cpp */,
- A74D366B137DAB8300332B0C /* mx.cpp */,
- A74D366C137DAB8300332B0C /* mx.h */,
- A74D366D137DAB8300332B0C /* myopts.h */,
- A74D366E137DAB8300332B0C /* myutils.cpp */,
- A74D366F137DAB8300332B0C /* myutils.h */,
- A74D3670137DAB8300332B0C /* orf.h */,
- A74D3671137DAB8300332B0C /* out.h */,
- A74D3672137DAB8300332B0C /* path.cpp */,
- A74D3673137DAB8300332B0C /* path.h */,
- A74D3674137DAB8300332B0C /* searchchime.cpp */,
- A74D3675137DAB8300332B0C /* seq.h */,
- A74D3676137DAB8300332B0C /* seqdb.cpp */,
- A74D3677137DAB8300332B0C /* seqdb.h */,
- A74D3678137DAB8300332B0C /* setnucmx.cpp */,
- A74D3679137DAB8300332B0C /* sfasta.cpp */,
- A74D367A137DAB8300332B0C /* sfasta.h */,
- A74D367B137DAB8300332B0C /* svnmods.h */,
- A74D367C137DAB8300332B0C /* svnversion.h */,
- A74D367D137DAB8300332B0C /* timers.h */,
- A74D367E137DAB8300332B0C /* timing.h */,
- A74D367F137DAB8300332B0C /* tracebackbit.cpp */,
- A74D3680137DAB8300332B0C /* uc.h */,
- A74D3681137DAB8300332B0C /* uchime_main.cpp */,
- A74D3682137DAB8300332B0C /* ultra.h */,
- A74D3683137DAB8300332B0C /* usort.cpp */,
- A74D3684137DAB8300332B0C /* viterbifast.cpp */,
- A74D3685137DAB8300332B0C /* windex.h */,
- A74D3686137DAB8300332B0C /* writechhit.cpp */,
- );
- name = uchime;
- sourceTree = "<group>";
- };
A7D161E7149F7F50000523E8 /* fortran */ = {
isa = PBXGroup;
children = (
A7E9BA4512D3965600DA6239 /* chimera */ = {
isa = PBXGroup;
children = (
- A74D3644137DA7CE00332B0C /* uchime */,
A7E9B65C12D37EC300DA6239 /* bellerophon.cpp */,
A7E9B65D12D37EC300DA6239 /* bellerophon.h */,
A7E9B67412D37EC400DA6239 /* ccode.cpp */,
A7FE7C401330EA1000F7B327 /* getcurrentcommand.cpp in Sources */,
A7FE7E6D13311EA400F7B327 /* setcurrentcommand.cpp in Sources */,
A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */,
- A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */,
- A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */,
- A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */,
- A74D368A137DAB8400332B0C /* alnparams.cpp in Sources */,
- A74D368B137DAB8400332B0C /* alpha.cpp in Sources */,
- A74D368C137DAB8400332B0C /* alpha2.cpp in Sources */,
- A74D368D137DAB8400332B0C /* fractid.cpp in Sources */,
- A74D368E137DAB8400332B0C /* getparents.cpp in Sources */,
- A74D368F137DAB8400332B0C /* globalalign2.cpp in Sources */,
- A74D3690137DAB8400332B0C /* make3way.cpp in Sources */,
- A74D3691137DAB8400332B0C /* mx.cpp in Sources */,
- A74D3692137DAB8400332B0C /* myutils.cpp in Sources */,
- A74D3693137DAB8400332B0C /* path.cpp in Sources */,
- A74D3694137DAB8400332B0C /* searchchime.cpp in Sources */,
- A74D3695137DAB8400332B0C /* seqdb.cpp in Sources */,
- A74D3696137DAB8400332B0C /* setnucmx.cpp in Sources */,
- A74D3697137DAB8400332B0C /* sfasta.cpp in Sources */,
- A74D3698137DAB8400332B0C /* tracebackbit.cpp in Sources */,
- A74D3699137DAB8400332B0C /* uchime_main.cpp in Sources */,
- A74D369A137DAB8400332B0C /* usort.cpp in Sources */,
- A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */,
- A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */,
A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */,
A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */,
A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */,
+++ /dev/null
-//#if UCHIMES\r
-\r
-#include "myutils.h"\r
-#include "chime.h"\r
-#include "ultra.h"\r
-#include <set>\r
-\r
-const float MAX_WORD_COUNT_DROP = 1; /* AddTargets stops collecting once a candidate's word count is more than this below the top-ranked count */\r
-\r
-void SortDescending(const vector<float> &Values, vector<unsigned> &Order); /* forward declaration; not called by AddTargets */\r
-bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path); /* forward declaration; not called by AddTargets */\r
-double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path); /* forward declaration; not called by AddTargets */\r
-void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts,\r
- vector<unsigned> &Order); /* AddTargets relies on this to populate WordCounts and Order for the whole database */\r
-\r
-// Adds to TargetIndexes the index of every database sequence whose USort word\r
-// count is within MAX_WORD_COUNT_DROP of the first-ranked sequence's count.\r
-// Candidates are visited in the order USort returns them; collection stops at\r
-// the first one that falls outside that margin. An empty database is a no-op.\r
-void AddTargets(SeqDB &DB, const SeqData &Query, set<unsigned> &TargetIndexes)\r
-    {\r
-    const unsigned DbSeqCount = DB.GetSeqCount();\r
-    if (DbSeqCount == 0)\r
-        return;\r
-\r
-    vector<float> Counts;\r
-    vector<unsigned> Ranking;\r
-    USort(Query, DB, Counts, Ranking);\r
-    asserta(SIZE(Ranking) == DbSeqCount);\r
-\r
-    // The first entry of the ranking is the reference ("top") sequence.\r
-    const unsigned BestIndex = Ranking[0];\r
-    const float BestCount = Counts[BestIndex];\r
-    for (unsigned Rank = 0; Rank < DbSeqCount; ++Rank)\r
-        {\r
-        const unsigned Candidate = Ranking[Rank];\r
-        const float CandidateCount = Counts[Candidate];\r
-        // Nothing after the first too-distant candidate is considered.\r
-        if (BestCount - CandidateCount > MAX_WORD_COUNT_DROP)\r
-            break;\r
-        TargetIndexes.insert(Candidate);\r
-        }\r
-    }\r
-\r
-//#endif\r
+++ /dev/null
-#include "myutils.h"\r
-#include "seq.h"\r
-#include "chime.h"\r
-#include "dp.h"\r
-\r
-#define TRACE 0 /* when non-zero, AlignChimeGlobal3 compiles in its Log() diagnostics */\r
-#define TRACE_BS 0 /* not referenced in the visible part of this file */\r
-\r
-void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
- const string &PathQA, const string &PathQB,\r
- string &Q3, string &A3, string &B3); /* called by AlignChime to produce the three aligned rows Q3/A3/B3 */\r
-\r
-void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
- const string &QLabel, const string &ALabel, const string &BLabel,\r
- ChimeHit2 &Hit); /* chosen by AlignChime3 when opt_ucl is set */\r
-\r
-// Returns Y divided by the penalty term opt_xn*(N + opt_dn) + opt_xa*A.\r
-// Callers in this file pass Y = votes for the model, N = votes against it and\r
-// A = abstentions for one side of a candidate crossover.\r
-double GetScore2(double Y, double N, double A)\r
-    {\r
-    const double Penalty = opt_xn*(N + opt_dn) + opt_xa*A;\r
-    return Y/Penalty;\r
-    }\r
-\r
-void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
- const string &QLabel, const string &ALabel, const string &BLabel,\r
- ChimeHit2 &Hit)\r
- {\r
- Hit.Clear();\r
- Hit.QLabel = QLabel;\r
-\r
- const byte *Q3Seq = (const byte *) Q3.c_str();\r
- const byte *A3Seq = (const byte *) A3.c_str();\r
- const byte *B3Seq = (const byte *) B3.c_str();\r
-\r
- const unsigned ColCount = SIZE(Q3);\r
- asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
-\r
-#if TRACE\r
- Log("Q %5u %*.*s\n", ColCount, ColCount, ColCount, Q3Seq);\r
- Log("A %5u %*.*s\n", ColCount, ColCount, ColCount, A3Seq);\r
- Log("B %5u %*.*s\n", ColCount, ColCount, ColCount, B3Seq);\r
-#endif\r
-\r
-// Discard terminal gaps\r
- unsigned ColLo = UINT_MAX;\r
- unsigned ColHi = UINT_MAX;\r
- for (unsigned Col = 2; Col + 2 < ColCount; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- if (isacgt(q) && isacgt(a) && isacgt(b))\r
- {\r
- if (ColLo == UINT_MAX)\r
- ColLo = Col;\r
- ColHi = Col;\r
- }\r
- }\r
-\r
- if (ColLo == UINT_MAX)\r
- return;\r
-\r
- unsigned QPos = 0;\r
- unsigned APos = 0;\r
- unsigned BPos = 0;\r
- unsigned DiffCount = 0;\r
-\r
- vector<unsigned> ColToQPos(ColLo, UINT_MAX);\r
- vector<unsigned> AccumCount(ColLo, UINT_MAX);\r
- vector<unsigned> AccumSameA(ColLo, UINT_MAX);\r
- vector<unsigned> AccumSameB(ColLo, UINT_MAX);\r
- vector<unsigned> AccumForA(ColLo, UINT_MAX);\r
- vector<unsigned> AccumForB(ColLo, UINT_MAX);\r
- vector<unsigned> AccumAbstain(ColLo, UINT_MAX);\r
- vector<unsigned> AccumAgainst(ColLo, UINT_MAX);\r
-\r
- unsigned SumSameA = 0;\r
- unsigned SumSameB = 0;\r
- unsigned SumSameAB = 0;\r
- unsigned Sum = 0;\r
- unsigned SumForA = 0;\r
- unsigned SumForB = 0;\r
- unsigned SumAbstain = 0;\r
- unsigned SumAgainst = 0;\r
- for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- if (isacgt(q) && isacgt(a) && isacgt(b))\r
- {\r
- if (q == a)\r
- ++SumSameA;\r
- if (q == b)\r
- ++SumSameB;\r
- if (a == b)\r
- ++SumSameAB;\r
- if (q == a && q != b)\r
- ++SumForA;\r
- if (q == b && q != a)\r
- ++SumForB;\r
- if (a == b && q != a)\r
- ++SumAgainst;\r
- if (q != a && q != b)\r
- ++SumAbstain;\r
- ++Sum;\r
- }\r
-\r
- ColToQPos.push_back(QPos);\r
- AccumSameA.push_back(SumSameA);\r
- AccumSameB.push_back(SumSameB);\r
- AccumCount.push_back(Sum);\r
- AccumForA.push_back(SumForA);\r
- AccumForB.push_back(SumForB);\r
- AccumAbstain.push_back(SumAbstain);\r
- AccumAgainst.push_back(SumAgainst);\r
-\r
- if (q != '-')\r
- ++QPos;\r
- if (a != '-')\r
- ++APos;\r
- if (b != '-')\r
- ++BPos;\r
- }\r
-\r
- asserta(SIZE(ColToQPos) == ColHi+1);\r
- asserta(SIZE(AccumSameA) == ColHi+1);\r
- asserta(SIZE(AccumSameB) == ColHi+1);\r
- asserta(SIZE(AccumAbstain) == ColHi+1);\r
- asserta(SIZE(AccumAgainst) == ColHi+1);\r
-\r
- double IdQA = double(SumSameA)/Sum;\r
- double IdQB = double(SumSameB)/Sum;\r
- double IdAB = double(SumSameAB)/Sum;\r
- double MaxId = max(IdQA, IdQB);\r
-\r
-#if TRACE\r
- Log("IdQA=%.1f%% IdQB=%.1f%% IdAB=%.1f\n", IdQA*100.0, IdQB*100.0, IdAB*100.0);\r
- Log("\n");\r
- Log(" x AQB IdAL IdBL IdAR IdBR DivAB DivBA YAL YBL YAR YBR AbL AbR ScoreAB ScoreAB XLo Xhi\n");\r
- Log("----- --- ----- ----- ----- ----- ------ ------ ----- ----- ----- ----- ----- ----- ------- ------- ----- -----\n");\r
-#endif\r
- unsigned BestXLo = UINT_MAX;\r
- unsigned BestXHi = UINT_MAX;\r
- double BestDiv = 0.0;\r
- double BestIdQM = 0.0;\r
- double BestScore = 0.0;\r
-\r
-// Find range of cols BestXLo..BestXHi that maximizes score\r
- bool FirstA = false;\r
-\r
-// NOTE: Must be < ColHi not <= because use Col+1 below\r
- for (unsigned Col = ColLo; Col < ColHi; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- unsigned SameAL = AccumSameA[Col];\r
- unsigned SameBL = AccumSameB[Col];\r
- unsigned SameAR = SumSameA - AccumSameA[Col];\r
- unsigned SameBR = SumSameB - AccumSameB[Col];\r
-\r
- double IdAB = double(SameAL + SameBR)/Sum;\r
- double IdBA = double(SameBL + SameAR)/Sum;\r
-\r
- unsigned ForAL = AccumForA[Col];\r
- unsigned ForBL = AccumForB[Col];\r
- unsigned ForAR = SumForA - AccumForA[Col+1];\r
- unsigned ForBR = SumForB - AccumForB[Col+1];\r
- unsigned AbL = AccumAbstain[Col];\r
- unsigned AbR = SumAbstain - AccumAbstain[Col+1];\r
-\r
- double ScoreAB = GetScore2(ForAL, ForBL, AbL)*GetScore2(ForBR, ForAR, AbR);\r
- double ScoreBA = GetScore2(ForBL, ForAL, AbL)*GetScore2(ForAR, ForBR, AbR);\r
- \r
- double DivAB = IdAB/MaxId;\r
- double DivBA = IdBA/MaxId;\r
- double MaxDiv = max(DivAB, DivBA);\r
-\r
- //if (MaxDiv > BestDiv)\r
- // {\r
- // BestDiv = MaxDiv;\r
- // BestXLo = Col;\r
- // BestXHi = Col;\r
- // FirstA = (DivAB > DivBA);\r
- // if (FirstA)\r
- // BestIdQM = IdAB;\r
- // else\r
- // BestIdQM = IdBA;\r
- // }\r
- //else if (MaxDiv == BestDiv)\r
- // BestXHi = Col;\r
-\r
- double MaxScore = max(ScoreAB, ScoreBA);\r
- if (MaxScore > BestScore)\r
- {\r
- BestScore = MaxScore;\r
- BestXLo = Col;\r
- BestXHi = Col;\r
- FirstA = (ScoreAB > ScoreBA);\r
- if (FirstA)\r
- BestIdQM = IdAB;\r
- else\r
- BestIdQM = IdBA;\r
- if (MaxDiv > BestDiv)\r
- BestDiv = MaxDiv;\r
- }\r
- else if (MaxScore == BestScore)\r
- {\r
- BestXHi = Col;\r
- if (MaxDiv > BestDiv)\r
- BestDiv = MaxDiv;\r
- }\r
-\r
-#if TRACE\r
- {\r
- Log("%5u", Col);\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
- Log(" %c%c%c", a, q, b);\r
- Log(" %5u", SameAL);\r
- Log(" %5u", SameBL);\r
- Log(" %5u", SameAR);\r
- Log(" %5u", SameBR);\r
- Log(" %5.4f", DivAB);\r
- Log(" %5.4f", DivBA);\r
- Log(" %5u", ForAL);\r
- Log(" %5u", ForBL);\r
- Log(" %5u", ForAR);\r
- Log(" %5u", ForBR);\r
- Log(" %5u", AbL);\r
- Log(" %5u", AbR);\r
- Log(" %7.4f", ScoreAB);\r
- Log(" %7.4f", ScoreBA);\r
- if (BestXLo != UINT_MAX)\r
- Log(" %5u", BestXLo);\r
- if (BestXHi != UINT_MAX)\r
- Log(" %5u", BestXHi);\r
- Log("\n");\r
- }\r
-#endif\r
- }\r
-\r
- if (BestXLo == UINT_MAX)\r
- {\r
-#if TRACE\r
- Log("\n");\r
- Log("No crossover found.\n");\r
-#endif\r
- return;\r
- }\r
-#if TRACE\r
- Log("BestX col %u - %u\n", BestXLo, BestXHi);\r
-#endif\r
-\r
-// Find maximum region of identity within BestXLo..BestXHi\r
- unsigned ColXLo = (BestXLo + BestXHi)/2;\r
- unsigned ColXHi = ColXLo;\r
- unsigned SegLo = UINT_MAX;\r
- unsigned SegHi = UINT_MAX;\r
- for (unsigned Col = BestXLo; Col <= BestXHi; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- if (q == a && q == b)\r
- {\r
- if (SegLo == UINT_MAX)\r
- SegLo = Col;\r
- SegHi = Col;\r
- }\r
- else\r
- {\r
- unsigned SegLength = SegHi - SegLo + 1;\r
- unsigned BestSegLength = ColXHi - ColXLo + 1;\r
- if (SegLength > BestSegLength)\r
- {\r
- ColXLo = SegLo;\r
- ColXHi = SegHi;\r
- }\r
- SegLo = UINT_MAX;\r
- SegHi = UINT_MAX;\r
- }\r
- }\r
- unsigned SegLength = SegHi - SegLo + 1;\r
- unsigned BestSegLength = ColXHi - ColXLo + 1;\r
- if (SegLength > BestSegLength)\r
- {\r
- ColXLo = SegLo;\r
- ColXHi = SegHi;\r
- }\r
-\r
- QPos = 0;\r
- for (unsigned x = 0; x < ColCount; ++x)\r
- {\r
- if (x == ColXLo)\r
- Hit.QXLo = QPos;\r
- else if (x == ColXHi)\r
- {\r
- Hit.QXHi = QPos;\r
- break;\r
- }\r
- char q = Q3Seq[x];\r
- if (q != '-')\r
- ++QPos;\r
- }\r
-\r
- Hit.ColXLo = ColXLo;\r
- Hit.ColXHi = ColXHi;\r
-\r
- //if (FirstA)\r
- // {\r
- // Hit.LY = AccumForA[ColXLo];\r
- // Hit.LN = AccumForB[ColXLo];\r
-\r
- // Hit.RY = SumForB - AccumForB[ColXHi];\r
- // Hit.RN = SumForA - AccumForA[ColXHi];\r
- // }\r
- //else\r
- // {\r
- // Hit.LY = AccumForB[ColXLo];\r
- // Hit.LN = AccumForA[ColXLo];\r
- // Hit.RY = SumForA - AccumForA[ColXHi];\r
- // Hit.RN = SumForB - AccumForB[ColXHi];\r
- // }\r
-\r
- //Hit.LA = AccumAgainst[ColXLo];\r
- //Hit.LD = AccumAbstain[ColXLo];\r
-\r
- //Hit.RA = SumAgainst - AccumAgainst[ColXHi];\r
- //Hit.RD = SumAbstain - AccumAbstain[ColXHi];\r
-\r
- Hit.PctIdAB = IdAB*100.0;\r
- Hit.PctIdQM = BestIdQM*100.0;\r
-\r
- Hit.Div = (BestDiv - 1.0)*100.0;\r
-\r
- //Hit.QSD = QSD;\r
- Hit.Q3 = Q3;\r
- Hit.QLabel = QLabel;\r
- if (FirstA)\r
- {\r
- //Hit.ASD = ASD;\r
- //Hit.BSD = BSD;\r
- //Hit.PathQA = PathQA;\r
- //Hit.PathQB = PathQB;\r
- Hit.A3 = A3;\r
- Hit.B3 = B3;\r
- Hit.ALabel = ALabel;\r
- Hit.BLabel = BLabel;\r
- Hit.PctIdQA = IdQA*100.0;\r
- Hit.PctIdQB = IdQB*100.0;\r
- }\r
- else\r
- {\r
- Hit.A3 = B3;\r
- Hit.B3 = A3;\r
- Hit.ALabel = BLabel;\r
- Hit.BLabel = ALabel;\r
- Hit.PctIdQA = IdQB*100.0;\r
- Hit.PctIdQB = IdQA*100.0;\r
- }\r
-\r
-// CS SNPs\r
- Hit.CS_LY = 0;\r
- Hit.CS_LN = 0;\r
- Hit.CS_RY = 0;\r
- Hit.CS_RN = 0;\r
- Hit.CS_LA = 0;\r
- Hit.CS_RA = 0;\r
-\r
- //vector<float> Cons;\r
- //for (unsigned Col = 0; Col < ColCount; ++Col)\r
- // {\r
- // char q = Q3Seq[Col];\r
- // char a = A3Seq[Col];\r
- // char b = B3Seq[Col];\r
- // if (q == a && q == b && a == b)\r
- // {\r
- // Cons.push_back(1.0f);\r
- // continue;\r
- // }\r
-\r
- // bool gapq = isgap(q);\r
- // bool gapa = isgap(a);\r
- // bool gapb = isgap(b);\r
-\r
- // if (!gapq && !gapa && !gapb)\r
- // {\r
- // if (q == a || q == b || a == b)\r
- // Cons.push_back(0.75);\r
- // else\r
- // Cons.push_back(0.5);\r
- // }\r
- // else\r
- // {\r
- // if (!gapa && (a == b || a == q))\r
- // Cons.push_back(0.5f);\r
- // else if (!gapb && b == q)\r
- // Cons.push_back(0.5f);\r
- // else\r
- // Cons.push_back(0.0f);\r
- // }\r
- // }\r
-\r
- //float fLY = 0.0f;\r
- //float fLN = 0.0f;\r
- //float fLA = 0.0f;\r
- //float fRY = 0.0f;\r
- //float fRN = 0.0f;\r
- //float fRA = 0.0f;\r
- for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
- if (q == a && q == b && a == b)\r
- continue;\r
-\r
- unsigned ngaps = 0;\r
- if (isgap(q))\r
- ++ngaps;\r
- if (isgap(a))\r
- ++ngaps;\r
- if (isgap(b))\r
- ++ngaps;\r
-\r
- if (opt_skipgaps)\r
- {\r
- if (ngaps == 3)\r
- continue;\r
- }\r
- else\r
- {\r
- if (ngaps == 2)\r
- continue;\r
- }\r
-\r
- if (!FirstA)\r
- swap(a, b);\r
-\r
- //float AvgCons = (Cons[Col-2] + Cons[Col-1] + Cons[Col+1] + Cons[Col+2])/4;\r
- //if (Col < ColXLo)\r
- // {\r
- // if (q == a && q != b)\r
- // fLY += AvgCons;\r
- // else if (q == b && q != a)\r
- // fLN += AvgCons;\r
- // else\r
- // fLA += AvgCons;\r
- // }\r
- //else if (Col > ColXHi)\r
- // {\r
- // if (q == b && q != a)\r
- // fRY += AvgCons;\r
- // else if (q == a && q != b)\r
- // fRN += AvgCons;\r
- // else\r
- // fRA += AvgCons;\r
- // }\r
-\r
- if (opt_skipgaps2)\r
- {\r
- if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
- continue;\r
- if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
- continue;\r
- }\r
-\r
- //if (Col > 0 && isgap(Q3Seq[Col-1]))\r
- //continue;\r
- //if (Col + 1 < ColCount && isgap(Q3Seq[Col+1]))\r
- // continue;\r
-\r
- if (Col < ColXLo)\r
- {\r
- if (q == a && q != b)\r
- ++Hit.CS_LY;\r
- else if (q == b && q != a)\r
- ++Hit.CS_LN;\r
- else\r
- ++Hit.CS_LA;\r
- }\r
- else if (Col > ColXHi)\r
- {\r
- if (q == b && q != a)\r
- ++Hit.CS_RY;\r
- else if (q == a && q != b)\r
- ++Hit.CS_RN;\r
- else\r
- ++Hit.CS_RA;\r
- }\r
- }\r
-\r
- double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
- double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
- Hit.Score = ScoreL*ScoreR;\r
-\r
- extern bool g_UchimeDeNovo;\r
-\r
- //if (0)//g_UchimeDeNovo)\r
- // {\r
- // double AbQ = GetAbFromLabel(QLabel.c_str());\r
- // double AbA = GetAbFromLabel(ALabel.c_str());\r
- // double AbB = GetAbFromLabel(BLabel.c_str());\r
- // if (AbQ > 0.0 && AbA > 0.0 && AbB > 0.0)\r
- // {\r
- // double MinAb = min(AbA, AbB);\r
- // double Ratio = MinAb/AbQ;\r
- // double t = Ratio - opt_abx;\r
- // // double Factor = 2.0/(1.0 + exp(-t));\r
- // double Factor = min(Ratio, opt_abx)/opt_abx;\r
- // if (opt_verbose)\r
- // Log("Score %.4f Ab factor %.4f >%s\n", Hit.Score, Factor, QLabel.c_str());\r
- // Hit.Score *= Factor;\r
- // }\r
- // }\r
-\r
- extern FILE *g_fUChimeAlns;\r
- if (g_fUChimeAlns != 0 && Hit.Div > 0.0)\r
- {\r
- void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit);\r
- WriteChimeHitX(g_fUChimeAlns, Hit);\r
- }\r
- }\r
-\r
-void AlignChime3(const string &Q3, const string &A3, const string &B3,\r
- const string &QLabel, const string &ALabel, const string &BLabel,\r
- ChimeHit2 &Hit)\r
- {\r
- if (opt_ucl)\r
- AlignChimeLocal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);\r
- else\r
- AlignChimeGlobal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);\r
- }\r
-\r
-static void StripGaps(const byte *Seq, unsigned L, string &s)\r
- {\r
- s.clear();\r
- for (unsigned i = 0; i < L; ++i)\r
- {\r
- char c = Seq[i];\r
- if (!isgap(c))\r
- s.push_back(c);\r
- }\r
- }\r
-\r
-static void StripGapsAlloc(const SeqData &SDIn, SeqData &SDOut)\r
- {\r
- SDOut = SDIn;\r
- byte *s = myalloc(byte, SDIn.L);\r
- unsigned k = 0;\r
- for (unsigned i = 0; i < SDIn.L; ++i)\r
- {\r
- char c = SDIn.Seq[i];\r
- if (!isgap(c))\r
- s[k++] = toupper(c);\r
- }\r
- SDOut.Seq = s;\r
- SDOut.L = k;\r
- }\r
-\r
-void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
- const string &PathQA, const string &PathQB, ChimeHit2 &Hit)\r
- {\r
- //if (opt_ucl)\r
- // {\r
- // AlignChimeLocal(QSD, ASD, BSD, PathQA, PathQB, Hit);\r
- // return;\r
- // }\r
-\r
- string Q3;\r
- string A3;\r
- string B3;\r
- Make3Way(QSD, ASD, BSD, PathQA, PathQB, Q3, A3, B3);\r
-\r
- AlignChime3(Q3, A3, B3, QSD.Label, ASD.Label, BSD.Label, Hit);\r
- }\r
-\r
-void AlignChime3SDRealign(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,\r
- ChimeHit2 &Hit)\r
- {\r
- SeqData QSD;\r
- SeqData ASD;\r
- SeqData BSD;\r
- StripGapsAlloc(QSD3, QSD);\r
- StripGapsAlloc(ASD3, ASD);\r
- StripGapsAlloc(BSD3, BSD);\r
-\r
- string PathQA;\r
- string PathQB;\r
- bool FoundQA = GlobalAlign(QSD, ASD, PathQA);\r
- bool FoundQB = GlobalAlign(QSD, BSD, PathQB);\r
- if (!FoundQA || !FoundQB)\r
- {\r
- Hit.Clear();\r
- Hit.QLabel = QSD3.Label;\r
- return;\r
- }\r
-\r
- AlignChime(QSD, ASD, BSD, PathQA, PathQB, Hit);\r
-\r
- myfree((void *) QSD.Seq);\r
- myfree((void *) ASD.Seq);\r
- myfree((void *) BSD.Seq);\r
- }\r
-\r
-void AlignChime3SD(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,\r
- ChimeHit2 &Hit)\r
- {\r
- if (opt_realign)\r
- {\r
- AlignChime3SDRealign(QSD3, ASD3, BSD3, Hit);\r
- return;\r
- }\r
-\r
- string Q3;\r
- string A3;\r
- string B3;\r
-\r
- const unsigned ColCount = QSD3.L;\r
- asserta(ASD3.L == ColCount && BSD3.L == ColCount);\r
-\r
- Q3.reserve(ColCount);\r
- A3.reserve(ColCount);\r
- B3.reserve(ColCount);\r
-\r
- const byte *QS = QSD3.Seq;\r
- const byte *AS = ASD3.Seq;\r
- const byte *BS = BSD3.Seq;\r
- for (unsigned Col = 0; Col < ColCount; ++Col)\r
- {\r
- byte q = toupper(QS[Col]);\r
- byte a = toupper(AS[Col]);\r
- byte b = toupper(BS[Col]);\r
-\r
- if (isgap(q) && isgap(a) && isgap(b))\r
- continue;\r
-\r
- Q3.push_back(q);\r
- A3.push_back(a);\r
- B3.push_back(b);\r
- }\r
-\r
- AlignChime3(Q3, A3, B3, QSD3.Label, ASD3.Label, BSD3.Label, Hit);\r
- }\r
+++ /dev/null
-#include "myutils.h"\r
-#include "seq.h"\r
-#include "chime.h"\r
-\r
-#define TRACE 0\r
-\r
-/***\r
-Let:\r
- S[i] = Score of col i: 0=no SNP, +1 = Y, -3 = N or A.\r
-\r
- V[k] = Best segment score from j, j+1 .. k for all possible j\r
- max(j) Sum i=j..k S[i]\r
-\r
-Recursion relation:\r
- V[k] = S[k] + max (V[k-1], 0)\r
-***/\r
-\r
-void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
- const string &QLabel, const string &ALabel, const string &BLabel,\r
- ChimeHit2 &Hit);\r
-\r
-void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
- const string &PathQA, const string &PathQB,\r
- string &Q3, string &A3, string &B3);\r
-\r
-double GetScore2(double Y, double N, double A);\r
-\r
-void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
- const string &QLabel, const string &ALabel, const string &BLabel,\r
- ChimeHit2 &Hit)\r
- {\r
- Hit.Clear();\r
-\r
- const byte *Q3Seq = (const byte *) Q3.c_str();\r
- const byte *A3Seq = (const byte *) A3.c_str();\r
- const byte *B3Seq = (const byte *) B3.c_str();\r
-\r
- const unsigned ColCount = SIZE(Q3);\r
- asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
-\r
- vector<float> ColScoresA(ColCount, 0.0f);\r
- vector<float> ColScoresB(ColCount, 0.0f);\r
-\r
- float ScoreN = -(float) opt_xn;\r
- unsigned QL = 0;\r
- for (unsigned Col = 0; Col < ColCount; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- if (!isgap(q))\r
- ++QL;\r
-\r
- if (q == a && q == b && a == b)\r
- continue;\r
-\r
- if (isgap(q) || isgap(a) || isgap(b))\r
- continue;\r
-\r
- if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
- continue;\r
-\r
- if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
- continue;\r
-\r
- if (q == a && q != b)\r
- ColScoresA[Col] = 1;\r
- else\r
- ColScoresA[Col] = ScoreN;\r
-\r
- if (q == b && q != a)\r
- ColScoresB[Col] = 1;\r
- else\r
- ColScoresB[Col] = ScoreN;\r
- }\r
-\r
- vector<float> LVA(ColCount, 0.0f);\r
- vector<float> LVB(ColCount, 0.0f);\r
-\r
- LVA[0] = ColScoresA[0];\r
- LVB[0] = ColScoresB[0];\r
- for (unsigned Col = 1; Col < ColCount; ++Col)\r
- {\r
- LVA[Col] = max(LVA[Col-1], 0.0f) + ColScoresA[Col];\r
- LVB[Col] = max(LVB[Col-1], 0.0f) + ColScoresB[Col];\r
- }\r
-\r
- vector<float> RVA(ColCount, 0.0f);\r
- vector<float> RVB(ColCount, 0.0f);\r
-\r
- RVA[ColCount-1] = ColScoresA[ColCount-1];\r
- RVB[ColCount-1] = ColScoresB[ColCount-1];\r
- for (int Col = ColCount-2; Col >= 0; --Col)\r
- {\r
- RVA[Col] = max(RVA[Col+1], 0.0f) + ColScoresA[Col];\r
- RVB[Col] = max(RVB[Col+1], 0.0f) + ColScoresB[Col];\r
- }\r
-\r
- bool FirstA = true;\r
- float MaxSum = 0.0;\r
- unsigned ColX = UINT_MAX;\r
- for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
- {\r
- float Sum = LVA[Col] + RVB[Col+1];\r
- if (Sum > MaxSum)\r
- {\r
- FirstA = true;\r
- MaxSum = Sum;\r
- ColX = Col;\r
- }\r
- }\r
-\r
- for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
- {\r
- float Sum = LVB[Col] + RVA[Col+1];\r
- if (Sum > MaxSum)\r
- {\r
- FirstA = false;\r
- MaxSum = Sum;\r
- ColX = Col;\r
- }\r
- }\r
- if (ColX == UINT_MAX)\r
- return;\r
-\r
- unsigned ColLo = UINT_MAX;\r
- unsigned ColHi = UINT_MAX;\r
- if (FirstA)\r
- {\r
- float Sum = 0.0f;\r
- for (int Col = ColX; Col >= 0; --Col)\r
- {\r
- Sum += ColScoresA[Col];\r
- if (Sum >= LVA[ColX])\r
- {\r
- ColLo = Col;\r
- break;\r
- }\r
- }\r
- asserta(Sum >= LVA[ColX]);\r
- Sum = 0.0f;\r
- for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
- {\r
- Sum += ColScoresB[Col];\r
- if (Sum >= RVB[ColX])\r
- {\r
- ColHi = Col;\r
- break;\r
- }\r
- }\r
- asserta(Sum >= RVB[ColX]);\r
- }\r
- else\r
- {\r
- float Sum = 0.0f;\r
- for (int Col = ColX; Col >= 0; --Col)\r
- {\r
- Sum += ColScoresB[Col];\r
- if (Sum >= LVB[ColX])\r
- {\r
- ColLo = Col;\r
- break;\r
- }\r
- }\r
- asserta(Sum >= LVB[ColX]);\r
- Sum = 0.0f;\r
- for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
- {\r
- Sum += ColScoresA[Col];\r
- if (Sum >= RVA[ColX])\r
- {\r
- ColHi = Col;\r
- break;\r
- }\r
- }\r
- asserta(Sum >= RVA[ColX]);\r
- }\r
-\r
- unsigned ColXHi = ColX;\r
- for (unsigned Col = ColX + 1; Col < ColCount; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
- \r
- if (q == a && q == b && !isgap(q))\r
- ColXHi = Col;\r
- else\r
- break;\r
- }\r
-\r
- unsigned ColXLo = ColX;\r
- for (int Col = (int) ColX - 1; Col >= 0; --Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
- \r
- if (q == a && q == b && !isgap(q))\r
- ColXLo = Col;\r
- else\r
- break;\r
- }\r
-\r
- unsigned IdQA = 0;\r
- unsigned IdQB = 0;\r
- unsigned IdAB = 0;\r
- unsigned NQA = 0;\r
- unsigned NQB = 0;\r
- unsigned NAB = 0;\r
- for (unsigned Col = 0; Col < ColCount; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- if (!isgap(q) && !isgap(a))\r
- {\r
- ++NQA;\r
- if (q == a)\r
- ++IdQA;\r
- }\r
-\r
- if (!isgap(q) && !isgap(b))\r
- {\r
- ++NQB;\r
- if (q == b)\r
- ++IdQB;\r
- }\r
-\r
- if (!isgap(a) && !isgap(b))\r
- {\r
- ++NAB;\r
- if (a == b)\r
- ++IdAB;\r
- }\r
- }\r
-\r
- Hit.PctIdQA = Pct(IdQA, NQA);\r
- Hit.PctIdQB = Pct(IdQB, NQB);\r
- Hit.PctIdAB = Pct(IdAB, NAB);\r
-\r
- unsigned LIdQA = 0;\r
- unsigned LIdQB = 0;\r
- for (unsigned Col = ColLo; Col < ColXLo; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- if (!isgap(q) && !isgap(a))\r
- {\r
- if (q == a)\r
- ++LIdQA;\r
- }\r
-\r
- if (!isgap(q) && !isgap(b))\r
- {\r
- if (q == b)\r
- ++LIdQB;\r
- }\r
- }\r
-\r
- unsigned RIdQA = 0;\r
- unsigned RIdQB = 0;\r
- for (unsigned Col = ColXHi+1; Col <= ColHi; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- if (!isgap(q) && !isgap(a))\r
- {\r
- if (q == a)\r
- ++RIdQA;\r
- }\r
-\r
- if (!isgap(q) && !isgap(b))\r
- {\r
- if (q == b)\r
- ++RIdQB;\r
- }\r
- }\r
-\r
- unsigned IdDiffL = max(LIdQA, LIdQB) - min(LIdQA, LIdQB);\r
- unsigned IdDiffR = max(RIdQA, RIdQB) - min(RIdQA, RIdQB);\r
- unsigned MinIdDiff = min(IdDiffL, IdDiffR);\r
- unsigned ColRange = ColHi - ColLo + 1;\r
- if (opt_queryfract > 0.0f && float(ColRange)/float(QL) < opt_queryfract)\r
- return;\r
-\r
-// double Div = Pct(MinIdDiff, QSD.L);\r
-\r
-#if TRACE\r
- {\r
- Log(" Col A Q B ScoreA ScoreB LVA LVB RVA RVB\n");\r
- Log("----- - - - ------- ------- ------- ------- ------- -------\n");\r
- for (unsigned Col = 0; Col < ColCount; ++Col)\r
- {\r
- if (ColScoresA[Col] == 0.0 && ColScoresB[Col] == 0.0)\r
- continue;\r
-\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
- Log("%5u %c %c %c", Col, a, q, b);\r
-\r
- if (ColScoresA[Col] == 0.0)\r
- Log(" %7.7s", "");\r
- else\r
- Log(" %7.1f", ColScoresA[Col]);\r
-\r
- if (ColScoresB[Col] == 0.0)\r
- Log(" %7.7s", "");\r
- else\r
- Log(" %7.1f", ColScoresB[Col]);\r
-\r
- Log(" %7.1f %7.1f %7.1f %7.1f", LVA[Col], LVB[Col], RVA[Col], RVB[Col]);\r
-\r
- Log("\n");\r
- }\r
- Log("\n");\r
- Log("MaxSum %.1f, ColLo %u, ColXLo %u, ColX %u, ColXHi %u, ColHi %u, AF %c\n",\r
- MaxSum, ColLo, ColXLo, ColX, ColXHi, ColHi, tof(FirstA));\r
- Log(" LIdQA %u, LIdQB %u, RIdQA %u, RIdQB %u\n", LIdQA, LIdQB, RIdQA, RIdQB);\r
- }\r
-#endif\r
-\r
- string Q3L;\r
- string A3L;\r
- string B3L;\r
- for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
- {\r
- char q = Q3[Col];\r
- char a = A3[Col];\r
- char b = B3[Col];\r
-\r
- Q3L += q;\r
- A3L += a;\r
- B3L += b;\r
- }\r
-\r
- AlignChimeGlobal3(Q3L, A3L, B3L, QLabel, ALabel, BLabel, Hit);\r
-\r
-#if 0\r
-// CS SNPs\r
- Hit.CS_LY = 0;\r
- Hit.CS_LN = 0;\r
- Hit.CS_RY = 0;\r
- Hit.CS_RN = 0;\r
- Hit.CS_LA = 0;\r
- Hit.CS_RA = 0;\r
- for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
- if (q == a && q == b && a == b)\r
- continue;\r
- if (isgap(q) || isgap(a) || isgap(b))\r
- continue;\r
- if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
- continue;\r
- if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
- continue;\r
-\r
- if (!FirstA)\r
- swap(a, b);\r
-\r
- if (Col < ColXLo)\r
- {\r
- if (q == a && q != b)\r
- ++Hit.CS_LY;\r
- else if (q == b && q != a)\r
- ++Hit.CS_LN;\r
- else\r
- ++Hit.CS_LA;\r
- }\r
- else if (Col > ColXHi)\r
- {\r
- if (q == b && q != a)\r
- ++Hit.CS_RY;\r
- else if (q == a && q != b)\r
- ++Hit.CS_RN;\r
- else\r
- ++Hit.CS_RA;\r
- }\r
- }\r
-\r
- double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
- double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
- Hit.Score = ScoreL*ScoreR;\r
-\r
- //Hit.QSD = QSD;\r
- //if (FirstA)\r
- // {\r
- // Hit.ASD = ASD;\r
- // Hit.BSD = BSD;\r
- // Hit.PathQA = PathQA;\r
- // Hit.PathQB = PathQB;\r
- // }\r
- //else\r
- // {\r
- // Hit.ASD = BSD;\r
- // Hit.BSD = ASD;\r
- // }\r
-\r
- //Hit.ColLo = ColLo;\r
- //Hit.ColXLo = ColXLo;\r
- //Hit.ColXHi = ColXHi;\r
- //Hit.ColHi = ColHi;\r
- //Hit.Div = Div;\r
-\r
-// Hit.LogMe();\r
-#endif\r
- }\r
+++ /dev/null
-A(Alpha)\r
-A(Mx)\r
-A(ChainBrute)\r
-A(Chainer)\r
-A(Test)\r
-A(CompressPath)\r
-A(HSPFinder)\r
-A(Main)\r
-A(Clumps)\r
-A(Path)\r
-A(SeqDB)\r
-A(SFasta)\r
-A(SWUngapped)\r
-A(AllocBit)\r
-A(Ultra)\r
-A(UPGMA)\r
-A(Windex)\r
-A(XDropBwd)\r
-A(Xlat)\r
-A(MPath)\r
-A(ScoreCache)\r
-A(TargetHits)\r
-A(Out)\r
-A(Hashdex)\r
+++ /dev/null
-#ifndef alnheuristics_h\r
-#define alnheuristics_h\r
-\r
-struct AlnParams;\r
-\r
-struct AlnHeuristics\r
- {\r
- unsigned BandRadius;\r
- unsigned HSPFinderWordLength;\r
- float SeedT;\r
-\r
- float XDropG; // GappedBlast default\r
- float XDropU; // UngappedBlast default\r
- float XDropUG; // UngappedBlast called by GappedBlast\r
-\r
- unsigned MinGlobalHSPLength;\r
-\r
- AlnHeuristics();\r
- void InitFromCmdLine(const AlnParams &AP);\r
- void InitGlobalFull();\r
-\r
- bool IsGlobalFull() const\r
- {\r
- return MinGlobalHSPLength == 0 && BandRadius == 0;\r
- }\r
-\r
- };\r
-\r
-#endif // alnheuristics_h\r
+++ /dev/null
-#include "myutils.h"\r
-#include <float.h> // for FLT_MAX\r
-#include "mx.h"\r
-#include "alnparams.h"\r
-#include "hsp.h"\r
-\r
-#define TEST 0\r
-\r
-void SetBLOSUM62();
-void SetNucSubstMx(double Match, double Mismatch);\r
-void ReadSubstMx(const string &FileName, Mx<float> &Mxf);\r
-
-extern Mx<float> g_SubstMxf;
-extern float **g_SubstMx;
-\r
-void AlnParams::Clear()\r
- {\r
- SubstMxName = 0;\r
- LocalOpen = OBVIOUSLY_WRONG_PENALTY;\r
- LocalExt = OBVIOUSLY_WRONG_PENALTY;\r
- OpenA = OBVIOUSLY_WRONG_PENALTY;\r
- OpenB = OBVIOUSLY_WRONG_PENALTY;\r
- ExtA = OBVIOUSLY_WRONG_PENALTY;\r
- ExtB = OBVIOUSLY_WRONG_PENALTY;\r
- LOpenA = OBVIOUSLY_WRONG_PENALTY;\r
- LOpenB = OBVIOUSLY_WRONG_PENALTY;\r
- ROpenA = OBVIOUSLY_WRONG_PENALTY;\r
- ROpenB = OBVIOUSLY_WRONG_PENALTY;\r
- LExtA = OBVIOUSLY_WRONG_PENALTY;\r
- LExtB = OBVIOUSLY_WRONG_PENALTY;\r
- RExtA = OBVIOUSLY_WRONG_PENALTY;\r
- RExtB = OBVIOUSLY_WRONG_PENALTY;\r
- Nucleo = false;\r
- NucleoSet = false;\r
- }\r
-\r
-bool AlnParams::Is2() const\r
- {\r
- float g = OpenA;\r
- float e = ExtA;\r
- if (OpenB != g || LOpenA != g || LOpenB != g || ROpenA != g || ROpenB != g)\r
- return false;\r
- if (ExtB != e || LExtA != e || LExtB != e || RExtA != e || RExtB != e)\r
- return false;\r
- return true;\r
- }\r
-\r
-bool AlnParams::Is4() const\r
- {\r
- float g = OpenA;\r
- float tg = LOpenA;\r
- float e = ExtA;\r
- float te = LExtA;\r
- if (OpenB != g || LOpenA != tg || LOpenB != tg || ROpenA != tg || ROpenB != tg)\r
- return false;\r
- if (ExtB != e || LExtA != te || LExtB != te || RExtA != te || RExtB != te)\r
- return false;\r
- return true;\r
- }\r
-\r
-const char *AlnParams::GetType() const\r
- {\r
- if (Is2())\r
- return "2";\r
- else if (Is4())\r
- return "4";\r
- return "12";\r
- }\r
-\r
-void AlnParams::Init2(const float * const *Mx, float Open, float Ext)\r
- {\r
- SubstMx = Mx;\r
- OpenA = OpenB = LOpenA = LOpenB = ROpenA = ROpenB = Open;\r
- ExtA = ExtB = LExtA = LExtB = RExtA = RExtB = Ext;\r
- }\r
-\r
-void AlnParams::SetLocal(float Open, float Ext)\r
- {\r
- LocalOpen = Open;\r
- LocalExt = Ext;\r
- }\r
-\r
-void AlnParams::Init4(const float * const *Mx, float Open, float Ext,\r
- float TermOpen, float TermExt)\r
- {\r
- SubstMx = Mx;\r
- OpenA = OpenB = Open;\r
- LOpenA = LOpenB = ROpenA = ROpenB = TermOpen;\r
- ExtA = ExtB = Ext;\r
- LExtA = LExtB = RExtA = RExtB = TermExt;\r
- }\r
-\r
-void AlnParams::Init(const AlnParams &AP, const HSPData &HSP,\r
- unsigned LA, unsigned LB)\r
- {\r
- SubstMx = AP.SubstMx;\r
- OpenA = AP.OpenA;\r
- OpenB = AP.OpenB;\r
- ExtA = AP.ExtA;\r
- ExtB = AP.ExtB;\r
-\r
- if (HSP.LeftA())\r
- {\r
- LOpenA = AP.LOpenA;\r
- LExtA = AP.LExtA;\r
- }\r
- else\r
- {\r
- LOpenA = AP.OpenA;\r
- LExtA = AP.ExtA;\r
- }\r
-\r
- if (HSP.LeftB())\r
- {\r
- LOpenB = AP.LOpenB;\r
- LExtB = AP.LExtB;\r
- }\r
- else\r
- {\r
- LOpenB = AP.OpenB;\r
- LExtB = AP.ExtB;\r
- }\r
-\r
- if (HSP.RightA(LA))\r
- {\r
- ROpenA = AP.ROpenA;\r
- RExtA = AP.RExtA;\r
- }\r
- else\r
- {\r
- ROpenA = AP.OpenA;\r
- RExtA = AP.ExtA;\r
- }\r
-\r
- if (HSP.RightB(LB))\r
- {\r
- ROpenB = AP.ROpenB;\r
- RExtB = AP.RExtB;\r
- }\r
- else\r
- {\r
- ROpenB = AP.OpenB;\r
- RExtB = AP.ExtB;\r
- }\r
- }\r
-\r
-void AlnParams::LogMe() const\r
- {\r
- Log("AlnParams(%s)", GetType());\r
- if (Is2())\r
- Log(" g=%.1f e=%.1f", -OpenA, -ExtA);\r
- else if (Is4())\r
- Log(" g=%.1f tg=%.1f e=%.1f te=%.1f", -OpenA, -ExtA, -LOpenA, -LExtA);\r
- else\r
- Log(\r
-" gA=%.1f gB=%.1f gAL=%.1f gBL=%.1f gAR=%.1f gBR=%.1f eA=%.1f eB=%.1f eAL=%.1f eBL=%.1f eAR=%.1f eBR=%.1f",\r
- OpenA, OpenB, LOpenA, LOpenB, ROpenA, ROpenB, ExtA, ExtB, LExtA, LExtB, RExtA, RExtB);\r
- Log("\n");\r
- }\r
-\r
-/***\r
-Open/Ext format string is one or more:\r
- [<flag><flag>...]<value>\r
-\r
-Value is (positive) penalty or * (disabled).\r
-Flag is:\r
- Q Query.\r
- T Target sequence.\r
- I Internal gaps (defafault internal and terminal).\r
- E End gaps (default internal and terminal).\r
- L Left end.\r
- R Right end.\r
-***/\r
-\r
-static void ParseGapStr(const string &s,\r
- float &QI, float &QL, float &QR,\r
- float &TI, float &TL, float &TR)\r
- {\r
- if (s.empty())\r
- return;\r
-\r
- bool Q = false;\r
- bool T = false;\r
- bool I = false;\r
- bool E = false;\r
- bool L = false;\r
- bool R = false;\r
-\r
- const unsigned K = SIZE(s);\r
- unsigned Dec = 0;\r
- float Value = FLT_MAX;\r
- for (unsigned i = 0; i <= K; ++i)\r
- {\r
- char c = s.c_str()[i];\r
- if (c == 0 || c == '/')\r
- {\r
- if (Value == FLT_MAX)\r
- Die("Invalid gap penalty string, missing penalty '%s'", s.c_str());\r
- if (!Q && !T && !I && !E && !L && !R)\r
- {\r
- Q = true;\r
- T = true;\r
- L = true;\r
- R = true;\r
- I = true;\r
- }\r
-\r
- if (!E && !I && !L && !R)\r
- {\r
- E = false;\r
- I = true;\r
- L = true;\r
- R = true;\r
- }\r
-\r
- if (E)\r
- {\r
- if (L || R)\r
- Die("Invalid gap penalty string (E and L or R) '%s'", s.c_str());\r
- L = true;\r
- R = true;\r
- }\r
-\r
- if (!Q && !T)\r
- {\r
- Q = true;\r
- T = true;\r
- }\r
-\r
- if (Q && L)\r
- QL = -Value;\r
- if (Q && R)\r
- QR = -Value;\r
- if (Q && I)\r
- QI = -Value;\r
- if (T && L)\r
- TL = -Value;\r
- if (T && R)\r
- TR = -Value;\r
- if (T && I)\r
- TI = -Value;\r
- \r
- Value = FLT_MAX;\r
- Dec = 0;\r
- Q = false;\r
- T = false;\r
- I = false;\r
- E = false;\r
- L = false;\r
- R = false;\r
- }\r
- else if (c == '*')\r
- {\r
- if (Value != FLT_MAX)\r
- Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
- Value = -MINUS_INFINITY;\r
- }\r
- else if (isdigit(c))\r
- {\r
- if (Value == -MINUS_INFINITY)\r
- Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
- if (Value == FLT_MAX)\r
- Value = 0.0;\r
- if (Dec > 0)\r
- {\r
- Dec *= 10;\r
- Value += float(c - '0')/Dec;\r
- }\r
- else\r
- Value = Value*10 + (c - '0');\r
- }\r
- else if (c == '.')\r
- {\r
- if (Dec > 0)\r
- Die("Invalid gap penalty (two decimal points) '%s'", s.c_str());\r
- Dec = 1;\r
- }\r
- else\r
- {\r
- switch (c)\r
- {\r
- case 'Q':\r
- Q = true;\r
- break;\r
- case 'T':\r
- T = true;\r
- break;\r
- case 'I':\r
- I = true;\r
- break;\r
- case 'L':\r
- L = true;\r
- break;\r
- case 'R':\r
- R = true;\r
- break;\r
- case 'E':\r
- E = true;\r
- break;\r
- default:\r
- Die("Invalid char '%c' in gap penalty string '%s'", c, s.c_str());\r
- }\r
- }\r
- }\r
- }\r
-\r
-void AlnParams::SetPenalties(const string &OpenStr, const string &ExtStr)\r
- {\r
- ParseGapStr(OpenStr, OpenA, LOpenA, ROpenA, OpenB, LOpenB, ROpenB);\r
- ParseGapStr(ExtStr, ExtA, LExtA, RExtA, ExtB, LExtB, RExtB);\r
- }\r
-\r
-void AlnParams::SetMxFromCmdLine(bool IsNucleo)\r
- {\r
- if (IsNucleo)\r
- SetNucSubstMx(opt_match, opt_mismatch);
- else\r
- {\r
- if (opt_matrix == "")\r
- {\r
- SubstMxName = "BLOSUM62";\r
- SetBLOSUM62();
- }
- else\r
- {\r
- ReadSubstMx(opt_matrix, g_SubstMxf);\r
- g_SubstMx = g_SubstMxf.GetData();\r
- g_SubstMxf.LogMe();\r
- SubstMxName = opt_matrix.c_str();\r
- }\r
- }\r
- SubstMx = g_SubstMx;\r
- asserta(SubstMx != 0);\r
- }\r
-\r
-void AlnParams::InitFromCmdLine(bool IsNucleo)\r
- {\r
- Clear();\r
- Nucleo = IsNucleo;\r
- NucleoSet = true;\r
-\r
- SetMxFromCmdLine(IsNucleo);\r
-\r
-// Local\r
- if (optset_lopen || optset_lext)\r
- {\r
- if (!optset_lopen || !optset_lext)\r
- Die("Must set both --lopen and --lext");\r
- if (opt_lopen < 0.0 || opt_lext < 0.0)\r
- Die("Invalid --lopen/--lext, gap penalties must be >= 0");\r
- SetLocal(float(-opt_lopen), float(-opt_lext));\r
- }\r
- else\r
- {\r
- // Same penalties, if-statement to note could differ.\r
- if (IsNucleo)\r
- SetLocal(-10.0f, -1.0f);\r
- else\r
- SetLocal(-10.0f, -1.0f);\r
- }\r
-\r
-// Global\r
- if (IsNucleo)\r
- Init4(g_SubstMx, -10.0, -1.0, -0.5, -0.5);
- else\r
- Init4(g_SubstMx, -17.0, -1.0, -0.5, -0.5);
- SetPenalties(opt_gapopen, opt_gapext);\r
- }\r
-\r
-float AlnParams::GetLocalOpen() const\r
- {\r
- return LocalOpen;\r
- }\r
-\r
-float AlnParams::GetLocalExt() const\r
- {\r
- return LocalExt;\r
- }\r
-\r
-bool AlnParams::GetIsNucleo() const\r
- {\r
- asserta(NucleoSet);\r
- return Nucleo;\r
- }\r
-\r
-unsigned GetWindexWordLength(bool Nucleo)\r
- {\r
- if (optset_w)\r
- return opt_w;\r
-\r
- if (Nucleo)\r
- return 8;\r
- else\r
- return 5;\r
- }\r
-\r
-#if TEST\r
-static void Test1(const string &os, const string &es)\r
- {\r
- AlnParams AP;\r
- Log("\n");\r
- Log("OpenStr %s\n", os.c_str());\r
- Log(" ExtStr %s\n", es.c_str());\r
- AP.SetPenalties(os, es);\r
- AP.LogMe();\r
- }\r
-\r
-void TestGapStr()\r
- {\r
- Test1("17I/0.5E", "1I/0.5E");\r
- Test1("17I/0.5L/0.4R", "1Q/2T");\r
- Test1("1QL/2QR/3QI/4TL/5TR/6TI", ".1QL/.2QR/.3QI/.4TL/.5TR/.6TI");\r
- }\r
-#endif // TEST\r
+++ /dev/null
-#ifndef alnparams_h\r
-#define alnparams_h\r
-\r
-struct HSPData;\r
-\r
-// Gap penalty scores are negative\r
-// (i.e., are scores, not penalties).\r
-struct AlnParams\r
- {\r
- const char *SubstMxName;\r
- const float * const *SubstMx;\r
-\r
- bool Nucleo;\r
- bool NucleoSet;\r
-\r
-// Local gaps\r
- float LocalOpen;\r
- float LocalExt;\r
-\r
-// Global internal gaps\r
- float OpenA;\r
- float OpenB;\r
-\r
- float ExtA;\r
- float ExtB;\r
-\r
-// Global terminal gaps\r
- float LOpenA;\r
- float LOpenB;\r
- float ROpenA;\r
- float ROpenB;\r
-\r
- float LExtA;\r
- float LExtB;\r
- float RExtA;\r
- float RExtB;\r
-\r
- void Clear();\r
- void SetLocal(float Open, float Ext);\r
- void Init2(const float * const *Mx, float Open, float Ext);\r
- void Init4(const float * const *Mx, float Open, float Ext, float TermOpen, float TermExt);\r
- void Init(const AlnParams &AP, const HSPData &HSP, unsigned LA, unsigned LB);\r
- void InitFromCmdLine(bool Nucleo);\r
- void SetMxFromCmdLine(bool Nucleo);\r
- void SetPenalties(const string &OpenStr, const string &ExtStr);\r
- float GetLocalOpen() const;\r
- float GetLocalExt() const;\r
- bool GetIsNucleo() const;\r
-\r
- bool Is2() const;\r
- bool Is4() const;\r
- const char *GetType() const;\r
-\r
- void LogMe() const;\r
- };\r
-\r
-const float OBVIOUSLY_WRONG_PENALTY = 1000.0;\r
-\r
-#endif // alnparams_h\r
+++ /dev/null
-// Generated by /p/py/alphac.py
-#include "alpha.h"
-
-unsigned g_CharToLetterAminoStop[256] =
- {
- INVALID_LETTER, // [ 0] 0x00
- INVALID_LETTER, // [ 1] 0x01
- INVALID_LETTER, // [ 2] 0x02
- INVALID_LETTER, // [ 3] 0x03
- INVALID_LETTER, // [ 4] 0x04
- INVALID_LETTER, // [ 5] 0x05
- INVALID_LETTER, // [ 6] 0x06
- INVALID_LETTER, // [ 7] 0x07
- INVALID_LETTER, // [ 8] 0x08
- INVALID_LETTER, // [ 9] 0x09
- INVALID_LETTER, // [ 10] 0x0a
- INVALID_LETTER, // [ 11] 0x0b
- INVALID_LETTER, // [ 12] 0x0c
- INVALID_LETTER, // [ 13] 0x0d
- INVALID_LETTER, // [ 14] 0x0e
- INVALID_LETTER, // [ 15] 0x0f
- INVALID_LETTER, // [ 16] 0x10
- INVALID_LETTER, // [ 17] 0x11
- INVALID_LETTER, // [ 18] 0x12
- INVALID_LETTER, // [ 19] 0x13
- INVALID_LETTER, // [ 20] 0x14
- INVALID_LETTER, // [ 21] 0x15
- INVALID_LETTER, // [ 22] 0x16
- INVALID_LETTER, // [ 23] 0x17
- INVALID_LETTER, // [ 24] 0x18
- INVALID_LETTER, // [ 25] 0x19
- INVALID_LETTER, // [ 26] 0x1a
- INVALID_LETTER, // [ 27] 0x1b
- INVALID_LETTER, // [ 28] 0x1c
- INVALID_LETTER, // [ 29] 0x1d
- INVALID_LETTER, // [ 30] 0x1e
- INVALID_LETTER, // [ 31] 0x1f
- INVALID_LETTER, // [ 32] ' '
- INVALID_LETTER, // [ 33] '!'
- INVALID_LETTER, // [ 34] '"'
- INVALID_LETTER, // [ 35] '#'
- INVALID_LETTER, // [ 36] '$'
- INVALID_LETTER, // [ 37] '%'
- INVALID_LETTER, // [ 38] '&'
- INVALID_LETTER, // [ 39] '''
- INVALID_LETTER, // [ 40] '('
- INVALID_LETTER, // [ 41] ')'
- 20 , // [ 42] '*' = STP
- INVALID_LETTER, // [ 43] '+'
- INVALID_LETTER, // [ 44] ','
- INVALID_LETTER, // [ 45] '-'
- INVALID_LETTER, // [ 46] '.'
- INVALID_LETTER, // [ 47] '/'
- INVALID_LETTER, // [ 48] '0'
- INVALID_LETTER, // [ 49] '1'
- INVALID_LETTER, // [ 50] '2'
- INVALID_LETTER, // [ 51] '3'
- INVALID_LETTER, // [ 52] '4'
- INVALID_LETTER, // [ 53] '5'
- INVALID_LETTER, // [ 54] '6'
- INVALID_LETTER, // [ 55] '7'
- INVALID_LETTER, // [ 56] '8'
- INVALID_LETTER, // [ 57] '9'
- INVALID_LETTER, // [ 58] ':'
- INVALID_LETTER, // [ 59] ';'
- INVALID_LETTER, // [ 60] '<'
- INVALID_LETTER, // [ 61] '='
- INVALID_LETTER, // [ 62] '>'
- INVALID_LETTER, // [ 63] '?'
- INVALID_LETTER, // [ 64] '@'
- 0 , // [ 65] 'A' = Ala
- INVALID_LETTER, // [ 66] 'B'
- 1 , // [ 67] 'C' = Cys
- 2 , // [ 68] 'D' = Asp
- 3 , // [ 69] 'E' = Glu
- 4 , // [ 70] 'F' = Phe
- 5 , // [ 71] 'G' = Gly
- 6 , // [ 72] 'H' = His
- 7 , // [ 73] 'I' = Ile
- INVALID_LETTER, // [ 74] 'J'
- 8 , // [ 75] 'K' = Lys
- 9 , // [ 76] 'L' = Leu
- 10 , // [ 77] 'M' = Met
- 11 , // [ 78] 'N' = Asn
- INVALID_LETTER, // [ 79] 'O'
- 12 , // [ 80] 'P' = Pro
- 13 , // [ 81] 'Q' = Gln
- 14 , // [ 82] 'R' = Arg
- 15 , // [ 83] 'S' = Ser
- 16 , // [ 84] 'T' = Thr
- INVALID_LETTER, // [ 85] 'U'
- 17 , // [ 86] 'V' = Val
- 18 , // [ 87] 'W' = Trp
- INVALID_LETTER, // [ 88] 'X'
- 19 , // [ 89] 'Y' = Tyr
- INVALID_LETTER, // [ 90] 'Z'
- INVALID_LETTER, // [ 91] '['
- INVALID_LETTER, // [ 92] '\'
- INVALID_LETTER, // [ 93] ']'
- INVALID_LETTER, // [ 94] '^'
- INVALID_LETTER, // [ 95] '_'
- INVALID_LETTER, // [ 96] '`'
- 0 , // [ 97] 'a' = Ala
- INVALID_LETTER, // [ 98] 'b'
- 1 , // [ 99] 'c' = Cys
- 2 , // [100] 'd' = Asp
- 3 , // [101] 'e' = Glu
- 4 , // [102] 'f' = Phe
- 5 , // [103] 'g' = Gly
- 6 , // [104] 'h' = His
- 7 , // [105] 'i' = Ile
- INVALID_LETTER, // [106] 'j'
- 8 , // [107] 'k' = Lys
- 9 , // [108] 'l' = Leu
- 10 , // [109] 'm' = Met
- 11 , // [110] 'n' = Asn
- INVALID_LETTER, // [111] 'o'
- 12 , // [112] 'p' = Pro
- 13 , // [113] 'q' = Gln
- 14 , // [114] 'r' = Arg
- 15 , // [115] 's' = Ser
- 16 , // [116] 't' = Thr
- INVALID_LETTER, // [117] 'u'
- 17 , // [118] 'v' = Val
- 18 , // [119] 'w' = Trp
- INVALID_LETTER, // [120] 'x'
- 19 , // [121] 'y' = Tyr
- INVALID_LETTER, // [122] 'z'
- INVALID_LETTER, // [123] '{'
- INVALID_LETTER, // [124] '|'
- INVALID_LETTER, // [125] '}'
- INVALID_LETTER, // [126] '~'
- INVALID_LETTER, // [127] 0x7f
- INVALID_LETTER, // [128] 0x80
- INVALID_LETTER, // [129] 0x81
- INVALID_LETTER, // [130] 0x82
- INVALID_LETTER, // [131] 0x83
- INVALID_LETTER, // [132] 0x84
- INVALID_LETTER, // [133] 0x85
- INVALID_LETTER, // [134] 0x86
- INVALID_LETTER, // [135] 0x87
- INVALID_LETTER, // [136] 0x88
- INVALID_LETTER, // [137] 0x89
- INVALID_LETTER, // [138] 0x8a
- INVALID_LETTER, // [139] 0x8b
- INVALID_LETTER, // [140] 0x8c
- INVALID_LETTER, // [141] 0x8d
- INVALID_LETTER, // [142] 0x8e
- INVALID_LETTER, // [143] 0x8f
- INVALID_LETTER, // [144] 0x90
- INVALID_LETTER, // [145] 0x91
- INVALID_LETTER, // [146] 0x92
- INVALID_LETTER, // [147] 0x93
- INVALID_LETTER, // [148] 0x94
- INVALID_LETTER, // [149] 0x95
- INVALID_LETTER, // [150] 0x96
- INVALID_LETTER, // [151] 0x97
- INVALID_LETTER, // [152] 0x98
- INVALID_LETTER, // [153] 0x99
- INVALID_LETTER, // [154] 0x9a
- INVALID_LETTER, // [155] 0x9b
- INVALID_LETTER, // [156] 0x9c
- INVALID_LETTER, // [157] 0x9d
- INVALID_LETTER, // [158] 0x9e
- INVALID_LETTER, // [159] 0x9f
- INVALID_LETTER, // [160] 0xa0
- INVALID_LETTER, // [161] 0xa1
- INVALID_LETTER, // [162] 0xa2
- INVALID_LETTER, // [163] 0xa3
- INVALID_LETTER, // [164] 0xa4
- INVALID_LETTER, // [165] 0xa5
- INVALID_LETTER, // [166] 0xa6
- INVALID_LETTER, // [167] 0xa7
- INVALID_LETTER, // [168] 0xa8
- INVALID_LETTER, // [169] 0xa9
- INVALID_LETTER, // [170] 0xaa
- INVALID_LETTER, // [171] 0xab
- INVALID_LETTER, // [172] 0xac
- INVALID_LETTER, // [173] 0xad
- INVALID_LETTER, // [174] 0xae
- INVALID_LETTER, // [175] 0xaf
- INVALID_LETTER, // [176] 0xb0
- INVALID_LETTER, // [177] 0xb1
- INVALID_LETTER, // [178] 0xb2
- INVALID_LETTER, // [179] 0xb3
- INVALID_LETTER, // [180] 0xb4
- INVALID_LETTER, // [181] 0xb5
- INVALID_LETTER, // [182] 0xb6
- INVALID_LETTER, // [183] 0xb7
- INVALID_LETTER, // [184] 0xb8
- INVALID_LETTER, // [185] 0xb9
- INVALID_LETTER, // [186] 0xba
- INVALID_LETTER, // [187] 0xbb
- INVALID_LETTER, // [188] 0xbc
- INVALID_LETTER, // [189] 0xbd
- INVALID_LETTER, // [190] 0xbe
- INVALID_LETTER, // [191] 0xbf
- INVALID_LETTER, // [192] 0xc0
- INVALID_LETTER, // [193] 0xc1
- INVALID_LETTER, // [194] 0xc2
- INVALID_LETTER, // [195] 0xc3
- INVALID_LETTER, // [196] 0xc4
- INVALID_LETTER, // [197] 0xc5
- INVALID_LETTER, // [198] 0xc6
- INVALID_LETTER, // [199] 0xc7
- INVALID_LETTER, // [200] 0xc8
- INVALID_LETTER, // [201] 0xc9
- INVALID_LETTER, // [202] 0xca
- INVALID_LETTER, // [203] 0xcb
- INVALID_LETTER, // [204] 0xcc
- INVALID_LETTER, // [205] 0xcd
- INVALID_LETTER, // [206] 0xce
- INVALID_LETTER, // [207] 0xcf
- INVALID_LETTER, // [208] 0xd0
- INVALID_LETTER, // [209] 0xd1
- INVALID_LETTER, // [210] 0xd2
- INVALID_LETTER, // [211] 0xd3
- INVALID_LETTER, // [212] 0xd4
- INVALID_LETTER, // [213] 0xd5
- INVALID_LETTER, // [214] 0xd6
- INVALID_LETTER, // [215] 0xd7
- INVALID_LETTER, // [216] 0xd8
- INVALID_LETTER, // [217] 0xd9
- INVALID_LETTER, // [218] 0xda
- INVALID_LETTER, // [219] 0xdb
- INVALID_LETTER, // [220] 0xdc
- INVALID_LETTER, // [221] 0xdd
- INVALID_LETTER, // [222] 0xde
- INVALID_LETTER, // [223] 0xdf
- INVALID_LETTER, // [224] 0xe0
- INVALID_LETTER, // [225] 0xe1
- INVALID_LETTER, // [226] 0xe2
- INVALID_LETTER, // [227] 0xe3
- INVALID_LETTER, // [228] 0xe4
- INVALID_LETTER, // [229] 0xe5
- INVALID_LETTER, // [230] 0xe6
- INVALID_LETTER, // [231] 0xe7
- INVALID_LETTER, // [232] 0xe8
- INVALID_LETTER, // [233] 0xe9
- INVALID_LETTER, // [234] 0xea
- INVALID_LETTER, // [235] 0xeb
- INVALID_LETTER, // [236] 0xec
- INVALID_LETTER, // [237] 0xed
- INVALID_LETTER, // [238] 0xee
- INVALID_LETTER, // [239] 0xef
- INVALID_LETTER, // [240] 0xf0
- INVALID_LETTER, // [241] 0xf1
- INVALID_LETTER, // [242] 0xf2
- INVALID_LETTER, // [243] 0xf3
- INVALID_LETTER, // [244] 0xf4
- INVALID_LETTER, // [245] 0xf5
- INVALID_LETTER, // [246] 0xf6
- INVALID_LETTER, // [247] 0xf7
- INVALID_LETTER, // [248] 0xf8
- INVALID_LETTER, // [249] 0xf9
- INVALID_LETTER, // [250] 0xfa
- INVALID_LETTER, // [251] 0xfb
- INVALID_LETTER, // [252] 0xfc
- INVALID_LETTER, // [253] 0xfd
- INVALID_LETTER, // [254] 0xfe
- INVALID_LETTER, // [255] 0xff
- };
-unsigned g_CharToLetterAmino[256] =
- {
- INVALID_LETTER, // [ 0] 0x00
- INVALID_LETTER, // [ 1] 0x01
- INVALID_LETTER, // [ 2] 0x02
- INVALID_LETTER, // [ 3] 0x03
- INVALID_LETTER, // [ 4] 0x04
- INVALID_LETTER, // [ 5] 0x05
- INVALID_LETTER, // [ 6] 0x06
- INVALID_LETTER, // [ 7] 0x07
- INVALID_LETTER, // [ 8] 0x08
- INVALID_LETTER, // [ 9] 0x09
- INVALID_LETTER, // [ 10] 0x0a
- INVALID_LETTER, // [ 11] 0x0b
- INVALID_LETTER, // [ 12] 0x0c
- INVALID_LETTER, // [ 13] 0x0d
- INVALID_LETTER, // [ 14] 0x0e
- INVALID_LETTER, // [ 15] 0x0f
- INVALID_LETTER, // [ 16] 0x10
- INVALID_LETTER, // [ 17] 0x11
- INVALID_LETTER, // [ 18] 0x12
- INVALID_LETTER, // [ 19] 0x13
- INVALID_LETTER, // [ 20] 0x14
- INVALID_LETTER, // [ 21] 0x15
- INVALID_LETTER, // [ 22] 0x16
- INVALID_LETTER, // [ 23] 0x17
- INVALID_LETTER, // [ 24] 0x18
- INVALID_LETTER, // [ 25] 0x19
- INVALID_LETTER, // [ 26] 0x1a
- INVALID_LETTER, // [ 27] 0x1b
- INVALID_LETTER, // [ 28] 0x1c
- INVALID_LETTER, // [ 29] 0x1d
- INVALID_LETTER, // [ 30] 0x1e
- INVALID_LETTER, // [ 31] 0x1f
- INVALID_LETTER, // [ 32] ' '
- INVALID_LETTER, // [ 33] '!'
- INVALID_LETTER, // [ 34] '"'
- INVALID_LETTER, // [ 35] '#'
- INVALID_LETTER, // [ 36] '$'
- INVALID_LETTER, // [ 37] '%'
- INVALID_LETTER, // [ 38] '&'
- INVALID_LETTER, // [ 39] '''
- INVALID_LETTER, // [ 40] '('
- INVALID_LETTER, // [ 41] ')'
- INVALID_LETTER, // [ 42] '*'
- INVALID_LETTER, // [ 43] '+'
- INVALID_LETTER, // [ 44] ','
- INVALID_LETTER, // [ 45] '-'
- INVALID_LETTER, // [ 46] '.'
- INVALID_LETTER, // [ 47] '/'
- INVALID_LETTER, // [ 48] '0'
- INVALID_LETTER, // [ 49] '1'
- INVALID_LETTER, // [ 50] '2'
- INVALID_LETTER, // [ 51] '3'
- INVALID_LETTER, // [ 52] '4'
- INVALID_LETTER, // [ 53] '5'
- INVALID_LETTER, // [ 54] '6'
- INVALID_LETTER, // [ 55] '7'
- INVALID_LETTER, // [ 56] '8'
- INVALID_LETTER, // [ 57] '9'
- INVALID_LETTER, // [ 58] ':'
- INVALID_LETTER, // [ 59] ';'
- INVALID_LETTER, // [ 60] '<'
- INVALID_LETTER, // [ 61] '='
- INVALID_LETTER, // [ 62] '>'
- INVALID_LETTER, // [ 63] '?'
- INVALID_LETTER, // [ 64] '@'
- 0 , // [ 65] 'A' = Ala
- INVALID_LETTER, // [ 66] 'B'
- 1 , // [ 67] 'C' = Cys
- 2 , // [ 68] 'D' = Asp
- 3 , // [ 69] 'E' = Glu
- 4 , // [ 70] 'F' = Phe
- 5 , // [ 71] 'G' = Gly
- 6 , // [ 72] 'H' = His
- 7 , // [ 73] 'I' = Ile
- INVALID_LETTER, // [ 74] 'J'
- 8 , // [ 75] 'K' = Lys
- 9 , // [ 76] 'L' = Leu
- 10 , // [ 77] 'M' = Met
- 11 , // [ 78] 'N' = Asn
- INVALID_LETTER, // [ 79] 'O'
- 12 , // [ 80] 'P' = Pro
- 13 , // [ 81] 'Q' = Gln
- 14 , // [ 82] 'R' = Arg
- 15 , // [ 83] 'S' = Ser
- 16 , // [ 84] 'T' = Thr
- INVALID_LETTER, // [ 85] 'U'
- 17 , // [ 86] 'V' = Val
- 18 , // [ 87] 'W' = Trp
- INVALID_LETTER, // [ 88] 'X'
- 19 , // [ 89] 'Y' = Tyr
- INVALID_LETTER, // [ 90] 'Z'
- INVALID_LETTER, // [ 91] '['
- INVALID_LETTER, // [ 92] '\'
- INVALID_LETTER, // [ 93] ']'
- INVALID_LETTER, // [ 94] '^'
- INVALID_LETTER, // [ 95] '_'
- INVALID_LETTER, // [ 96] '`'
- 0 , // [ 97] 'a' = Ala
- INVALID_LETTER, // [ 98] 'b'
- 1 , // [ 99] 'c' = Cys
- 2 , // [100] 'd' = Asp
- 3 , // [101] 'e' = Glu
- 4 , // [102] 'f' = Phe
- 5 , // [103] 'g' = Gly
- 6 , // [104] 'h' = His
- 7 , // [105] 'i' = Ile
- INVALID_LETTER, // [106] 'j'
- 8 , // [107] 'k' = Lys
- 9 , // [108] 'l' = Leu
- 10 , // [109] 'm' = Met
- 11 , // [110] 'n' = Asn
- INVALID_LETTER, // [111] 'o'
- 12 , // [112] 'p' = Pro
- 13 , // [113] 'q' = Gln
- 14 , // [114] 'r' = Arg
- 15 , // [115] 's' = Ser
- 16 , // [116] 't' = Thr
- INVALID_LETTER, // [117] 'u'
- 17 , // [118] 'v' = Val
- 18 , // [119] 'w' = Trp
- INVALID_LETTER, // [120] 'x'
- 19 , // [121] 'y' = Tyr
- INVALID_LETTER, // [122] 'z'
- INVALID_LETTER, // [123] '{'
- INVALID_LETTER, // [124] '|'
- INVALID_LETTER, // [125] '}'
- INVALID_LETTER, // [126] '~'
- INVALID_LETTER, // [127] 0x7f
- INVALID_LETTER, // [128] 0x80
- INVALID_LETTER, // [129] 0x81
- INVALID_LETTER, // [130] 0x82
- INVALID_LETTER, // [131] 0x83
- INVALID_LETTER, // [132] 0x84
- INVALID_LETTER, // [133] 0x85
- INVALID_LETTER, // [134] 0x86
- INVALID_LETTER, // [135] 0x87
- INVALID_LETTER, // [136] 0x88
- INVALID_LETTER, // [137] 0x89
- INVALID_LETTER, // [138] 0x8a
- INVALID_LETTER, // [139] 0x8b
- INVALID_LETTER, // [140] 0x8c
- INVALID_LETTER, // [141] 0x8d
- INVALID_LETTER, // [142] 0x8e
- INVALID_LETTER, // [143] 0x8f
- INVALID_LETTER, // [144] 0x90
- INVALID_LETTER, // [145] 0x91
- INVALID_LETTER, // [146] 0x92
- INVALID_LETTER, // [147] 0x93
- INVALID_LETTER, // [148] 0x94
- INVALID_LETTER, // [149] 0x95
- INVALID_LETTER, // [150] 0x96
- INVALID_LETTER, // [151] 0x97
- INVALID_LETTER, // [152] 0x98
- INVALID_LETTER, // [153] 0x99
- INVALID_LETTER, // [154] 0x9a
- INVALID_LETTER, // [155] 0x9b
- INVALID_LETTER, // [156] 0x9c
- INVALID_LETTER, // [157] 0x9d
- INVALID_LETTER, // [158] 0x9e
- INVALID_LETTER, // [159] 0x9f
- INVALID_LETTER, // [160] 0xa0
- INVALID_LETTER, // [161] 0xa1
- INVALID_LETTER, // [162] 0xa2
- INVALID_LETTER, // [163] 0xa3
- INVALID_LETTER, // [164] 0xa4
- INVALID_LETTER, // [165] 0xa5
- INVALID_LETTER, // [166] 0xa6
- INVALID_LETTER, // [167] 0xa7
- INVALID_LETTER, // [168] 0xa8
- INVALID_LETTER, // [169] 0xa9
- INVALID_LETTER, // [170] 0xaa
- INVALID_LETTER, // [171] 0xab
- INVALID_LETTER, // [172] 0xac
- INVALID_LETTER, // [173] 0xad
- INVALID_LETTER, // [174] 0xae
- INVALID_LETTER, // [175] 0xaf
- INVALID_LETTER, // [176] 0xb0
- INVALID_LETTER, // [177] 0xb1
- INVALID_LETTER, // [178] 0xb2
- INVALID_LETTER, // [179] 0xb3
- INVALID_LETTER, // [180] 0xb4
- INVALID_LETTER, // [181] 0xb5
- INVALID_LETTER, // [182] 0xb6
- INVALID_LETTER, // [183] 0xb7
- INVALID_LETTER, // [184] 0xb8
- INVALID_LETTER, // [185] 0xb9
- INVALID_LETTER, // [186] 0xba
- INVALID_LETTER, // [187] 0xbb
- INVALID_LETTER, // [188] 0xbc
- INVALID_LETTER, // [189] 0xbd
- INVALID_LETTER, // [190] 0xbe
- INVALID_LETTER, // [191] 0xbf
- INVALID_LETTER, // [192] 0xc0
- INVALID_LETTER, // [193] 0xc1
- INVALID_LETTER, // [194] 0xc2
- INVALID_LETTER, // [195] 0xc3
- INVALID_LETTER, // [196] 0xc4
- INVALID_LETTER, // [197] 0xc5
- INVALID_LETTER, // [198] 0xc6
- INVALID_LETTER, // [199] 0xc7
- INVALID_LETTER, // [200] 0xc8
- INVALID_LETTER, // [201] 0xc9
- INVALID_LETTER, // [202] 0xca
- INVALID_LETTER, // [203] 0xcb
- INVALID_LETTER, // [204] 0xcc
- INVALID_LETTER, // [205] 0xcd
- INVALID_LETTER, // [206] 0xce
- INVALID_LETTER, // [207] 0xcf
- INVALID_LETTER, // [208] 0xd0
- INVALID_LETTER, // [209] 0xd1
- INVALID_LETTER, // [210] 0xd2
- INVALID_LETTER, // [211] 0xd3
- INVALID_LETTER, // [212] 0xd4
- INVALID_LETTER, // [213] 0xd5
- INVALID_LETTER, // [214] 0xd6
- INVALID_LETTER, // [215] 0xd7
- INVALID_LETTER, // [216] 0xd8
- INVALID_LETTER, // [217] 0xd9
- INVALID_LETTER, // [218] 0xda
- INVALID_LETTER, // [219] 0xdb
- INVALID_LETTER, // [220] 0xdc
- INVALID_LETTER, // [221] 0xdd
- INVALID_LETTER, // [222] 0xde
- INVALID_LETTER, // [223] 0xdf
- INVALID_LETTER, // [224] 0xe0
- INVALID_LETTER, // [225] 0xe1
- INVALID_LETTER, // [226] 0xe2
- INVALID_LETTER, // [227] 0xe3
- INVALID_LETTER, // [228] 0xe4
- INVALID_LETTER, // [229] 0xe5
- INVALID_LETTER, // [230] 0xe6
- INVALID_LETTER, // [231] 0xe7
- INVALID_LETTER, // [232] 0xe8
- INVALID_LETTER, // [233] 0xe9
- INVALID_LETTER, // [234] 0xea
- INVALID_LETTER, // [235] 0xeb
- INVALID_LETTER, // [236] 0xec
- INVALID_LETTER, // [237] 0xed
- INVALID_LETTER, // [238] 0xee
- INVALID_LETTER, // [239] 0xef
- INVALID_LETTER, // [240] 0xf0
- INVALID_LETTER, // [241] 0xf1
- INVALID_LETTER, // [242] 0xf2
- INVALID_LETTER, // [243] 0xf3
- INVALID_LETTER, // [244] 0xf4
- INVALID_LETTER, // [245] 0xf5
- INVALID_LETTER, // [246] 0xf6
- INVALID_LETTER, // [247] 0xf7
- INVALID_LETTER, // [248] 0xf8
- INVALID_LETTER, // [249] 0xf9
- INVALID_LETTER, // [250] 0xfa
- INVALID_LETTER, // [251] 0xfb
- INVALID_LETTER, // [252] 0xfc
- INVALID_LETTER, // [253] 0xfd
- INVALID_LETTER, // [254] 0xfe
- INVALID_LETTER, // [255] 0xff
- };
-
-unsigned char g_LetterToCharAmino[256] =
- {
- 'A', // [0]
- 'C', // [1]
- 'D', // [2]
- 'E', // [3]
- 'F', // [4]
- 'G', // [5]
- 'H', // [6]
- 'I', // [7]
- 'K', // [8]
- 'L', // [9]
- 'M', // [10]
- 'N', // [11]
- 'P', // [12]
- 'Q', // [13]
- 'R', // [14]
- 'S', // [15]
- 'T', // [16]
- 'V', // [17]
- 'W', // [18]
- 'Y', // [19]
- '*', // [20]
- INVALID_CHAR, // [21]
- INVALID_CHAR, // [22]
- INVALID_CHAR, // [23]
- INVALID_CHAR, // [24]
- INVALID_CHAR, // [25]
- INVALID_CHAR, // [26]
- INVALID_CHAR, // [27]
- INVALID_CHAR, // [28]
- INVALID_CHAR, // [29]
- INVALID_CHAR, // [30]
- INVALID_CHAR, // [31]
- INVALID_CHAR, // [32]
- INVALID_CHAR, // [33]
- INVALID_CHAR, // [34]
- INVALID_CHAR, // [35]
- INVALID_CHAR, // [36]
- INVALID_CHAR, // [37]
- INVALID_CHAR, // [38]
- INVALID_CHAR, // [39]
- INVALID_CHAR, // [40]
- INVALID_CHAR, // [41]
- INVALID_CHAR, // [42]
- INVALID_CHAR, // [43]
- INVALID_CHAR, // [44]
- INVALID_CHAR, // [45]
- INVALID_CHAR, // [46]
- INVALID_CHAR, // [47]
- INVALID_CHAR, // [48]
- INVALID_CHAR, // [49]
- INVALID_CHAR, // [50]
- INVALID_CHAR, // [51]
- INVALID_CHAR, // [52]
- INVALID_CHAR, // [53]
- INVALID_CHAR, // [54]
- INVALID_CHAR, // [55]
- INVALID_CHAR, // [56]
- INVALID_CHAR, // [57]
- INVALID_CHAR, // [58]
- INVALID_CHAR, // [59]
- INVALID_CHAR, // [60]
- INVALID_CHAR, // [61]
- INVALID_CHAR, // [62]
- INVALID_CHAR, // [63]
- INVALID_CHAR, // [64]
- INVALID_CHAR, // [65]
- INVALID_CHAR, // [66]
- INVALID_CHAR, // [67]
- INVALID_CHAR, // [68]
- INVALID_CHAR, // [69]
- INVALID_CHAR, // [70]
- INVALID_CHAR, // [71]
- INVALID_CHAR, // [72]
- INVALID_CHAR, // [73]
- INVALID_CHAR, // [74]
- INVALID_CHAR, // [75]
- INVALID_CHAR, // [76]
- INVALID_CHAR, // [77]
- INVALID_CHAR, // [78]
- INVALID_CHAR, // [79]
- INVALID_CHAR, // [80]
- INVALID_CHAR, // [81]
- INVALID_CHAR, // [82]
- INVALID_CHAR, // [83]
- INVALID_CHAR, // [84]
- INVALID_CHAR, // [85]
- INVALID_CHAR, // [86]
- INVALID_CHAR, // [87]
- INVALID_CHAR, // [88]
- INVALID_CHAR, // [89]
- INVALID_CHAR, // [90]
- INVALID_CHAR, // [91]
- INVALID_CHAR, // [92]
- INVALID_CHAR, // [93]
- INVALID_CHAR, // [94]
- INVALID_CHAR, // [95]
- INVALID_CHAR, // [96]
- INVALID_CHAR, // [97]
- INVALID_CHAR, // [98]
- INVALID_CHAR, // [99]
- INVALID_CHAR, // [100]
- INVALID_CHAR, // [101]
- INVALID_CHAR, // [102]
- INVALID_CHAR, // [103]
- INVALID_CHAR, // [104]
- INVALID_CHAR, // [105]
- INVALID_CHAR, // [106]
- INVALID_CHAR, // [107]
- INVALID_CHAR, // [108]
- INVALID_CHAR, // [109]
- INVALID_CHAR, // [110]
- INVALID_CHAR, // [111]
- INVALID_CHAR, // [112]
- INVALID_CHAR, // [113]
- INVALID_CHAR, // [114]
- INVALID_CHAR, // [115]
- INVALID_CHAR, // [116]
- INVALID_CHAR, // [117]
- INVALID_CHAR, // [118]
- INVALID_CHAR, // [119]
- INVALID_CHAR, // [120]
- INVALID_CHAR, // [121]
- INVALID_CHAR, // [122]
- INVALID_CHAR, // [123]
- INVALID_CHAR, // [124]
- INVALID_CHAR, // [125]
- INVALID_CHAR, // [126]
- INVALID_CHAR, // [127]
- INVALID_CHAR, // [128]
- INVALID_CHAR, // [129]
- INVALID_CHAR, // [130]
- INVALID_CHAR, // [131]
- INVALID_CHAR, // [132]
- INVALID_CHAR, // [133]
- INVALID_CHAR, // [134]
- INVALID_CHAR, // [135]
- INVALID_CHAR, // [136]
- INVALID_CHAR, // [137]
- INVALID_CHAR, // [138]
- INVALID_CHAR, // [139]
- INVALID_CHAR, // [140]
- INVALID_CHAR, // [141]
- INVALID_CHAR, // [142]
- INVALID_CHAR, // [143]
- INVALID_CHAR, // [144]
- INVALID_CHAR, // [145]
- INVALID_CHAR, // [146]
- INVALID_CHAR, // [147]
- INVALID_CHAR, // [148]
- INVALID_CHAR, // [149]
- INVALID_CHAR, // [150]
- INVALID_CHAR, // [151]
- INVALID_CHAR, // [152]
- INVALID_CHAR, // [153]
- INVALID_CHAR, // [154]
- INVALID_CHAR, // [155]
- INVALID_CHAR, // [156]
- INVALID_CHAR, // [157]
- INVALID_CHAR, // [158]
- INVALID_CHAR, // [159]
- INVALID_CHAR, // [160]
- INVALID_CHAR, // [161]
- INVALID_CHAR, // [162]
- INVALID_CHAR, // [163]
- INVALID_CHAR, // [164]
- INVALID_CHAR, // [165]
- INVALID_CHAR, // [166]
- INVALID_CHAR, // [167]
- INVALID_CHAR, // [168]
- INVALID_CHAR, // [169]
- INVALID_CHAR, // [170]
- INVALID_CHAR, // [171]
- INVALID_CHAR, // [172]
- INVALID_CHAR, // [173]
- INVALID_CHAR, // [174]
- INVALID_CHAR, // [175]
- INVALID_CHAR, // [176]
- INVALID_CHAR, // [177]
- INVALID_CHAR, // [178]
- INVALID_CHAR, // [179]
- INVALID_CHAR, // [180]
- INVALID_CHAR, // [181]
- INVALID_CHAR, // [182]
- INVALID_CHAR, // [183]
- INVALID_CHAR, // [184]
- INVALID_CHAR, // [185]
- INVALID_CHAR, // [186]
- INVALID_CHAR, // [187]
- INVALID_CHAR, // [188]
- INVALID_CHAR, // [189]
- INVALID_CHAR, // [190]
- INVALID_CHAR, // [191]
- INVALID_CHAR, // [192]
- INVALID_CHAR, // [193]
- INVALID_CHAR, // [194]
- INVALID_CHAR, // [195]
- INVALID_CHAR, // [196]
- INVALID_CHAR, // [197]
- INVALID_CHAR, // [198]
- INVALID_CHAR, // [199]
- INVALID_CHAR, // [200]
- INVALID_CHAR, // [201]
- INVALID_CHAR, // [202]
- INVALID_CHAR, // [203]
- INVALID_CHAR, // [204]
- INVALID_CHAR, // [205]
- INVALID_CHAR, // [206]
- INVALID_CHAR, // [207]
- INVALID_CHAR, // [208]
- INVALID_CHAR, // [209]
- INVALID_CHAR, // [210]
- INVALID_CHAR, // [211]
- INVALID_CHAR, // [212]
- INVALID_CHAR, // [213]
- INVALID_CHAR, // [214]
- INVALID_CHAR, // [215]
- INVALID_CHAR, // [216]
- INVALID_CHAR, // [217]
- INVALID_CHAR, // [218]
- INVALID_CHAR, // [219]
- INVALID_CHAR, // [220]
- INVALID_CHAR, // [221]
- INVALID_CHAR, // [222]
- INVALID_CHAR, // [223]
- INVALID_CHAR, // [224]
- INVALID_CHAR, // [225]
- INVALID_CHAR, // [226]
- INVALID_CHAR, // [227]
- INVALID_CHAR, // [228]
- INVALID_CHAR, // [229]
- INVALID_CHAR, // [230]
- INVALID_CHAR, // [231]
- INVALID_CHAR, // [232]
- INVALID_CHAR, // [233]
- INVALID_CHAR, // [234]
- INVALID_CHAR, // [235]
- INVALID_CHAR, // [236]
- INVALID_CHAR, // [237]
- INVALID_CHAR, // [238]
- INVALID_CHAR, // [239]
- INVALID_CHAR, // [240]
- INVALID_CHAR, // [241]
- INVALID_CHAR, // [242]
- INVALID_CHAR, // [243]
- INVALID_CHAR, // [244]
- INVALID_CHAR, // [245]
- INVALID_CHAR, // [246]
- INVALID_CHAR, // [247]
- INVALID_CHAR, // [248]
- INVALID_CHAR, // [249]
- INVALID_CHAR, // [250]
- INVALID_CHAR, // [251]
- INVALID_CHAR, // [252]
- INVALID_CHAR, // [253]
- INVALID_CHAR, // [254]
- INVALID_CHAR, // [255]
- };
-
-unsigned g_CharToLetterNucleo[256] =
- {
- INVALID_LETTER, // [ 0] = 0x00
- INVALID_LETTER, // [ 1] = 0x01
- INVALID_LETTER, // [ 2] = 0x02
- INVALID_LETTER, // [ 3] = 0x03
- INVALID_LETTER, // [ 4] = 0x04
- INVALID_LETTER, // [ 5] = 0x05
- INVALID_LETTER, // [ 6] = 0x06
- INVALID_LETTER, // [ 7] = 0x07
- INVALID_LETTER, // [ 8] = 0x08
- INVALID_LETTER, // [ 9] = 0x09
- INVALID_LETTER, // [ 10] = 0x0a
- INVALID_LETTER, // [ 11] = 0x0b
- INVALID_LETTER, // [ 12] = 0x0c
- INVALID_LETTER, // [ 13] = 0x0d
- INVALID_LETTER, // [ 14] = 0x0e
- INVALID_LETTER, // [ 15] = 0x0f
- INVALID_LETTER, // [ 16] = 0x10
- INVALID_LETTER, // [ 17] = 0x11
- INVALID_LETTER, // [ 18] = 0x12
- INVALID_LETTER, // [ 19] = 0x13
- INVALID_LETTER, // [ 20] = 0x14
- INVALID_LETTER, // [ 21] = 0x15
- INVALID_LETTER, // [ 22] = 0x16
- INVALID_LETTER, // [ 23] = 0x17
- INVALID_LETTER, // [ 24] = 0x18
- INVALID_LETTER, // [ 25] = 0x19
- INVALID_LETTER, // [ 26] = 0x1a
- INVALID_LETTER, // [ 27] = 0x1b
- INVALID_LETTER, // [ 28] = 0x1c
- INVALID_LETTER, // [ 29] = 0x1d
- INVALID_LETTER, // [ 30] = 0x1e
- INVALID_LETTER, // [ 31] = 0x1f
- INVALID_LETTER, // [ 32] = 32
- INVALID_LETTER, // [ 33] = 33
- INVALID_LETTER, // [ 34] = 34
- INVALID_LETTER, // [ 35] = 35
- INVALID_LETTER, // [ 36] = 36
- INVALID_LETTER, // [ 37] = 37
- INVALID_LETTER, // [ 38] = 38
- INVALID_LETTER, // [ 39] = 39
- INVALID_LETTER, // [ 40] = 40
- INVALID_LETTER, // [ 41] = 41
- INVALID_LETTER, // [ 42] = 42
- INVALID_LETTER, // [ 43] = 43
- INVALID_LETTER, // [ 44] = 44
- INVALID_LETTER, // [ 45] = 45
- INVALID_LETTER, // [ 46] = 46
- INVALID_LETTER, // [ 47] = 47
- INVALID_LETTER, // [ 48] = 48
- INVALID_LETTER, // [ 49] = 49
- INVALID_LETTER, // [ 50] = 50
- INVALID_LETTER, // [ 51] = 51
- INVALID_LETTER, // [ 52] = 52
- INVALID_LETTER, // [ 53] = 53
- INVALID_LETTER, // [ 54] = 54
- INVALID_LETTER, // [ 55] = 55
- INVALID_LETTER, // [ 56] = 56
- INVALID_LETTER, // [ 57] = 57
- INVALID_LETTER, // [ 58] = 58
- INVALID_LETTER, // [ 59] = 59
- INVALID_LETTER, // [ 60] = 60
- INVALID_LETTER, // [ 61] = 61
- INVALID_LETTER, // [ 62] = 62
- INVALID_LETTER, // [ 63] = 63
- INVALID_LETTER, // [ 64] = 64
- 0 , // [ 65] = A (Nucleotide)
- INVALID_LETTER, // [ 66] = 66
- 1 , // [ 67] = C (Nucleotide)
- INVALID_LETTER, // [ 68] = 68
- INVALID_LETTER, // [ 69] = 69
- INVALID_LETTER, // [ 70] = 70
- 2 , // [ 71] = G (Nucleotide)
- INVALID_LETTER, // [ 72] = 72
- INVALID_LETTER, // [ 73] = 73
- INVALID_LETTER, // [ 74] = 74
- INVALID_LETTER, // [ 75] = 75
- INVALID_LETTER, // [ 76] = 76
- INVALID_LETTER, // [ 77] = 77
- INVALID_LETTER, // [ 78] = 78
- INVALID_LETTER, // [ 79] = 79
- INVALID_LETTER, // [ 80] = 80
- INVALID_LETTER, // [ 81] = 81
- INVALID_LETTER, // [ 82] = 82
- INVALID_LETTER, // [ 83] = 83
- 3 , // [ 84] = T (Nucleotide)
- 3 , // [ 85] = U (Nucleotide)
- INVALID_LETTER, // [ 86] = 86
- INVALID_LETTER, // [ 87] = 87
- INVALID_LETTER, // [ 88] = 88
- INVALID_LETTER, // [ 89] = 89
- INVALID_LETTER, // [ 90] = 90
- INVALID_LETTER, // [ 91] = 91
- INVALID_LETTER, // [ 92] = 92
- INVALID_LETTER, // [ 93] = 93
- INVALID_LETTER, // [ 94] = 94
- INVALID_LETTER, // [ 95] = 95
- INVALID_LETTER, // [ 96] = 96
- 0 , // [ 97] = a (Nucleotide)
- INVALID_LETTER, // [ 98] = 98
- 1 , // [ 99] = c (Nucleotide)
- INVALID_LETTER, // [100] = 100
- INVALID_LETTER, // [101] = 101
- INVALID_LETTER, // [102] = 102
- 2 , // [103] = g (Nucleotide)
- INVALID_LETTER, // [104] = 104
- INVALID_LETTER, // [105] = 105
- INVALID_LETTER, // [106] = 106
- INVALID_LETTER, // [107] = 107
- INVALID_LETTER, // [108] = 108
- INVALID_LETTER, // [109] = 109
- INVALID_LETTER, // [110] = 110
- INVALID_LETTER, // [111] = 111
- INVALID_LETTER, // [112] = 112
- INVALID_LETTER, // [113] = 113
- INVALID_LETTER, // [114] = 114
- INVALID_LETTER, // [115] = 115
- 3 , // [116] = t (Nucleotide)
- 3 , // [117] = u (Nucleotide)
- INVALID_LETTER, // [118] = 118
- INVALID_LETTER, // [119] = 119
- INVALID_LETTER, // [120] = 120
- INVALID_LETTER, // [121] = 121
- INVALID_LETTER, // [122] = 122
- INVALID_LETTER, // [123] = 123
- INVALID_LETTER, // [124] = 124
- INVALID_LETTER, // [125] = 125
- INVALID_LETTER, // [126] = 126
- INVALID_LETTER, // [127] = 0x7f
- INVALID_LETTER, // [128] = 0x80
- INVALID_LETTER, // [129] = 0x81
- INVALID_LETTER, // [130] = 0x82
- INVALID_LETTER, // [131] = 0x83
- INVALID_LETTER, // [132] = 0x84
- INVALID_LETTER, // [133] = 0x85
- INVALID_LETTER, // [134] = 0x86
- INVALID_LETTER, // [135] = 0x87
- INVALID_LETTER, // [136] = 0x88
- INVALID_LETTER, // [137] = 0x89
- INVALID_LETTER, // [138] = 0x8a
- INVALID_LETTER, // [139] = 0x8b
- INVALID_LETTER, // [140] = 0x8c
- INVALID_LETTER, // [141] = 0x8d
- INVALID_LETTER, // [142] = 0x8e
- INVALID_LETTER, // [143] = 0x8f
- INVALID_LETTER, // [144] = 0x90
- INVALID_LETTER, // [145] = 0x91
- INVALID_LETTER, // [146] = 0x92
- INVALID_LETTER, // [147] = 0x93
- INVALID_LETTER, // [148] = 0x94
- INVALID_LETTER, // [149] = 0x95
- INVALID_LETTER, // [150] = 0x96
- INVALID_LETTER, // [151] = 0x97
- INVALID_LETTER, // [152] = 0x98
- INVALID_LETTER, // [153] = 0x99
- INVALID_LETTER, // [154] = 0x9a
- INVALID_LETTER, // [155] = 0x9b
- INVALID_LETTER, // [156] = 0x9c
- INVALID_LETTER, // [157] = 0x9d
- INVALID_LETTER, // [158] = 0x9e
- INVALID_LETTER, // [159] = 0x9f
- INVALID_LETTER, // [160] = 0xa0
- INVALID_LETTER, // [161] = 0xa1
- INVALID_LETTER, // [162] = 0xa2
- INVALID_LETTER, // [163] = 0xa3
- INVALID_LETTER, // [164] = 0xa4
- INVALID_LETTER, // [165] = 0xa5
- INVALID_LETTER, // [166] = 0xa6
- INVALID_LETTER, // [167] = 0xa7
- INVALID_LETTER, // [168] = 0xa8
- INVALID_LETTER, // [169] = 0xa9
- INVALID_LETTER, // [170] = 0xaa
- INVALID_LETTER, // [171] = 0xab
- INVALID_LETTER, // [172] = 0xac
- INVALID_LETTER, // [173] = 0xad
- INVALID_LETTER, // [174] = 0xae
- INVALID_LETTER, // [175] = 0xaf
- INVALID_LETTER, // [176] = 0xb0
- INVALID_LETTER, // [177] = 0xb1
- INVALID_LETTER, // [178] = 0xb2
- INVALID_LETTER, // [179] = 0xb3
- INVALID_LETTER, // [180] = 0xb4
- INVALID_LETTER, // [181] = 0xb5
- INVALID_LETTER, // [182] = 0xb6
- INVALID_LETTER, // [183] = 0xb7
- INVALID_LETTER, // [184] = 0xb8
- INVALID_LETTER, // [185] = 0xb9
- INVALID_LETTER, // [186] = 0xba
- INVALID_LETTER, // [187] = 0xbb
- INVALID_LETTER, // [188] = 0xbc
- INVALID_LETTER, // [189] = 0xbd
- INVALID_LETTER, // [190] = 0xbe
- INVALID_LETTER, // [191] = 0xbf
- INVALID_LETTER, // [192] = 0xc0
- INVALID_LETTER, // [193] = 0xc1
- INVALID_LETTER, // [194] = 0xc2
- INVALID_LETTER, // [195] = 0xc3
- INVALID_LETTER, // [196] = 0xc4
- INVALID_LETTER, // [197] = 0xc5
- INVALID_LETTER, // [198] = 0xc6
- INVALID_LETTER, // [199] = 0xc7
- INVALID_LETTER, // [200] = 0xc8
- INVALID_LETTER, // [201] = 0xc9
- INVALID_LETTER, // [202] = 0xca
- INVALID_LETTER, // [203] = 0xcb
- INVALID_LETTER, // [204] = 0xcc
- INVALID_LETTER, // [205] = 0xcd
- INVALID_LETTER, // [206] = 0xce
- INVALID_LETTER, // [207] = 0xcf
- INVALID_LETTER, // [208] = 0xd0
- INVALID_LETTER, // [209] = 0xd1
- INVALID_LETTER, // [210] = 0xd2
- INVALID_LETTER, // [211] = 0xd3
- INVALID_LETTER, // [212] = 0xd4
- INVALID_LETTER, // [213] = 0xd5
- INVALID_LETTER, // [214] = 0xd6
- INVALID_LETTER, // [215] = 0xd7
- INVALID_LETTER, // [216] = 0xd8
- INVALID_LETTER, // [217] = 0xd9
- INVALID_LETTER, // [218] = 0xda
- INVALID_LETTER, // [219] = 0xdb
- INVALID_LETTER, // [220] = 0xdc
- INVALID_LETTER, // [221] = 0xdd
- INVALID_LETTER, // [222] = 0xde
- INVALID_LETTER, // [223] = 0xdf
- INVALID_LETTER, // [224] = 0xe0
- INVALID_LETTER, // [225] = 0xe1
- INVALID_LETTER, // [226] = 0xe2
- INVALID_LETTER, // [227] = 0xe3
- INVALID_LETTER, // [228] = 0xe4
- INVALID_LETTER, // [229] = 0xe5
- INVALID_LETTER, // [230] = 0xe6
- INVALID_LETTER, // [231] = 0xe7
- INVALID_LETTER, // [232] = 0xe8
- INVALID_LETTER, // [233] = 0xe9
- INVALID_LETTER, // [234] = 0xea
- INVALID_LETTER, // [235] = 0xeb
- INVALID_LETTER, // [236] = 0xec
- INVALID_LETTER, // [237] = 0xed
- INVALID_LETTER, // [238] = 0xee
- INVALID_LETTER, // [239] = 0xef
- INVALID_LETTER, // [240] = 0xf0
- INVALID_LETTER, // [241] = 0xf1
- INVALID_LETTER, // [242] = 0xf2
- INVALID_LETTER, // [243] = 0xf3
- INVALID_LETTER, // [244] = 0xf4
- INVALID_LETTER, // [245] = 0xf5
- INVALID_LETTER, // [246] = 0xf6
- INVALID_LETTER, // [247] = 0xf7
- INVALID_LETTER, // [248] = 0xf8
- INVALID_LETTER, // [249] = 0xf9
- INVALID_LETTER, // [250] = 0xfa
- INVALID_LETTER, // [251] = 0xfb
- INVALID_LETTER, // [252] = 0xfc
- INVALID_LETTER, // [253] = 0xfd
- INVALID_LETTER, // [254] = 0xfe
- INVALID_LETTER, // [255] = 0xff
- };
-
-unsigned char g_LetterToCharNucleo[256] =
- {
- 'A', // [0]
- 'C', // [1]
- 'G', // [2]
- 'T', // [3]
- INVALID_CHAR, // [4]
- INVALID_CHAR, // [5]
- INVALID_CHAR, // [6]
- INVALID_CHAR, // [7]
- INVALID_CHAR, // [8]
- INVALID_CHAR, // [9]
- INVALID_CHAR, // [10]
- INVALID_CHAR, // [11]
- INVALID_CHAR, // [12]
- INVALID_CHAR, // [13]
- INVALID_CHAR, // [14]
- INVALID_CHAR, // [15]
- INVALID_CHAR, // [16]
- INVALID_CHAR, // [17]
- INVALID_CHAR, // [18]
- INVALID_CHAR, // [19]
- INVALID_CHAR, // [20]
- INVALID_CHAR, // [21]
- INVALID_CHAR, // [22]
- INVALID_CHAR, // [23]
- INVALID_CHAR, // [24]
- INVALID_CHAR, // [25]
- INVALID_CHAR, // [26]
- INVALID_CHAR, // [27]
- INVALID_CHAR, // [28]
- INVALID_CHAR, // [29]
- INVALID_CHAR, // [30]
- INVALID_CHAR, // [31]
- INVALID_CHAR, // [32]
- INVALID_CHAR, // [33]
- INVALID_CHAR, // [34]
- INVALID_CHAR, // [35]
- INVALID_CHAR, // [36]
- INVALID_CHAR, // [37]
- INVALID_CHAR, // [38]
- INVALID_CHAR, // [39]
- INVALID_CHAR, // [40]
- INVALID_CHAR, // [41]
- INVALID_CHAR, // [42]
- INVALID_CHAR, // [43]
- INVALID_CHAR, // [44]
- INVALID_CHAR, // [45]
- INVALID_CHAR, // [46]
- INVALID_CHAR, // [47]
- INVALID_CHAR, // [48]
- INVALID_CHAR, // [49]
- INVALID_CHAR, // [50]
- INVALID_CHAR, // [51]
- INVALID_CHAR, // [52]
- INVALID_CHAR, // [53]
- INVALID_CHAR, // [54]
- INVALID_CHAR, // [55]
- INVALID_CHAR, // [56]
- INVALID_CHAR, // [57]
- INVALID_CHAR, // [58]
- INVALID_CHAR, // [59]
- INVALID_CHAR, // [60]
- INVALID_CHAR, // [61]
- INVALID_CHAR, // [62]
- INVALID_CHAR, // [63]
- INVALID_CHAR, // [64]
- INVALID_CHAR, // [65]
- INVALID_CHAR, // [66]
- INVALID_CHAR, // [67]
- INVALID_CHAR, // [68]
- INVALID_CHAR, // [69]
- INVALID_CHAR, // [70]
- INVALID_CHAR, // [71]
- INVALID_CHAR, // [72]
- INVALID_CHAR, // [73]
- INVALID_CHAR, // [74]
- INVALID_CHAR, // [75]
- INVALID_CHAR, // [76]
- INVALID_CHAR, // [77]
- INVALID_CHAR, // [78]
- INVALID_CHAR, // [79]
- INVALID_CHAR, // [80]
- INVALID_CHAR, // [81]
- INVALID_CHAR, // [82]
- INVALID_CHAR, // [83]
- INVALID_CHAR, // [84]
- INVALID_CHAR, // [85]
- INVALID_CHAR, // [86]
- INVALID_CHAR, // [87]
- INVALID_CHAR, // [88]
- INVALID_CHAR, // [89]
- INVALID_CHAR, // [90]
- INVALID_CHAR, // [91]
- INVALID_CHAR, // [92]
- INVALID_CHAR, // [93]
- INVALID_CHAR, // [94]
- INVALID_CHAR, // [95]
- INVALID_CHAR, // [96]
- INVALID_CHAR, // [97]
- INVALID_CHAR, // [98]
- INVALID_CHAR, // [99]
- INVALID_CHAR, // [100]
- INVALID_CHAR, // [101]
- INVALID_CHAR, // [102]
- INVALID_CHAR, // [103]
- INVALID_CHAR, // [104]
- INVALID_CHAR, // [105]
- INVALID_CHAR, // [106]
- INVALID_CHAR, // [107]
- INVALID_CHAR, // [108]
- INVALID_CHAR, // [109]
- INVALID_CHAR, // [110]
- INVALID_CHAR, // [111]
- INVALID_CHAR, // [112]
- INVALID_CHAR, // [113]
- INVALID_CHAR, // [114]
- INVALID_CHAR, // [115]
- INVALID_CHAR, // [116]
- INVALID_CHAR, // [117]
- INVALID_CHAR, // [118]
- INVALID_CHAR, // [119]
- INVALID_CHAR, // [120]
- INVALID_CHAR, // [121]
- INVALID_CHAR, // [122]
- INVALID_CHAR, // [123]
- INVALID_CHAR, // [124]
- INVALID_CHAR, // [125]
- INVALID_CHAR, // [126]
- INVALID_CHAR, // [127]
- INVALID_CHAR, // [128]
- INVALID_CHAR, // [129]
- INVALID_CHAR, // [130]
- INVALID_CHAR, // [131]
- INVALID_CHAR, // [132]
- INVALID_CHAR, // [133]
- INVALID_CHAR, // [134]
- INVALID_CHAR, // [135]
- INVALID_CHAR, // [136]
- INVALID_CHAR, // [137]
- INVALID_CHAR, // [138]
- INVALID_CHAR, // [139]
- INVALID_CHAR, // [140]
- INVALID_CHAR, // [141]
- INVALID_CHAR, // [142]
- INVALID_CHAR, // [143]
- INVALID_CHAR, // [144]
- INVALID_CHAR, // [145]
- INVALID_CHAR, // [146]
- INVALID_CHAR, // [147]
- INVALID_CHAR, // [148]
- INVALID_CHAR, // [149]
- INVALID_CHAR, // [150]
- INVALID_CHAR, // [151]
- INVALID_CHAR, // [152]
- INVALID_CHAR, // [153]
- INVALID_CHAR, // [154]
- INVALID_CHAR, // [155]
- INVALID_CHAR, // [156]
- INVALID_CHAR, // [157]
- INVALID_CHAR, // [158]
- INVALID_CHAR, // [159]
- INVALID_CHAR, // [160]
- INVALID_CHAR, // [161]
- INVALID_CHAR, // [162]
- INVALID_CHAR, // [163]
- INVALID_CHAR, // [164]
- INVALID_CHAR, // [165]
- INVALID_CHAR, // [166]
- INVALID_CHAR, // [167]
- INVALID_CHAR, // [168]
- INVALID_CHAR, // [169]
- INVALID_CHAR, // [170]
- INVALID_CHAR, // [171]
- INVALID_CHAR, // [172]
- INVALID_CHAR, // [173]
- INVALID_CHAR, // [174]
- INVALID_CHAR, // [175]
- INVALID_CHAR, // [176]
- INVALID_CHAR, // [177]
- INVALID_CHAR, // [178]
- INVALID_CHAR, // [179]
- INVALID_CHAR, // [180]
- INVALID_CHAR, // [181]
- INVALID_CHAR, // [182]
- INVALID_CHAR, // [183]
- INVALID_CHAR, // [184]
- INVALID_CHAR, // [185]
- INVALID_CHAR, // [186]
- INVALID_CHAR, // [187]
- INVALID_CHAR, // [188]
- INVALID_CHAR, // [189]
- INVALID_CHAR, // [190]
- INVALID_CHAR, // [191]
- INVALID_CHAR, // [192]
- INVALID_CHAR, // [193]
- INVALID_CHAR, // [194]
- INVALID_CHAR, // [195]
- INVALID_CHAR, // [196]
- INVALID_CHAR, // [197]
- INVALID_CHAR, // [198]
- INVALID_CHAR, // [199]
- INVALID_CHAR, // [200]
- INVALID_CHAR, // [201]
- INVALID_CHAR, // [202]
- INVALID_CHAR, // [203]
- INVALID_CHAR, // [204]
- INVALID_CHAR, // [205]
- INVALID_CHAR, // [206]
- INVALID_CHAR, // [207]
- INVALID_CHAR, // [208]
- INVALID_CHAR, // [209]
- INVALID_CHAR, // [210]
- INVALID_CHAR, // [211]
- INVALID_CHAR, // [212]
- INVALID_CHAR, // [213]
- INVALID_CHAR, // [214]
- INVALID_CHAR, // [215]
- INVALID_CHAR, // [216]
- INVALID_CHAR, // [217]
- INVALID_CHAR, // [218]
- INVALID_CHAR, // [219]
- INVALID_CHAR, // [220]
- INVALID_CHAR, // [221]
- INVALID_CHAR, // [222]
- INVALID_CHAR, // [223]
- INVALID_CHAR, // [224]
- INVALID_CHAR, // [225]
- INVALID_CHAR, // [226]
- INVALID_CHAR, // [227]
- INVALID_CHAR, // [228]
- INVALID_CHAR, // [229]
- INVALID_CHAR, // [230]
- INVALID_CHAR, // [231]
- INVALID_CHAR, // [232]
- INVALID_CHAR, // [233]
- INVALID_CHAR, // [234]
- INVALID_CHAR, // [235]
- INVALID_CHAR, // [236]
- INVALID_CHAR, // [237]
- INVALID_CHAR, // [238]
- INVALID_CHAR, // [239]
- INVALID_CHAR, // [240]
- INVALID_CHAR, // [241]
- INVALID_CHAR, // [242]
- INVALID_CHAR, // [243]
- INVALID_CHAR, // [244]
- INVALID_CHAR, // [245]
- INVALID_CHAR, // [246]
- INVALID_CHAR, // [247]
- INVALID_CHAR, // [248]
- INVALID_CHAR, // [249]
- INVALID_CHAR, // [250]
- INVALID_CHAR, // [251]
- INVALID_CHAR, // [252]
- INVALID_CHAR, // [253]
- INVALID_CHAR, // [254]
- INVALID_CHAR, // [255]
- };
-
-unsigned g_CodonWordToAminoLetter[4*4*4] =
- {
- 8 , // [ 0] = AAA K (Lys)
- 11, // [ 1] = AAC N (Asn)
- 8 , // [ 2] = AAG K (Lys)
- 11, // [ 3] = AAT N (Asn)
- 16, // [ 4] = ACA T (Thr)
- 16, // [ 5] = ACC T (Thr)
- 16, // [ 6] = ACG T (Thr)
- 16, // [ 7] = ACT T (Thr)
- 14, // [ 8] = AGA R (Arg)
- 15, // [ 9] = AGC S (Ser)
- 14, // [10] = AGG R (Arg)
- 15, // [11] = AGT S (Ser)
- 7 , // [12] = ATA I (Ile)
- 7 , // [13] = ATC I (Ile)
- 10, // [14] = ATG M (Met)
- 7 , // [15] = ATT I (Ile)
- 13, // [16] = CAA Q (Gln)
- 6 , // [17] = CAC H (His)
- 13, // [18] = CAG Q (Gln)
- 6 , // [19] = CAT H (His)
- 12, // [20] = CCA P (Pro)
- 12, // [21] = CCC P (Pro)
- 12, // [22] = CCG P (Pro)
- 12, // [23] = CCT P (Pro)
- 14, // [24] = CGA R (Arg)
- 14, // [25] = CGC R (Arg)
- 14, // [26] = CGG R (Arg)
- 14, // [27] = CGT R (Arg)
- 9 , // [28] = CTA L (Leu)
- 9 , // [29] = CTC L (Leu)
- 9 , // [30] = CTG L (Leu)
- 9 , // [31] = CTT L (Leu)
- 3 , // [32] = GAA E (Glu)
- 2 , // [33] = GAC D (Asp)
- 3 , // [34] = GAG E (Glu)
- 2 , // [35] = GAT D (Asp)
- 0 , // [36] = GCA A (Ala)
- 0 , // [37] = GCC A (Ala)
- 0 , // [38] = GCG A (Ala)
- 0 , // [39] = GCT A (Ala)
- 5 , // [40] = GGA G (Gly)
- 5 , // [41] = GGC G (Gly)
- 5 , // [42] = GGG G (Gly)
- 5 , // [43] = GGT G (Gly)
- 17, // [44] = GTA V (Val)
- 17, // [45] = GTC V (Val)
- 17, // [46] = GTG V (Val)
- 17, // [47] = GTT V (Val)
- 20, // [48] = TAA * (STP)
- 19, // [49] = TAC Y (Tyr)
- 20, // [50] = TAG * (STP)
- 19, // [51] = TAT Y (Tyr)
- 15, // [52] = TCA S (Ser)
- 15, // [53] = TCC S (Ser)
- 15, // [54] = TCG S (Ser)
- 15, // [55] = TCT S (Ser)
- 20, // [56] = TGA * (STP)
- 1 , // [57] = TGC C (Cys)
- 18, // [58] = TGG W (Trp)
- 1 , // [59] = TGT C (Cys)
- 9 , // [60] = TTA L (Leu)
- 4 , // [61] = TTC F (Phe)
- 9 , // [62] = TTG L (Leu)
- 4 , // [63] = TTT F (Phe)
- };
-
-char g_CodonWordToAminoChar[4*4*4] =
- {
- 'K', // [ 0] = AAA (Lys)
- 'N', // [ 1] = AAC (Asn)
- 'K', // [ 2] = AAG (Lys)
- 'N', // [ 3] = AAT (Asn)
- 'T', // [ 4] = ACA (Thr)
- 'T', // [ 5] = ACC (Thr)
- 'T', // [ 6] = ACG (Thr)
- 'T', // [ 7] = ACT (Thr)
- 'R', // [ 8] = AGA (Arg)
- 'S', // [ 9] = AGC (Ser)
- 'R', // [10] = AGG (Arg)
- 'S', // [11] = AGT (Ser)
- 'I', // [12] = ATA (Ile)
- 'I', // [13] = ATC (Ile)
- 'M', // [14] = ATG (Met)
- 'I', // [15] = ATT (Ile)
- 'Q', // [16] = CAA (Gln)
- 'H', // [17] = CAC (His)
- 'Q', // [18] = CAG (Gln)
- 'H', // [19] = CAT (His)
- 'P', // [20] = CCA (Pro)
- 'P', // [21] = CCC (Pro)
- 'P', // [22] = CCG (Pro)
- 'P', // [23] = CCT (Pro)
- 'R', // [24] = CGA (Arg)
- 'R', // [25] = CGC (Arg)
- 'R', // [26] = CGG (Arg)
- 'R', // [27] = CGT (Arg)
- 'L', // [28] = CTA (Leu)
- 'L', // [29] = CTC (Leu)
- 'L', // [30] = CTG (Leu)
- 'L', // [31] = CTT (Leu)
- 'E', // [32] = GAA (Glu)
- 'D', // [33] = GAC (Asp)
- 'E', // [34] = GAG (Glu)
- 'D', // [35] = GAT (Asp)
- 'A', // [36] = GCA (Ala)
- 'A', // [37] = GCC (Ala)
- 'A', // [38] = GCG (Ala)
- 'A', // [39] = GCT (Ala)
- 'G', // [40] = GGA (Gly)
- 'G', // [41] = GGC (Gly)
- 'G', // [42] = GGG (Gly)
- 'G', // [43] = GGT (Gly)
- 'V', // [44] = GTA (Val)
- 'V', // [45] = GTC (Val)
- 'V', // [46] = GTG (Val)
- 'V', // [47] = GTT (Val)
- '*', // [48] = TAA (STP)
- 'Y', // [49] = TAC (Tyr)
- '*', // [50] = TAG (STP)
- 'Y', // [51] = TAT (Tyr)
- 'S', // [52] = TCA (Ser)
- 'S', // [53] = TCC (Ser)
- 'S', // [54] = TCG (Ser)
- 'S', // [55] = TCT (Ser)
- '*', // [56] = TGA (STP)
- 'C', // [57] = TGC (Cys)
- 'W', // [58] = TGG (Trp)
- 'C', // [59] = TGT (Cys)
- 'L', // [60] = TTA (Leu)
- 'F', // [61] = TTC (Phe)
- 'L', // [62] = TTG (Leu)
- 'F', // [63] = TTT (Phe)
- };
-
-unsigned char g_CharToCompChar[256] =
- {
- INVALID_CHAR, // [ 0]
- INVALID_CHAR, // [ 1]
- INVALID_CHAR, // [ 2]
- INVALID_CHAR, // [ 3]
- INVALID_CHAR, // [ 4]
- INVALID_CHAR, // [ 5]
- INVALID_CHAR, // [ 6]
- INVALID_CHAR, // [ 7]
- INVALID_CHAR, // [ 8]
- INVALID_CHAR, // [ 9]
- INVALID_CHAR, // [ 10]
- INVALID_CHAR, // [ 11]
- INVALID_CHAR, // [ 12]
- INVALID_CHAR, // [ 13]
- INVALID_CHAR, // [ 14]
- INVALID_CHAR, // [ 15]
- INVALID_CHAR, // [ 16]
- INVALID_CHAR, // [ 17]
- INVALID_CHAR, // [ 18]
- INVALID_CHAR, // [ 19]
- INVALID_CHAR, // [ 20]
- INVALID_CHAR, // [ 21]
- INVALID_CHAR, // [ 22]
- INVALID_CHAR, // [ 23]
- INVALID_CHAR, // [ 24]
- INVALID_CHAR, // [ 25]
- INVALID_CHAR, // [ 26]
- INVALID_CHAR, // [ 27]
- INVALID_CHAR, // [ 28]
- INVALID_CHAR, // [ 29]
- INVALID_CHAR, // [ 30]
- INVALID_CHAR, // [ 31]
- INVALID_CHAR, // [ 32]
- INVALID_CHAR, // [ 33]
- INVALID_CHAR, // [ 34]
- INVALID_CHAR, // [ 35]
- INVALID_CHAR, // [ 36]
- INVALID_CHAR, // [ 37]
- INVALID_CHAR, // [ 38]
- INVALID_CHAR, // [ 39]
- INVALID_CHAR, // [ 40]
- INVALID_CHAR, // [ 41]
- INVALID_CHAR, // [ 42]
- INVALID_CHAR, // [ 43]
- INVALID_CHAR, // [ 44]
- INVALID_CHAR, // [ 45]
- INVALID_CHAR, // [ 46]
- INVALID_CHAR, // [ 47]
- INVALID_CHAR, // [ 48]
- INVALID_CHAR, // [ 49]
- INVALID_CHAR, // [ 50]
- INVALID_CHAR, // [ 51]
- INVALID_CHAR, // [ 52]
- INVALID_CHAR, // [ 53]
- INVALID_CHAR, // [ 54]
- INVALID_CHAR, // [ 55]
- INVALID_CHAR, // [ 56]
- INVALID_CHAR, // [ 57]
- INVALID_CHAR, // [ 58]
- INVALID_CHAR, // [ 59]
- INVALID_CHAR, // [ 60]
- INVALID_CHAR, // [ 61]
- INVALID_CHAR, // [ 62]
- INVALID_CHAR, // [ 63]
- INVALID_CHAR, // [ 64]
- 'T', // [ 65] A -> T
- INVALID_CHAR, // [ 66]
- 'G', // [ 67] C -> G
- INVALID_CHAR, // [ 68]
- INVALID_CHAR, // [ 69]
- INVALID_CHAR, // [ 70]
- 'C', // [ 71] G -> C
- INVALID_CHAR, // [ 72]
- INVALID_CHAR, // [ 73]
- INVALID_CHAR, // [ 74]
- INVALID_CHAR, // [ 75]
- INVALID_CHAR, // [ 76]
- INVALID_CHAR, // [ 77]
- INVALID_CHAR, // [ 78]
- INVALID_CHAR, // [ 79]
- INVALID_CHAR, // [ 80]
- INVALID_CHAR, // [ 81]
- INVALID_CHAR, // [ 82]
- INVALID_CHAR, // [ 83]
- 'A', // [ 84] T -> A
- 'A', // [ 85] U -> A
- INVALID_CHAR, // [ 86]
- INVALID_CHAR, // [ 87]
- INVALID_CHAR, // [ 88]
- INVALID_CHAR, // [ 89]
- INVALID_CHAR, // [ 90]
- INVALID_CHAR, // [ 91]
- INVALID_CHAR, // [ 92]
- INVALID_CHAR, // [ 93]
- INVALID_CHAR, // [ 94]
- INVALID_CHAR, // [ 95]
- INVALID_CHAR, // [ 96]
- 'T', // [ 97] a -> T
- INVALID_CHAR, // [ 98]
- 'G', // [ 99] c -> G
- INVALID_CHAR, // [100]
- INVALID_CHAR, // [101]
- INVALID_CHAR, // [102]
- 'C', // [103] g -> C
- INVALID_CHAR, // [104]
- INVALID_CHAR, // [105]
- INVALID_CHAR, // [106]
- INVALID_CHAR, // [107]
- INVALID_CHAR, // [108]
- INVALID_CHAR, // [109]
- INVALID_CHAR, // [110]
- INVALID_CHAR, // [111]
- INVALID_CHAR, // [112]
- INVALID_CHAR, // [113]
- INVALID_CHAR, // [114]
- INVALID_CHAR, // [115]
- 'A', // [116] t -> A
- 'A', // [117] u -> A
- INVALID_CHAR, // [118]
- INVALID_CHAR, // [119]
- INVALID_CHAR, // [120]
- INVALID_CHAR, // [121]
- INVALID_CHAR, // [122]
- INVALID_CHAR, // [123]
- INVALID_CHAR, // [124]
- INVALID_CHAR, // [125]
- INVALID_CHAR, // [126]
- INVALID_CHAR, // [127]
- INVALID_CHAR, // [128]
- INVALID_CHAR, // [129]
- INVALID_CHAR, // [130]
- INVALID_CHAR, // [131]
- INVALID_CHAR, // [132]
- INVALID_CHAR, // [133]
- INVALID_CHAR, // [134]
- INVALID_CHAR, // [135]
- INVALID_CHAR, // [136]
- INVALID_CHAR, // [137]
- INVALID_CHAR, // [138]
- INVALID_CHAR, // [139]
- INVALID_CHAR, // [140]
- INVALID_CHAR, // [141]
- INVALID_CHAR, // [142]
- INVALID_CHAR, // [143]
- INVALID_CHAR, // [144]
- INVALID_CHAR, // [145]
- INVALID_CHAR, // [146]
- INVALID_CHAR, // [147]
- INVALID_CHAR, // [148]
- INVALID_CHAR, // [149]
- INVALID_CHAR, // [150]
- INVALID_CHAR, // [151]
- INVALID_CHAR, // [152]
- INVALID_CHAR, // [153]
- INVALID_CHAR, // [154]
- INVALID_CHAR, // [155]
- INVALID_CHAR, // [156]
- INVALID_CHAR, // [157]
- INVALID_CHAR, // [158]
- INVALID_CHAR, // [159]
- INVALID_CHAR, // [160]
- INVALID_CHAR, // [161]
- INVALID_CHAR, // [162]
- INVALID_CHAR, // [163]
- INVALID_CHAR, // [164]
- INVALID_CHAR, // [165]
- INVALID_CHAR, // [166]
- INVALID_CHAR, // [167]
- INVALID_CHAR, // [168]
- INVALID_CHAR, // [169]
- INVALID_CHAR, // [170]
- INVALID_CHAR, // [171]
- INVALID_CHAR, // [172]
- INVALID_CHAR, // [173]
- INVALID_CHAR, // [174]
- INVALID_CHAR, // [175]
- INVALID_CHAR, // [176]
- INVALID_CHAR, // [177]
- INVALID_CHAR, // [178]
- INVALID_CHAR, // [179]
- INVALID_CHAR, // [180]
- INVALID_CHAR, // [181]
- INVALID_CHAR, // [182]
- INVALID_CHAR, // [183]
- INVALID_CHAR, // [184]
- INVALID_CHAR, // [185]
- INVALID_CHAR, // [186]
- INVALID_CHAR, // [187]
- INVALID_CHAR, // [188]
- INVALID_CHAR, // [189]
- INVALID_CHAR, // [190]
- INVALID_CHAR, // [191]
- INVALID_CHAR, // [192]
- INVALID_CHAR, // [193]
- INVALID_CHAR, // [194]
- INVALID_CHAR, // [195]
- INVALID_CHAR, // [196]
- INVALID_CHAR, // [197]
- INVALID_CHAR, // [198]
- INVALID_CHAR, // [199]
- INVALID_CHAR, // [200]
- INVALID_CHAR, // [201]
- INVALID_CHAR, // [202]
- INVALID_CHAR, // [203]
- INVALID_CHAR, // [204]
- INVALID_CHAR, // [205]
- INVALID_CHAR, // [206]
- INVALID_CHAR, // [207]
- INVALID_CHAR, // [208]
- INVALID_CHAR, // [209]
- INVALID_CHAR, // [210]
- INVALID_CHAR, // [211]
- INVALID_CHAR, // [212]
- INVALID_CHAR, // [213]
- INVALID_CHAR, // [214]
- INVALID_CHAR, // [215]
- INVALID_CHAR, // [216]
- INVALID_CHAR, // [217]
- INVALID_CHAR, // [218]
- INVALID_CHAR, // [219]
- INVALID_CHAR, // [220]
- INVALID_CHAR, // [221]
- INVALID_CHAR, // [222]
- INVALID_CHAR, // [223]
- INVALID_CHAR, // [224]
- INVALID_CHAR, // [225]
- INVALID_CHAR, // [226]
- INVALID_CHAR, // [227]
- INVALID_CHAR, // [228]
- INVALID_CHAR, // [229]
- INVALID_CHAR, // [230]
- INVALID_CHAR, // [231]
- INVALID_CHAR, // [232]
- INVALID_CHAR, // [233]
- INVALID_CHAR, // [234]
- INVALID_CHAR, // [235]
- INVALID_CHAR, // [236]
- INVALID_CHAR, // [237]
- INVALID_CHAR, // [238]
- INVALID_CHAR, // [239]
- INVALID_CHAR, // [240]
- INVALID_CHAR, // [241]
- INVALID_CHAR, // [242]
- INVALID_CHAR, // [243]
- INVALID_CHAR, // [244]
- INVALID_CHAR, // [245]
- INVALID_CHAR, // [246]
- INVALID_CHAR, // [247]
- INVALID_CHAR, // [248]
- INVALID_CHAR, // [249]
- INVALID_CHAR, // [250]
- INVALID_CHAR, // [251]
- INVALID_CHAR, // [252]
- INVALID_CHAR, // [253]
- INVALID_CHAR, // [254]
- INVALID_CHAR, // [255]
-};
-
-unsigned g_CharToCompLetter[256] =
- {
- INVALID_LETTER, // [ 0]
- INVALID_LETTER, // [ 1]
- INVALID_LETTER, // [ 2]
- INVALID_LETTER, // [ 3]
- INVALID_LETTER, // [ 4]
- INVALID_LETTER, // [ 5]
- INVALID_LETTER, // [ 6]
- INVALID_LETTER, // [ 7]
- INVALID_LETTER, // [ 8]
- INVALID_LETTER, // [ 9]
- INVALID_LETTER, // [ 10]
- INVALID_LETTER, // [ 11]
- INVALID_LETTER, // [ 12]
- INVALID_LETTER, // [ 13]
- INVALID_LETTER, // [ 14]
- INVALID_LETTER, // [ 15]
- INVALID_LETTER, // [ 16]
- INVALID_LETTER, // [ 17]
- INVALID_LETTER, // [ 18]
- INVALID_LETTER, // [ 19]
- INVALID_LETTER, // [ 20]
- INVALID_LETTER, // [ 21]
- INVALID_LETTER, // [ 22]
- INVALID_LETTER, // [ 23]
- INVALID_LETTER, // [ 24]
- INVALID_LETTER, // [ 25]
- INVALID_LETTER, // [ 26]
- INVALID_LETTER, // [ 27]
- INVALID_LETTER, // [ 28]
- INVALID_LETTER, // [ 29]
- INVALID_LETTER, // [ 30]
- INVALID_LETTER, // [ 31]
- INVALID_LETTER, // [ 32]
- INVALID_LETTER, // [ 33]
- INVALID_LETTER, // [ 34]
- INVALID_LETTER, // [ 35]
- INVALID_LETTER, // [ 36]
- INVALID_LETTER, // [ 37]
- INVALID_LETTER, // [ 38]
- INVALID_LETTER, // [ 39]
- INVALID_LETTER, // [ 40]
- INVALID_LETTER, // [ 41]
- INVALID_LETTER, // [ 42]
- INVALID_LETTER, // [ 43]
- INVALID_LETTER, // [ 44]
- INVALID_LETTER, // [ 45]
- INVALID_LETTER, // [ 46]
- INVALID_LETTER, // [ 47]
- INVALID_LETTER, // [ 48]
- INVALID_LETTER, // [ 49]
- INVALID_LETTER, // [ 50]
- INVALID_LETTER, // [ 51]
- INVALID_LETTER, // [ 52]
- INVALID_LETTER, // [ 53]
- INVALID_LETTER, // [ 54]
- INVALID_LETTER, // [ 55]
- INVALID_LETTER, // [ 56]
- INVALID_LETTER, // [ 57]
- INVALID_LETTER, // [ 58]
- INVALID_LETTER, // [ 59]
- INVALID_LETTER, // [ 60]
- INVALID_LETTER, // [ 61]
- INVALID_LETTER, // [ 62]
- INVALID_LETTER, // [ 63]
- INVALID_LETTER, // [ 64]
- 3, // [ 65] A -> T
- INVALID_LETTER, // [ 66]
- 2, // [ 67] C -> G
- INVALID_LETTER, // [ 68]
- INVALID_LETTER, // [ 69]
- INVALID_LETTER, // [ 70]
- 1, // [ 71] G -> C
- INVALID_LETTER, // [ 72]
- INVALID_LETTER, // [ 73]
- INVALID_LETTER, // [ 74]
- INVALID_LETTER, // [ 75]
- INVALID_LETTER, // [ 76]
- INVALID_LETTER, // [ 77]
- INVALID_LETTER, // [ 78]
- INVALID_LETTER, // [ 79]
- INVALID_LETTER, // [ 80]
- INVALID_LETTER, // [ 81]
- INVALID_LETTER, // [ 82]
- INVALID_LETTER, // [ 83]
- 0, // [ 84] T -> A
- 0, // [ 85] U -> A
- INVALID_LETTER, // [ 86]
- INVALID_LETTER, // [ 87]
- INVALID_LETTER, // [ 88]
- INVALID_LETTER, // [ 89]
- INVALID_LETTER, // [ 90]
- INVALID_LETTER, // [ 91]
- INVALID_LETTER, // [ 92]
- INVALID_LETTER, // [ 93]
- INVALID_LETTER, // [ 94]
- INVALID_LETTER, // [ 95]
- INVALID_LETTER, // [ 96]
- 3, // [ 97] a -> T
- INVALID_LETTER, // [ 98]
- 2, // [ 99] c -> G
- INVALID_LETTER, // [100]
- INVALID_LETTER, // [101]
- INVALID_LETTER, // [102]
- 1, // [103] g -> C
- INVALID_LETTER, // [104]
- INVALID_LETTER, // [105]
- INVALID_LETTER, // [106]
- INVALID_LETTER, // [107]
- INVALID_LETTER, // [108]
- INVALID_LETTER, // [109]
- INVALID_LETTER, // [110]
- INVALID_LETTER, // [111]
- INVALID_LETTER, // [112]
- INVALID_LETTER, // [113]
- INVALID_LETTER, // [114]
- INVALID_LETTER, // [115]
- 0, // [116] t -> A
- 0, // [117] u -> A
- INVALID_LETTER, // [118]
- INVALID_LETTER, // [119]
- INVALID_LETTER, // [120]
- INVALID_LETTER, // [121]
- INVALID_LETTER, // [122]
- INVALID_LETTER, // [123]
- INVALID_LETTER, // [124]
- INVALID_LETTER, // [125]
- INVALID_LETTER, // [126]
- INVALID_LETTER, // [127]
- INVALID_LETTER, // [128]
- INVALID_LETTER, // [129]
- INVALID_LETTER, // [130]
- INVALID_LETTER, // [131]
- INVALID_LETTER, // [132]
- INVALID_LETTER, // [133]
- INVALID_LETTER, // [134]
- INVALID_LETTER, // [135]
- INVALID_LETTER, // [136]
- INVALID_LETTER, // [137]
- INVALID_LETTER, // [138]
- INVALID_LETTER, // [139]
- INVALID_LETTER, // [140]
- INVALID_LETTER, // [141]
- INVALID_LETTER, // [142]
- INVALID_LETTER, // [143]
- INVALID_LETTER, // [144]
- INVALID_LETTER, // [145]
- INVALID_LETTER, // [146]
- INVALID_LETTER, // [147]
- INVALID_LETTER, // [148]
- INVALID_LETTER, // [149]
- INVALID_LETTER, // [150]
- INVALID_LETTER, // [151]
- INVALID_LETTER, // [152]
- INVALID_LETTER, // [153]
- INVALID_LETTER, // [154]
- INVALID_LETTER, // [155]
- INVALID_LETTER, // [156]
- INVALID_LETTER, // [157]
- INVALID_LETTER, // [158]
- INVALID_LETTER, // [159]
- INVALID_LETTER, // [160]
- INVALID_LETTER, // [161]
- INVALID_LETTER, // [162]
- INVALID_LETTER, // [163]
- INVALID_LETTER, // [164]
- INVALID_LETTER, // [165]
- INVALID_LETTER, // [166]
- INVALID_LETTER, // [167]
- INVALID_LETTER, // [168]
- INVALID_LETTER, // [169]
- INVALID_LETTER, // [170]
- INVALID_LETTER, // [171]
- INVALID_LETTER, // [172]
- INVALID_LETTER, // [173]
- INVALID_LETTER, // [174]
- INVALID_LETTER, // [175]
- INVALID_LETTER, // [176]
- INVALID_LETTER, // [177]
- INVALID_LETTER, // [178]
- INVALID_LETTER, // [179]
- INVALID_LETTER, // [180]
- INVALID_LETTER, // [181]
- INVALID_LETTER, // [182]
- INVALID_LETTER, // [183]
- INVALID_LETTER, // [184]
- INVALID_LETTER, // [185]
- INVALID_LETTER, // [186]
- INVALID_LETTER, // [187]
- INVALID_LETTER, // [188]
- INVALID_LETTER, // [189]
- INVALID_LETTER, // [190]
- INVALID_LETTER, // [191]
- INVALID_LETTER, // [192]
- INVALID_LETTER, // [193]
- INVALID_LETTER, // [194]
- INVALID_LETTER, // [195]
- INVALID_LETTER, // [196]
- INVALID_LETTER, // [197]
- INVALID_LETTER, // [198]
- INVALID_LETTER, // [199]
- INVALID_LETTER, // [200]
- INVALID_LETTER, // [201]
- INVALID_LETTER, // [202]
- INVALID_LETTER, // [203]
- INVALID_LETTER, // [204]
- INVALID_LETTER, // [205]
- INVALID_LETTER, // [206]
- INVALID_LETTER, // [207]
- INVALID_LETTER, // [208]
- INVALID_LETTER, // [209]
- INVALID_LETTER, // [210]
- INVALID_LETTER, // [211]
- INVALID_LETTER, // [212]
- INVALID_LETTER, // [213]
- INVALID_LETTER, // [214]
- INVALID_LETTER, // [215]
- INVALID_LETTER, // [216]
- INVALID_LETTER, // [217]
- INVALID_LETTER, // [218]
- INVALID_LETTER, // [219]
- INVALID_LETTER, // [220]
- INVALID_LETTER, // [221]
- INVALID_LETTER, // [222]
- INVALID_LETTER, // [223]
- INVALID_LETTER, // [224]
- INVALID_LETTER, // [225]
- INVALID_LETTER, // [226]
- INVALID_LETTER, // [227]
- INVALID_LETTER, // [228]
- INVALID_LETTER, // [229]
- INVALID_LETTER, // [230]
- INVALID_LETTER, // [231]
- INVALID_LETTER, // [232]
- INVALID_LETTER, // [233]
- INVALID_LETTER, // [234]
- INVALID_LETTER, // [235]
- INVALID_LETTER, // [236]
- INVALID_LETTER, // [237]
- INVALID_LETTER, // [238]
- INVALID_LETTER, // [239]
- INVALID_LETTER, // [240]
- INVALID_LETTER, // [241]
- INVALID_LETTER, // [242]
- INVALID_LETTER, // [243]
- INVALID_LETTER, // [244]
- INVALID_LETTER, // [245]
- INVALID_LETTER, // [246]
- INVALID_LETTER, // [247]
- INVALID_LETTER, // [248]
- INVALID_LETTER, // [249]
- INVALID_LETTER, // [250]
- INVALID_LETTER, // [251]
- INVALID_LETTER, // [252]
- INVALID_LETTER, // [253]
- INVALID_LETTER, // [254]
- INVALID_LETTER, // [255]
-};
-
-bool g_IsAminoChar[256] =
- {
- false, // [ 0] 0x00
- false, // [ 1] 0x01
- false, // [ 2] 0x02
- false, // [ 3] 0x03
- false, // [ 4] 0x04
- false, // [ 5] 0x05
- false, // [ 6] 0x06
- false, // [ 7] 0x07
- false, // [ 8] 0x08
- false, // [ 9] 0x09
- false, // [ 10] 0x0a
- false, // [ 11] 0x0b
- false, // [ 12] 0x0c
- false, // [ 13] 0x0d
- false, // [ 14] 0x0e
- false, // [ 15] 0x0f
- false, // [ 16] 0x10
- false, // [ 17] 0x11
- false, // [ 18] 0x12
- false, // [ 19] 0x13
- false, // [ 20] 0x14
- false, // [ 21] 0x15
- false, // [ 22] 0x16
- false, // [ 23] 0x17
- false, // [ 24] 0x18
- false, // [ 25] 0x19
- false, // [ 26] 0x1a
- false, // [ 27] 0x1b
- false, // [ 28] 0x1c
- false, // [ 29] 0x1d
- false, // [ 30] 0x1e
- false, // [ 31] 0x1f
- false, // [ 32] ' '
- false, // [ 33] '!'
- false, // [ 34] '"'
- false, // [ 35] '#'
- false, // [ 36] '$'
- false, // [ 37] '%'
- false, // [ 38] '&'
- false, // [ 39] '''
- false, // [ 40] '('
- false, // [ 41] ')'
- true, // [ 42] '*' = STP
- false, // [ 43] '+'
- false, // [ 44] ','
- false, // [ 45] '-'
- false, // [ 46] '.'
- false, // [ 47] '/'
- false, // [ 48] '0'
- false, // [ 49] '1'
- false, // [ 50] '2'
- false, // [ 51] '3'
- false, // [ 52] '4'
- false, // [ 53] '5'
- false, // [ 54] '6'
- false, // [ 55] '7'
- false, // [ 56] '8'
- false, // [ 57] '9'
- false, // [ 58] ':'
- false, // [ 59] ';'
- false, // [ 60] '<'
- false, // [ 61] '='
- false, // [ 62] '>'
- false, // [ 63] '?'
- false, // [ 64] '@'
- true, // [ 65] 'A' = Ala
- false, // [ 66] 'B'
- true, // [ 67] 'C' = Cys
- true, // [ 68] 'D' = Asp
- true, // [ 69] 'E' = Glu
- true, // [ 70] 'F' = Phe
- true, // [ 71] 'G' = Gly
- true, // [ 72] 'H' = His
- true, // [ 73] 'I' = Ile
- false, // [ 74] 'J'
- true, // [ 75] 'K' = Lys
- true, // [ 76] 'L' = Leu
- true, // [ 77] 'M' = Met
- true, // [ 78] 'N' = Asn
- false, // [ 79] 'O'
- true, // [ 80] 'P' = Pro
- true, // [ 81] 'Q' = Gln
- true, // [ 82] 'R' = Arg
- true, // [ 83] 'S' = Ser
- true, // [ 84] 'T' = Thr
- false, // [ 85] 'U'
- true, // [ 86] 'V' = Val
- true, // [ 87] 'W' = Trp
- false, // [ 88] 'X'
- true, // [ 89] 'Y' = Tyr
- false, // [ 90] 'Z'
- false, // [ 91] '['
- false, // [ 92] '\'
- false, // [ 93] ']'
- false, // [ 94] '^'
- false, // [ 95] '_'
- false, // [ 96] '`'
- true, // [ 97] 'A' = Ala
- false, // [ 98] 'B'
- true, // [ 99] 'C' = Cys
- true, // [100] 'D' = Asp
- true, // [101] 'E' = Glu
- true, // [102] 'F' = Phe
- true, // [103] 'G' = Gly
- true, // [104] 'H' = His
- true, // [105] 'I' = Ile
- false, // [106] 'J'
- true, // [107] 'K' = Lys
- true, // [108] 'L' = Leu
- true, // [109] 'M' = Met
- true, // [110] 'N' = Asn
- false, // [111] 'O'
- true, // [112] 'P' = Pro
- true, // [113] 'Q' = Gln
- true, // [114] 'R' = Arg
- true, // [115] 'S' = Ser
- true, // [116] 'T' = Thr
- false, // [117] 'U'
- true, // [118] 'V' = Val
- true, // [119] 'W' = Trp
- false, // [120] 'X'
- true, // [121] 'Y' = Tyr
- false, // [122] 'Z'
- false, // [123] '{'
- false, // [124] '|'
- false, // [125] '}'
- false, // [126] '~'
- false, // [127] 0x7f
- false, // [128] 0x80
- false, // [129] 0x81
- false, // [130] 0x82
- false, // [131] 0x83
- false, // [132] 0x84
- false, // [133] 0x85
- false, // [134] 0x86
- false, // [135] 0x87
- false, // [136] 0x88
- false, // [137] 0x89
- false, // [138] 0x8a
- false, // [139] 0x8b
- false, // [140] 0x8c
- false, // [141] 0x8d
- false, // [142] 0x8e
- false, // [143] 0x8f
- false, // [144] 0x90
- false, // [145] 0x91
- false, // [146] 0x92
- false, // [147] 0x93
- false, // [148] 0x94
- false, // [149] 0x95
- false, // [150] 0x96
- false, // [151] 0x97
- false, // [152] 0x98
- false, // [153] 0x99
- false, // [154] 0x9a
- false, // [155] 0x9b
- false, // [156] 0x9c
- false, // [157] 0x9d
- false, // [158] 0x9e
- false, // [159] 0x9f
- false, // [160] 0xa0
- false, // [161] 0xa1
- false, // [162] 0xa2
- false, // [163] 0xa3
- false, // [164] 0xa4
- false, // [165] 0xa5
- false, // [166] 0xa6
- false, // [167] 0xa7
- false, // [168] 0xa8
- false, // [169] 0xa9
- false, // [170] 0xaa
- false, // [171] 0xab
- false, // [172] 0xac
- false, // [173] 0xad
- false, // [174] 0xae
- false, // [175] 0xaf
- false, // [176] 0xb0
- false, // [177] 0xb1
- false, // [178] 0xb2
- false, // [179] 0xb3
- false, // [180] 0xb4
- false, // [181] 0xb5
- false, // [182] 0xb6
- false, // [183] 0xb7
- false, // [184] 0xb8
- false, // [185] 0xb9
- false, // [186] 0xba
- false, // [187] 0xbb
- false, // [188] 0xbc
- false, // [189] 0xbd
- false, // [190] 0xbe
- false, // [191] 0xbf
- false, // [192] 0xc0
- false, // [193] 0xc1
- false, // [194] 0xc2
- false, // [195] 0xc3
- false, // [196] 0xc4
- false, // [197] 0xc5
- false, // [198] 0xc6
- false, // [199] 0xc7
- false, // [200] 0xc8
- false, // [201] 0xc9
- false, // [202] 0xca
- false, // [203] 0xcb
- false, // [204] 0xcc
- false, // [205] 0xcd
- false, // [206] 0xce
- false, // [207] 0xcf
- false, // [208] 0xd0
- false, // [209] 0xd1
- false, // [210] 0xd2
- false, // [211] 0xd3
- false, // [212] 0xd4
- false, // [213] 0xd5
- false, // [214] 0xd6
- false, // [215] 0xd7
- false, // [216] 0xd8
- false, // [217] 0xd9
- false, // [218] 0xda
- false, // [219] 0xdb
- false, // [220] 0xdc
- false, // [221] 0xdd
- false, // [222] 0xde
- false, // [223] 0xdf
- false, // [224] 0xe0
- false, // [225] 0xe1
- false, // [226] 0xe2
- false, // [227] 0xe3
- false, // [228] 0xe4
- false, // [229] 0xe5
- false, // [230] 0xe6
- false, // [231] 0xe7
- false, // [232] 0xe8
- false, // [233] 0xe9
- false, // [234] 0xea
- false, // [235] 0xeb
- false, // [236] 0xec
- false, // [237] 0xed
- false, // [238] 0xee
- false, // [239] 0xef
- false, // [240] 0xf0
- false, // [241] 0xf1
- false, // [242] 0xf2
- false, // [243] 0xf3
- false, // [244] 0xf4
- false, // [245] 0xf5
- false, // [246] 0xf6
- false, // [247] 0xf7
- false, // [248] 0xf8
- false, // [249] 0xf9
- false, // [250] 0xfa
- false, // [251] 0xfb
- false, // [252] 0xfc
- false, // [253] 0xfd
- false, // [254] 0xfe
- false, // [255] 0xff
- };
-
-bool g_IsNucleoChar[256] =
- {
- false, // [ 0] 0x00
- false, // [ 1] 0x01
- false, // [ 2] 0x02
- false, // [ 3] 0x03
- false, // [ 4] 0x04
- false, // [ 5] 0x05
- false, // [ 6] 0x06
- false, // [ 7] 0x07
- false, // [ 8] 0x08
- false, // [ 9] 0x09
- false, // [ 10] 0x0a
- false, // [ 11] 0x0b
- false, // [ 12] 0x0c
- false, // [ 13] 0x0d
- false, // [ 14] 0x0e
- false, // [ 15] 0x0f
- false, // [ 16] 0x10
- false, // [ 17] 0x11
- false, // [ 18] 0x12
- false, // [ 19] 0x13
- false, // [ 20] 0x14
- false, // [ 21] 0x15
- false, // [ 22] 0x16
- false, // [ 23] 0x17
- false, // [ 24] 0x18
- false, // [ 25] 0x19
- false, // [ 26] 0x1a
- false, // [ 27] 0x1b
- false, // [ 28] 0x1c
- false, // [ 29] 0x1d
- false, // [ 30] 0x1e
- false, // [ 31] 0x1f
- false, // [ 32] ' '
- false, // [ 33] '!'
- false, // [ 34] '"'
- false, // [ 35] '#'
- false, // [ 36] '$'
- false, // [ 37] '%'
- false, // [ 38] '&'
- false, // [ 39] '''
- false, // [ 40] '('
- false, // [ 41] ')'
- false, // [ 42] '*'
- false, // [ 43] '+'
- false, // [ 44] ','
- false, // [ 45] '-'
- false, // [ 46] '.'
- false, // [ 47] '/'
- false, // [ 48] '0'
- false, // [ 49] '1'
- false, // [ 50] '2'
- false, // [ 51] '3'
- false, // [ 52] '4'
- false, // [ 53] '5'
- false, // [ 54] '6'
- false, // [ 55] '7'
- false, // [ 56] '8'
- false, // [ 57] '9'
- false, // [ 58] ':'
- false, // [ 59] ';'
- false, // [ 60] '<'
- false, // [ 61] '='
- false, // [ 62] '>'
- false, // [ 63] '?'
- false, // [ 64] '@'
- true, // [ 65] 'A' (Nucleotide)
- false, // [ 66] 'B'
- true, // [ 67] 'C' (Nucleotide)
- false, // [ 68] 'D'
- false, // [ 69] 'E'
- false, // [ 70] 'F'
- true, // [ 71] 'G' (Nucleotide)
- false, // [ 72] 'H'
- false, // [ 73] 'I'
- false, // [ 74] 'J'
- false, // [ 75] 'K'
- false, // [ 76] 'L'
- false, // [ 77] 'M'
- true, // [ 78] 'N' (Nucleotide)
- false, // [ 79] 'O'
- false, // [ 80] 'P'
- false, // [ 81] 'Q'
- false, // [ 82] 'R'
- false, // [ 83] 'S'
- true, // [ 84] 'T' (Nucleotide)
- true, // [ 85] 'U' (Nucleotide)
- false, // [ 86] 'V'
- false, // [ 87] 'W'
- false, // [ 88] 'X'
- false, // [ 89] 'Y'
- false, // [ 90] 'Z'
- false, // [ 91] '['
- false, // [ 92] '\'
- false, // [ 93] ']'
- false, // [ 94] '^'
- false, // [ 95] '_'
- false, // [ 96] '`'
- true, // [ 97] 'A' (Nucleotide)
- false, // [ 98] 'B'
- true, // [ 99] 'C' (Nucleotide)
- false, // [100] 'D'
- false, // [101] 'E'
- false, // [102] 'F'
- true, // [103] 'G' (Nucleotide)
- false, // [104] 'H'
- false, // [105] 'I'
- false, // [106] 'J'
- false, // [107] 'K'
- false, // [108] 'L'
- false, // [109] 'M'
- true, // [110] 'N' (Nucleotide)
- false, // [111] 'O'
- false, // [112] 'P'
- false, // [113] 'Q'
- false, // [114] 'R'
- false, // [115] 'S'
- true, // [116] 'T' (Nucleotide)
- true, // [117] 'U' (Nucleotide)
- false, // [118] 'V'
- false, // [119] 'W'
- false, // [120] 'X'
- false, // [121] 'Y'
- false, // [122] 'Z'
- false, // [123] '{'
- false, // [124] '|'
- false, // [125] '}'
- false, // [126] '~'
- false, // [127] 0x7f
- false, // [128] 0x80
- false, // [129] 0x81
- false, // [130] 0x82
- false, // [131] 0x83
- false, // [132] 0x84
- false, // [133] 0x85
- false, // [134] 0x86
- false, // [135] 0x87
- false, // [136] 0x88
- false, // [137] 0x89
- false, // [138] 0x8a
- false, // [139] 0x8b
- false, // [140] 0x8c
- false, // [141] 0x8d
- false, // [142] 0x8e
- false, // [143] 0x8f
- false, // [144] 0x90
- false, // [145] 0x91
- false, // [146] 0x92
- false, // [147] 0x93
- false, // [148] 0x94
- false, // [149] 0x95
- false, // [150] 0x96
- false, // [151] 0x97
- false, // [152] 0x98
- false, // [153] 0x99
- false, // [154] 0x9a
- false, // [155] 0x9b
- false, // [156] 0x9c
- false, // [157] 0x9d
- false, // [158] 0x9e
- false, // [159] 0x9f
- false, // [160] 0xa0
- false, // [161] 0xa1
- false, // [162] 0xa2
- false, // [163] 0xa3
- false, // [164] 0xa4
- false, // [165] 0xa5
- false, // [166] 0xa6
- false, // [167] 0xa7
- false, // [168] 0xa8
- false, // [169] 0xa9
- false, // [170] 0xaa
- false, // [171] 0xab
- false, // [172] 0xac
- false, // [173] 0xad
- false, // [174] 0xae
- false, // [175] 0xaf
- false, // [176] 0xb0
- false, // [177] 0xb1
- false, // [178] 0xb2
- false, // [179] 0xb3
- false, // [180] 0xb4
- false, // [181] 0xb5
- false, // [182] 0xb6
- false, // [183] 0xb7
- false, // [184] 0xb8
- false, // [185] 0xb9
- false, // [186] 0xba
- false, // [187] 0xbb
- false, // [188] 0xbc
- false, // [189] 0xbd
- false, // [190] 0xbe
- false, // [191] 0xbf
- false, // [192] 0xc0
- false, // [193] 0xc1
- false, // [194] 0xc2
- false, // [195] 0xc3
- false, // [196] 0xc4
- false, // [197] 0xc5
- false, // [198] 0xc6
- false, // [199] 0xc7
- false, // [200] 0xc8
- false, // [201] 0xc9
- false, // [202] 0xca
- false, // [203] 0xcb
- false, // [204] 0xcc
- false, // [205] 0xcd
- false, // [206] 0xce
- false, // [207] 0xcf
- false, // [208] 0xd0
- false, // [209] 0xd1
- false, // [210] 0xd2
- false, // [211] 0xd3
- false, // [212] 0xd4
- false, // [213] 0xd5
- false, // [214] 0xd6
- false, // [215] 0xd7
- false, // [216] 0xd8
- false, // [217] 0xd9
- false, // [218] 0xda
- false, // [219] 0xdb
- false, // [220] 0xdc
- false, // [221] 0xdd
- false, // [222] 0xde
- false, // [223] 0xdf
- false, // [224] 0xe0
- false, // [225] 0xe1
- false, // [226] 0xe2
- false, // [227] 0xe3
- false, // [228] 0xe4
- false, // [229] 0xe5
- false, // [230] 0xe6
- false, // [231] 0xe7
- false, // [232] 0xe8
- false, // [233] 0xe9
- false, // [234] 0xea
- false, // [235] 0xeb
- false, // [236] 0xec
- false, // [237] 0xed
- false, // [238] 0xee
- false, // [239] 0xef
- false, // [240] 0xf0
- false, // [241] 0xf1
- false, // [242] 0xf2
- false, // [243] 0xf3
- false, // [244] 0xf4
- false, // [245] 0xf5
- false, // [246] 0xf6
- false, // [247] 0xf7
- false, // [248] 0xf8
- false, // [249] 0xf9
- false, // [250] 0xfa
- false, // [251] 0xfb
- false, // [252] 0xfc
- false, // [253] 0xfd
- false, // [254] 0xfe
- false, // [255] 0xff
- };
-
-bool g_IsACGTU[256] =
- {
- false, // [ 0] 0x00
- false, // [ 1] 0x01
- false, // [ 2] 0x02
- false, // [ 3] 0x03
- false, // [ 4] 0x04
- false, // [ 5] 0x05
- false, // [ 6] 0x06
- false, // [ 7] 0x07
- false, // [ 8] 0x08
- false, // [ 9] 0x09
- false, // [ 10] 0x0a
- false, // [ 11] 0x0b
- false, // [ 12] 0x0c
- false, // [ 13] 0x0d
- false, // [ 14] 0x0e
- false, // [ 15] 0x0f
- false, // [ 16] 0x10
- false, // [ 17] 0x11
- false, // [ 18] 0x12
- false, // [ 19] 0x13
- false, // [ 20] 0x14
- false, // [ 21] 0x15
- false, // [ 22] 0x16
- false, // [ 23] 0x17
- false, // [ 24] 0x18
- false, // [ 25] 0x19
- false, // [ 26] 0x1a
- false, // [ 27] 0x1b
- false, // [ 28] 0x1c
- false, // [ 29] 0x1d
- false, // [ 30] 0x1e
- false, // [ 31] 0x1f
- false, // [ 32] ' '
- false, // [ 33] '!'
- false, // [ 34] '"'
- false, // [ 35] '#'
- false, // [ 36] '$'
- false, // [ 37] '%'
- false, // [ 38] '&'
- false, // [ 39] '''
- false, // [ 40] '('
- false, // [ 41] ')'
- false, // [ 42] '*'
- false, // [ 43] '+'
- false, // [ 44] ','
- false, // [ 45] '-'
- false, // [ 46] '.'
- false, // [ 47] '/'
- false, // [ 48] '0'
- false, // [ 49] '1'
- false, // [ 50] '2'
- false, // [ 51] '3'
- false, // [ 52] '4'
- false, // [ 53] '5'
- false, // [ 54] '6'
- false, // [ 55] '7'
- false, // [ 56] '8'
- false, // [ 57] '9'
- false, // [ 58] ':'
- false, // [ 59] ';'
- false, // [ 60] '<'
- false, // [ 61] '='
- false, // [ 62] '>'
- false, // [ 63] '?'
- false, // [ 64] '@'
- true, // [ 65] 'A' (ACGT)
- false, // [ 66] 'B'
- true, // [ 67] 'C' (ACGT)
- false, // [ 68] 'D'
- false, // [ 69] 'E'
- false, // [ 70] 'F'
- true, // [ 71] 'G' (ACGT)
- false, // [ 72] 'H'
- false, // [ 73] 'I'
- false, // [ 74] 'J'
- false, // [ 75] 'K'
- false, // [ 76] 'L'
- false, // [ 77] 'M'
- false, // [ 78] 'N'
- false, // [ 79] 'O'
- false, // [ 80] 'P'
- false, // [ 81] 'Q'
- false, // [ 82] 'R'
- false, // [ 83] 'S'
- true, // [ 84] 'T' (ACGT)
- true, // [ 85] 'U' (ACGT)
- false, // [ 86] 'V'
- false, // [ 87] 'W'
- false, // [ 88] 'X'
- false, // [ 89] 'Y'
- false, // [ 90] 'Z'
- false, // [ 91] '['
- false, // [ 92] '\'
- false, // [ 93] ']'
- false, // [ 94] '^'
- false, // [ 95] '_'
- false, // [ 96] '`'
- true, // [ 97] 'A' (ACGT)
- false, // [ 98] 'B'
- true, // [ 99] 'C' (ACGT)
- false, // [100] 'D'
- false, // [101] 'E'
- false, // [102] 'F'
- true, // [103] 'G' (ACGT)
- false, // [104] 'H'
- false, // [105] 'I'
- false, // [106] 'J'
- false, // [107] 'K'
- false, // [108] 'L'
- false, // [109] 'M'
- false, // [110] 'N'
- false, // [111] 'O'
- false, // [112] 'P'
- false, // [113] 'Q'
- false, // [114] 'R'
- false, // [115] 'S'
- true, // [116] 'T' (ACGT)
- true, // [117] 'U' (ACGT)
- false, // [118] 'V'
- false, // [119] 'W'
- false, // [120] 'X'
- false, // [121] 'Y'
- false, // [122] 'Z'
- false, // [123] '{'
- false, // [124] '|'
- false, // [125] '}'
- false, // [126] '~'
- false, // [127] 0x7f
- false, // [128] 0x80
- false, // [129] 0x81
- false, // [130] 0x82
- false, // [131] 0x83
- false, // [132] 0x84
- false, // [133] 0x85
- false, // [134] 0x86
- false, // [135] 0x87
- false, // [136] 0x88
- false, // [137] 0x89
- false, // [138] 0x8a
- false, // [139] 0x8b
- false, // [140] 0x8c
- false, // [141] 0x8d
- false, // [142] 0x8e
- false, // [143] 0x8f
- false, // [144] 0x90
- false, // [145] 0x91
- false, // [146] 0x92
- false, // [147] 0x93
- false, // [148] 0x94
- false, // [149] 0x95
- false, // [150] 0x96
- false, // [151] 0x97
- false, // [152] 0x98
- false, // [153] 0x99
- false, // [154] 0x9a
- false, // [155] 0x9b
- false, // [156] 0x9c
- false, // [157] 0x9d
- false, // [158] 0x9e
- false, // [159] 0x9f
- false, // [160] 0xa0
- false, // [161] 0xa1
- false, // [162] 0xa2
- false, // [163] 0xa3
- false, // [164] 0xa4
- false, // [165] 0xa5
- false, // [166] 0xa6
- false, // [167] 0xa7
- false, // [168] 0xa8
- false, // [169] 0xa9
- false, // [170] 0xaa
- false, // [171] 0xab
- false, // [172] 0xac
- false, // [173] 0xad
- false, // [174] 0xae
- false, // [175] 0xaf
- false, // [176] 0xb0
- false, // [177] 0xb1
- false, // [178] 0xb2
- false, // [179] 0xb3
- false, // [180] 0xb4
- false, // [181] 0xb5
- false, // [182] 0xb6
- false, // [183] 0xb7
- false, // [184] 0xb8
- false, // [185] 0xb9
- false, // [186] 0xba
- false, // [187] 0xbb
- false, // [188] 0xbc
- false, // [189] 0xbd
- false, // [190] 0xbe
- false, // [191] 0xbf
- false, // [192] 0xc0
- false, // [193] 0xc1
- false, // [194] 0xc2
- false, // [195] 0xc3
- false, // [196] 0xc4
- false, // [197] 0xc5
- false, // [198] 0xc6
- false, // [199] 0xc7
- false, // [200] 0xc8
- false, // [201] 0xc9
- false, // [202] 0xca
- false, // [203] 0xcb
- false, // [204] 0xcc
- false, // [205] 0xcd
- false, // [206] 0xce
- false, // [207] 0xcf
- false, // [208] 0xd0
- false, // [209] 0xd1
- false, // [210] 0xd2
- false, // [211] 0xd3
- false, // [212] 0xd4
- false, // [213] 0xd5
- false, // [214] 0xd6
- false, // [215] 0xd7
- false, // [216] 0xd8
- false, // [217] 0xd9
- false, // [218] 0xda
- false, // [219] 0xdb
- false, // [220] 0xdc
- false, // [221] 0xdd
- false, // [222] 0xde
- false, // [223] 0xdf
- false, // [224] 0xe0
- false, // [225] 0xe1
- false, // [226] 0xe2
- false, // [227] 0xe3
- false, // [228] 0xe4
- false, // [229] 0xe5
- false, // [230] 0xe6
- false, // [231] 0xe7
- false, // [232] 0xe8
- false, // [233] 0xe9
- false, // [234] 0xea
- false, // [235] 0xeb
- false, // [236] 0xec
- false, // [237] 0xed
- false, // [238] 0xee
- false, // [239] 0xef
- false, // [240] 0xf0
- false, // [241] 0xf1
- false, // [242] 0xf2
- false, // [243] 0xf3
- false, // [244] 0xf4
- false, // [245] 0xf5
- false, // [246] 0xf6
- false, // [247] 0xf7
- false, // [248] 0xf8
- false, // [249] 0xf9
- false, // [250] 0xfa
- false, // [251] 0xfb
- false, // [252] 0xfc
- false, // [253] 0xfd
- false, // [254] 0xfe
- false, // [255] 0xff
- };
-
-float g_AminoFreqs[20] =
- {
- 0.0777f, // 'A' = Ala
- 0.0161f, // 'C' = Cys
- 0.0527f, // 'D' = Asp
- 0.0631f, // 'E' = Glu
- 0.0417f, // 'F' = Phe
- 0.0718f, // 'G' = Gly
- 0.0238f, // 'H' = His
- 0.0606f, // 'I' = Ile
- 0.0601f, // 'K' = Lys
- 0.0906f, // 'L' = Leu
- 0.0233f, // 'M' = Met
- 0.0439f, // 'N' = Asn
- 0.0456f, // 'P' = Pro
- 0.0368f, // 'Q' = Gln
- 0.0526f, // 'R' = Arg
- 0.0639f, // 'S' = Ser
- 0.0570f, // 'T' = Thr
- 0.0712f, // 'V' = Val
- 0.0134f, // 'W' = Trp
- 0.0339f, // 'Y' = Tyr
- };
+++ /dev/null
-#ifndef alpha_h\r
-#define alpha_h\r
-\r
-#include <limits.h>\r
-#include <string>\r
-\r
-using namespace std;\r
-\r
-const unsigned INVALID_LETTER = 0;\r
-const unsigned char INVALID_CHAR = '?';\r
-\r
-extern unsigned g_CharToLetterAmino[];\r
-extern unsigned g_CharToLetterAminoStop[];\r
-extern unsigned char g_LetterToCharAmino[];\r
-extern unsigned g_CharToLetterNucleo[];\r
-extern unsigned char g_LetterToCharNucleo[];\r
-extern unsigned g_CodonWordToAminoLetter[];\r
-extern char g_CodonWordToAminoChar[];\r
-extern unsigned char g_CharToCompChar[];\r
-extern unsigned g_CharToCompLetter[];\r
-extern bool g_IsAminoChar[];\r
-extern bool g_IsNucleoChar[];\r
-extern bool g_IsACGTU[];\r
-extern float g_AminoFreqs[];\r
-\r
-extern unsigned g_CharToLetterRed[];\r
-extern unsigned char g_LetterToCharRed[];\r
-extern unsigned g_RedAlphaSize;\r
-\r
-void LogRedAlphaRed();\r
-void ReadRedAlphaFromFile(const string &FileName);\r
-unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2,\r
- unsigned char c3);\r
-\r
-static inline bool AminoLetterIsStartCodon(unsigned char Letter)\r
- {\r
- return Letter == 10;\r
- }\r
-\r
-static inline bool AminoLetterIsStopCodon(unsigned char Letter)\r
- {\r
- return Letter == 20;\r
- }\r
-\r
-const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo);\r
-const char *WordToStrNucleo(unsigned Word, unsigned WordLength);\r
-const char *WordToStrAmino(unsigned Word, unsigned WordLength);\r
-const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str);\r
-\r
-#endif // alpha_h\r
+++ /dev/null
-#include "myutils.h"\r
-#include "alpha.h"\r
-#include "timing.h"\r
-\r
-bool isgap(byte c)\r
- {\r
- return c == '-' || c == '.';\r
- }\r
-\r
-const char *WordToStrAmino(unsigned Word, unsigned WordLength)\r
- {\r
- static char Str[32];\r
- for (unsigned i = 0; i < WordLength; ++i)\r
- {\r
- unsigned Letter = Word%20;\r
- Str[WordLength-i-1] = g_LetterToCharAmino[Letter];\r
- Word /= 20;\r
- }\r
- Str[WordLength] = 0;\r
- return Str;\r
- }\r
-\r
-const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str)\r
- {\r
- for (unsigned i = 0; i < WordLength; ++i)\r
- {\r
- unsigned Letter = Word%20;\r
- Str[WordLength-i-1] = g_LetterToCharAmino[Letter];\r
- Word /= 20;\r
- }\r
- Str[WordLength] = 0;\r
- return Str;\r
- }\r
-\r
-const char *WordToStrNucleo(unsigned Word, unsigned WordLength)\r
- {\r
- static char Str[32];\r
- for (unsigned i = 0; i < WordLength; ++i)\r
- {\r
- unsigned Letter = Word%4;\r
- Str[WordLength-i-1] = g_LetterToCharNucleo[Letter];\r
- Word /= 4;\r
- }\r
- Str[WordLength] = 0;\r
- return Str;\r
- }\r
-\r
-const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo)\r
- {\r
- return (Nucleo ? WordToStrNucleo : WordToStrAmino)(Word, WordLength);\r
- }\r
-\r
-byte *RevCompAlloc(const byte *Seq, unsigned L)\r
- {\r
- byte *RCSeq = MYALLOC(byte, L, Alpha);\r
-\r
- for (unsigned i = 0; i < L; ++i)\r
- RCSeq[L-i-1] = g_CharToCompChar[Seq[i]];\r
-\r
- return RCSeq;\r
- }\r
-\r
-void RevCompInPlace(byte *Seq, unsigned L)\r
- {\r
- unsigned L1 = L - 1;\r
- unsigned L2 = L/2;\r
- for (unsigned i = 0; i < L2; ++i)\r
- {\r
- unsigned j = L1 - i;\r
- unsigned ci = Seq[i];\r
- unsigned cj = Seq[j];\r
-\r
- unsigned ri = g_CharToCompChar[ci];\r
- unsigned rj = g_CharToCompChar[cj];\r
-\r
- Seq[i] = rj;\r
- Seq[j] = ri;\r
- }\r
-\r
- if (L%2 == 1)\r
- Seq[L2] = g_CharToCompChar[Seq[L2]];\r
- }\r
-\r
-void RevComp(const byte *Seq, unsigned L, byte *RCSeq)\r
- {\r
- for (unsigned i = 0; i < L; ++i)\r
- RCSeq[L-i-1] = g_CharToCompChar[Seq[i]];\r
- }\r
-\r
-unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2,\r
- unsigned char c3)\r
- {\r
- unsigned Letter1 = g_CharToLetterNucleo[c1];\r
- unsigned Letter2 = g_CharToLetterNucleo[c2];\r
- unsigned Letter3 = g_CharToLetterNucleo[c3];\r
- unsigned Word = Letter1*(4*4) + Letter2*4 + Letter3;\r
-\r
- unsigned Letter = g_CodonWordToAminoLetter[Word];\r
- return g_LetterToCharAmino[Letter];\r
- }\r
+++ /dev/null
-#ifndef chainer_h\r
-#define chainer_h\r
-\r
-#include "hsp.h"\r
-#include "seq.h"\r
-#include <list>\r
-\r
-const float BAD_SCORE = -9e9f;\r
-\r
-struct TargetHit\r
- {\r
- unsigned TargetIndex;\r
- unsigned TargetLo;\r
- unsigned TargetHi;\r
- int QueryFrame;\r
- float RawScore; // SOMETIMES USED FOR BIT SCORE!!!\r
-// unsigned TargetLength;\r
-\r
- void LogMe() const\r
- {\r
- Log("lo %u, hi %u, frame %d, score %.1f\n",\r
- TargetLo, TargetHi, QueryFrame, RawScore);\r
- }\r
- };\r
-\r
-struct ChainData\r
- {\r
- unsigned LastHSPIndex;\r
- unsigned Ahi;\r
- unsigned Bhi;\r
- float Score;\r
- };\r
-\r
-class Chainer\r
- {\r
-public:\r
- HSPData **m_HSPs; // memory owned elsewhere\r
- unsigned m_HSPCount;\r
- unsigned m_MaxHSPCount;\r
-\r
- BPData *m_BPs;\r
-\r
- unsigned *m_PrevHSPIndexes; // Predecessor in chain\r
- float *m_HSPIndexToChainScore;\r
-\r
- list<unsigned> m_Chains; // Live HSP indexes\r
-\r
-public:\r
- Chainer();\r
- ~Chainer();\r
- void Reset();\r
- void Clear(bool ctor = false);\r
- float Chain(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain,\r
- unsigned &OptChainLength);\r
- bool ResolveOverlaps(const SeqData &SA, const SeqData &SB, double MinScore,\r
- const float * const *SubstMx, HSPData **InHSPs, unsigned InHSPCount,\r
- HSPData **OutHSPs, unsigned &OutHSPCount);\r
- void ResolveOverlap(HSPData &HSP1, HSPData &HSP2);\r
-\r
- float ChainBrute(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain,\r
- unsigned &OptChainLength);\r
- void LogMe() const;\r
- void LogHSPs(HSPData **HSPs, unsigned HSPCount) const;\r
- void LogBPs() const;\r
-\r
- static bool IsValidChain(HSPData **HSPs, unsigned HSPCount);\r
- static void AssertValidChain(HSPData **HSPs, unsigned HSPCount);\r
- static void LogChain(HSPData **HSPs, unsigned HSPCount);\r
- static void LogChain2(HSPData **HSPs, unsigned HSPCount);\r
- static float GetChainScore(HSPData **HSPs, unsigned HSPCount);\r
-\r
-private:\r
- void AllocHSPCount(unsigned MaxHSPCount);\r
- void SetBPs();\r
- void SortBPs();\r
- unsigned FindBestChainLT(unsigned Ahi, unsigned Bhi);\r
- };\r
-\r
-#endif // chainer_h\r
+++ /dev/null
-#ifndef chime_h\r
-#define chime_h\r
-\r
-#include "seq.h"\r
-\r
-struct ChimeHit2\r
- {\r
- string QLabel;\r
- string ALabel;\r
- string BLabel;\r
- string Q3;\r
- string A3;\r
- string B3;\r
-\r
- //unsigned LY, LN, LA, LD;\r
- //unsigned RY, RN, RA, RD;\r
- double PctIdQT, PctIdQA, PctIdQB, PctIdQM, PctIdAB;\r
-\r
- unsigned ColLo;\r
- unsigned ColXLo;\r
- unsigned ColXHi;\r
- unsigned ColHi;\r
- unsigned QXLo;\r
- unsigned QXHi;\r
-\r
- double Div;\r
- double Score;\r
- double H;\r
-\r
- unsigned CS_LY, CS_LN, CS_LA, CS_RY, CS_RN, CS_RA;\r
-\r
- float AbQ;\r
- float AbA;\r
- float AbB;\r
-\r
- ChimeHit2()\r
- {\r
- Clear();\r
- }\r
-\r
- void Clear()\r
- {\r
- Q3.clear();\r
- A3.clear();\r
- B3.clear();\r
- QLabel.clear();\r
- ALabel.clear();\r
- BLabel.clear();\r
-\r
- //LY = LN = LA = LD = UINT_MAX;\r
- //RY = RN = RA = RD = UINT_MAX;\r
- ColLo = ColHi = QXLo = QXHi = ColXLo = ColXHi = UINT_MAX;\r
- CS_LY = CS_LN = CS_LA = CS_RY = CS_RN = CS_RA = UINT_MAX;\r
- PctIdQT = PctIdQA = PctIdQB = PctIdQM = PctIdAB = -1.0;\r
- Div = -1.0;\r
- H = -1.0;\r
- Score = -1.0;\r
- AbQ = AbA = AbB = -1.0f;\r
- };\r
-\r
- bool Accept() const\r
- {\r
- return Score >= opt_minh && Div >= opt_mindiv && CS_LY >= opt_mindiffs && CS_RY >= opt_mindiffs;\r
- }\r
-\r
- void LogMe() const\r
- {\r
- Log("@L %c ", yon(Score >= 1.0 && Div >= 1.0));\r
- Log(" %.4f", Score);\r
- Log(" LY %u LN %u LA %u", CS_LY, CS_LN, CS_LA);\r
- Log(" RY %u RN %u RA %u", CS_RY, CS_RN, CS_RA);\r
- Log(" Div %.1f%%", Div);\r
- Log(" Q=%s", QLabel.c_str());\r
- Log(" A=%s", ALabel.c_str());\r
- Log(" B=%s", BLabel.c_str());\r
- Log(" QA %.1f%% QB=%.1f%% AB=%.1f%% QM=%.1f%%", PctIdQA, PctIdQB, PctIdAB, PctIdQM);\r
- Log("\n");\r
- }\r
-\r
- bool operator<(const ChimeHit2 &rhs) const\r
- {\r
- if (Score == rhs.Score)\r
- return Div > rhs.Div;\r
- return Score > rhs.Score;\r
- }\r
- };\r
-\r
-static inline bool isacgt(char c)\r
- {\r
- return c == 'A' || c == 'C' || c == 'G' || c == 'T';\r
- }\r
-\r
-static bool inline isgap(char c)\r
- {\r
- return c == '-' || c == '.';\r
- }\r
-\r
-void GetChunkInfo(unsigned L, unsigned &Length, vector<unsigned> &Los);\r
-float GetAbFromLabel(const string &Label);\r
-void WriteChimeHitCS(FILE *f, const ChimeHit2 &Hit);\r
-void WriteChimeHit(FILE *f, const ChimeHit2 &Hit);\r
-void WriteChimeFileHdr(FILE *f);\r
-\r
-#endif // chime_h\r
delete buf4;
istringstream iss (tempBuf,istringstream::in);
- iss >> name >> taxInfo;
+ iss >> name; m->gobble(iss);
+ iss >> taxInfo;
taxonomy[name] = taxInfo;
phyloTree->addSeqToTree(name, taxInfo);
}
//read template seqs and save
while (!inTax.eof()) {
- inTax >> name >> taxInfo;
-
+ inTax >> name; m->gobble(inTax);
+ inTax >> taxInfo;
+
taxonomy[name] = taxInfo;
phyloTree->addSeqToTree(name, taxInfo);
+++ /dev/null
-#ifndef diagbox_h\r
-#define diagbox_h\r
-\r
-struct DiagBox;\r
-\r
-void GetDiagBox(unsigned LA, unsigned LB, unsigned DiagLo, unsigned DiagHi, DiagBox &Box);\r
-void GetDiagRange(unsigned LA, unsigned LB, unsigned d,\r
- unsigned &mini, unsigned &minj, unsigned &maxi, unsigned &maxj);\r
-void GetDiagLoHi(unsigned LA, unsigned LB, const char *Path,\r
- unsigned &dlo, unsigned &dhi);\r
-\r
-struct DiagBox\r
- {\r
- DiagBox()\r
- {\r
- }\r
-\r
- DiagBox(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi)\r
- {\r
- //GetDiagBox(LA, LB, DiagLo, DiagHi, *this);\r
- //Validate();\r
- Init(LA_, LB_, DiagLo, DiagHi);\r
- }\r
-\r
- void Init(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi)\r
- {\r
- GetDiagBox(LA_, LB_, DiagLo, DiagHi, *this);\r
- Validate();\r
- }\r
-\r
- unsigned LA;\r
- unsigned LB;\r
-\r
- unsigned dlo;\r
- unsigned dhi;\r
-\r
- unsigned dlo_mini;\r
- unsigned dlo_minj;\r
-\r
- unsigned dlo_maxi;\r
- unsigned dlo_maxj;\r
-\r
- unsigned dhi_mini;\r
- unsigned dhi_minj;\r
-\r
- unsigned dhi_maxi;\r
- unsigned dhi_maxj;\r
-\r
- unsigned GetDiag(unsigned i, unsigned j) const\r
- {\r
- return LA - i + j;\r
- }\r
-\r
-// i, j are positions 0..LA-1, 0..LB-1.\r
- bool InBox(unsigned i, unsigned j) const\r
- {\r
- unsigned d = GetDiag(i, j);\r
- return d >= dlo && d <= dhi;\r
- }\r
-\r
-/***\r
-i, j are 0-based prefix lengths 0..LA, 0..LB.\r
-\r
-A full path is in the box iff all match pairs are in the box.\r
-\r
-A partial path that aligns a prefix of A to a prefix of B as\r
-in D.P.) is in the box iff it is is the prefix of at least\r
-one full path that is in the box.\r
-\r
-A D.P. matrix entry X[i][j] is in the box iff there is at\r
-least one full path aligning the first i letters of A and\r
-the first j letters of B ending in a column of type X, i.e.\r
-if there exists a partial path in the box that ends in X.\r
-\r
-Assume terminals appear in all paths, and DI/ID forbidden.\r
-\r
-Intuitively seems that by these definitions D is in box iff\r
-DM or MD is in box, I is in box iff IM or MI is in box.\r
-Don't have proof..\r
-***/\r
- bool InBoxDPM(unsigned i, unsigned j) const\r
- {\r
- // Special case for M[0][0]\r
- if (i == 0 && j == 0)\r
- return true;\r
- if (i == 0 || j == 0)\r
- return false;\r
- unsigned d = GetDiag(i-1, j-1);\r
- return d >= dlo && d <= dhi;\r
- }\r
-\r
- bool InBoxDPD(unsigned i, unsigned j) const\r
- {\r
- bool MD = i == 0 ? false : InBoxDPM(i-1, j);\r
- bool DM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);\r
- return MD || DM;\r
- }\r
-\r
- bool InBoxDPI(unsigned i, unsigned j) const\r
- {\r
- bool MI = j == 0 ? false : InBoxDPM(i, j-1);\r
- bool IM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);\r
- return MI || IM;\r
- }\r
-\r
- // d = LA - i + j = 1 .. LA+LB-1\r
- void Validate() const\r
- {\r
- asserta(dlo <= dhi);\r
- asserta(dlo >= GetDiag(LA-1, 0));\r
- asserta(dhi <= GetDiag(0, LB-1));\r
-\r
- asserta(GetDiag(dlo_mini, dlo_minj) == dlo);\r
- asserta(GetDiag(dlo_maxi, dlo_maxj) == dlo);\r
- asserta(GetDiag(dhi_mini, dhi_minj) == dhi);\r
- asserta(GetDiag(dhi_maxi, dhi_maxj) == dhi);\r
-\r
- asserta(dlo_mini >= dhi_mini);\r
- asserta(dlo_minj <= dhi_minj);\r
- asserta(dlo_maxi >= dhi_maxi);\r
- asserta(dlo_maxj <= dhi_maxj);\r
- }\r
-\r
- unsigned GetMini() const\r
- {\r
- return dhi_mini;\r
- }\r
-\r
- unsigned GetMaxi() const\r
- {\r
- return dlo_maxi;\r
- }\r
-\r
- unsigned GetMinj() const\r
- {\r
- return dlo_minj;\r
- }\r
-\r
- unsigned GetMaxj() const\r
- {\r
- return dhi_maxj;\r
- }\r
-/***\r
- i = 0..LA-1\r
- j = 0..LB-1\r
- d = LA - i + j = 1 .. LA+LB-1\r
- j = d - LA + i\r
- i = LA - d + j\r
-***/\r
- void GetRange_j(unsigned i, unsigned &Startj, unsigned &Endj) const\r
- {\r
- // j = d - LA + i\r
- if (dlo + i >= LA)\r
- Startj = dlo + i - LA;\r
- else\r
- Startj = 0;\r
-\r
- if (Startj >= LB)\r
- Startj = LB - 1;\r
-\r
- if (dhi + i + 1 >= LA)\r
- Endj = dhi + i + 1 - LA;\r
- else\r
- Endj = 0;\r
-\r
- if (Endj > LB)\r
- Endj = LB;\r
-\r
- asserta(Endj >= Startj);\r
- }\r
-\r
- void LogMe() const\r
- {\r
- Log("LA=%u LB=%d dlo(%u): (%u,%u)-(%u,%u) dhi(%u): (%u,%u)-(%u,%u) i=[%u-%u] j=[%u-%u]\n",\r
- LA, LB,\r
- dlo,\r
- dlo_mini, dlo_minj,\r
- dlo_maxi, dlo_maxj,\r
- dhi,\r
- dhi_mini, dhi_minj,\r
- dhi_maxi, dhi_maxj,\r
- GetMini(), GetMaxi(),\r
- GetMinj(), GetMaxj());\r
- }\r
- };\r
-\r
-typedef const char *(*NWDIAG)(const byte *A, unsigned LA, const byte *B, unsigned LB,
- unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm);
-
-const char *NWBandWrap(NWDIAG NW, const byte *A, unsigned LA, const byte *B, unsigned LB,
- unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm);
-\r
-#endif // diagbox_h\r
+++ /dev/null
-#ifndef dp_h\r
-#define dp_h\r
-\r
-#define SAVE_FAST 0\r
-\r
-#include "myutils.h"\r
-#include "mx.h"\r
-#include "seqdb.h"\r
-#include "diagbox.h"\r
-#include "path.h"\r
-#include "alnparams.h"\r
-#include "alnheuristics.h"\r
-#include "hspfinder.h"\r
-\r
-typedef void (*OnPathFn)(const string &Path, bool Full);\r
-\r
-enum XType\r
- {\r
- XType_Full=1,\r
- XType_Fwd=2,\r
- XType_Bwd=3,\r
- };\r
-\r
-// public\r
-float ViterbiBrute(const byte *A, unsigned LA, const byte *B, unsigned LB, \r
- unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD);\r
-\r
-float ViterbiSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, PathData &PD);\r
-\r
-float ViterbiSimpleBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, unsigned DiagLo, unsigned DiagHi, PathData &PD);\r
-\r
-float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, PathData &PD);\r
-\r
-float ViterbiFastBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD);\r
-\r
-float ViterbiFastMainDiag(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- unsigned BandRadius, const AlnParams &AP, PathData &PD);\r
-\r
-float XDropFwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
-\r
-float XDropBwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
-\r
-float XDropFwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
-\r
-float XDropBwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
-\r
-void XDropAlign(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- unsigned AncLoi, unsigned AncLoj, unsigned AncLen, const AlnParams &AP,\r
- float XDrop, HSPData &HSP, PathData &PD);\r
-\r
-float SWSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj,\r
- unsigned &Hij, PathData &PD);\r
-\r
-float SWFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj,\r
- unsigned &Hij, PathData &PD);\r
-\r
-void SWFast2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
- HSPData &HSP, PathData &PD);\r
-\r
-void SWSimple2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
- HSPData &HSP, PathData &PD);\r
-\r
-float SWUngapped(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const float * const *SubstMx, unsigned &LoA, unsigned &LoB, unsigned &Len);\r
-\r
-void SWUngapped2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
- HSPData &HSP);\r
-\r
-float SWFastNTB(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP);\r
-\r
-void GlobalAlignBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
- const AlnParams &AP, unsigned BandRadius, PathData &PD);\r
-\r
-bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &AP,\r
- const AlnHeuristics &AH, HSPFinder &HF, float MinFractId, float &HSPFractId,\r
- PathData &PD);\r
-\r
-bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path);\r
-\r
-void GetBruteMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
-void GetSimpleDPMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
-void GetSimpleBandMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
-void GetXDropFwdSimpleDPMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
-#if SAVE_FAST\r
-void GetFastMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
-void GetFastBandMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
-#endif\r
-\r
-// private\r
-void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD);\r
-void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj,\r
- unsigned &Leni, unsigned &Lenj, PathData &PD);\r
-void EnumPaths(unsigned L1, unsigned L2, bool SubPaths, OnPathFn OnPath);\r
-void AllocBit(unsigned LA, unsigned LB);\r
-\r
-const byte TRACEBITS_DM = 0x01;\r
-const byte TRACEBITS_IM = 0x02;\r
-const byte TRACEBITS_MD = 0x04;\r
-const byte TRACEBITS_MI = 0x08;\r
-const byte TRACEBITS_SM = 0x10;\r
-const byte TRACEBITS_UNINIT = ~0x1f;\r
-\r
-extern Mx<byte> g_Mx_TBBit;\r
-extern float *g_DPRow1;\r
-extern float *g_DPRow2;\r
-extern byte **g_TBBit;\r
-\r
-static inline void Max_xM(float &Score, float MM, float DM, float IM, byte &State)\r
- {\r
- Score = MM;\r
- State = 'M';\r
-\r
- if (DM > Score)\r
- {\r
- Score = DM;\r
- State = 'D';\r
- }\r
- if (IM > Score)\r
- {\r
- Score = IM;\r
- State = 'I';\r
- }\r
- }\r
-\r
-static inline void Max_xD(float &Score, float MD, float DD, byte &State)\r
- {\r
- if (MD >= DD)\r
- {\r
- Score = MD;\r
- State = 'M';\r
- }\r
- else\r
- {\r
- Score = DD;\r
- State = 'D';\r
- }\r
- }\r
-\r
-static inline void Max_xI(float &Score, float MI, float II, byte &State)\r
- {\r
- if (MI >= II)\r
- {\r
- Score = MI;\r
- State = 'M';\r
- }\r
- else\r
- {\r
- Score = II;\r
- State = 'I';\r
- }\r
- }\r
-\r
-#endif // dp_h\r
+++ /dev/null
-#ifndef evalue_h\r
-#define evalue_h\r
-\r
-#include <float.h>\r
-\r
-void SetKarlin(double GappedLambda, double UngappedLambda,\r
- double GappedK, double UngappedK, double DBLength);\\r
-\r
-double GetKarlinDBLength();\r
-void SetKarlinDBLength(double DBLength);\r
-void LogKarlin();\r
-void SetKarlinAmino(double DBLength);\r
-void SetKarlinNucleo(double DBLength);\r
-void SetKarlin(double DBLength, bool Nucleo);\r
-double ComputeBitScoreGapped(double Score);\r
-double ComputeBitScoreUngapped(double Score);\r
-double ComputeEvalueGapped(double Score, unsigned QueryLength);\r
-double ComputeEvalueUngapped(double Score, unsigned QueryLength);\r
-double ComputeMinScoreGivenEvalueAGapped(double Evalue, unsigned Area);\r
-double ComputeMinScoreGivenEvalueAUngapped(double Evalue, unsigned Area);\r
-double ComputeMinScoreGivenEvalueQGapped(double Evalue, unsigned QueryLength);\r
-double ComputeMinScoreGivenEvalueQUngapped(double Evalue, unsigned QueryLength);\r
-double ComputeEvalueGappedFromBitScore(double BitScore, unsigned QueryLength);\r
-\r
-#endif // evalue_h\r
+++ /dev/null
-#include "myutils.h"\r
-#include "alpha.h"\r
-\r
-//unsigned g_MaxL = 0;\r
-\r
-static bool *g_IsChar = g_IsAminoChar;\r
-\r
-// Term gaps allowed in query (A) only\r
-static double GetFractIdGivenPathDerep(const byte *A, const byte *B, const char *Path,\r
- char *ptrDesc)\r
- {\r
- if (*Path == 'D')\r
- {\r
- if (ptrDesc != 0)\r
- sprintf(ptrDesc, "(term gap in Query)");\r
- return 0;\r
- }\r
-\r
- const char *LastM = 0;\r
- for (const char *p = Path; *p; ++p)\r
- if (*p == 'M')\r
- LastM = p;\r
-\r
- unsigned PosA = 0;\r
- unsigned PosB = 0;\r
- unsigned Ids = 0;\r
- unsigned Diffs = 0;\r
- unsigned Cols = 0;\r
- for (const char *p = Path; *p && p != LastM; ++p)\r
- {\r
- ++Cols;\r
- char c = *p;\r
- if (c == 'M')\r
- {\r
- byte a = toupper(A[PosA]);\r
- byte b = toupper(B[PosB]);\r
- if (g_IsChar[a] && g_IsChar[b])\r
- {\r
- if (a == b)\r
- ++Ids;\r
- else\r
- ++Diffs;\r
- }\r
- else\r
- --Cols;\r
- }\r
- if (c == 'D' || c == 'I')\r
- ++Diffs;\r
- if (c == 'M' || c == 'D')\r
- ++PosA;\r
- if (c == 'M' || c == 'I')\r
- ++PosB;\r
- }\r
-\r
- double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
- if (ptrDesc != 0)\r
- sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
- return FractId;\r
- }\r
-\r
-static double GetFractIdGivenPathAllDiffs(const byte *A, const byte *B, const char *Path,\r
- char *ptrDesc)\r
- {\r
- unsigned PosA = 0;\r
- unsigned PosB = 0;\r
- unsigned Ids = 0;\r
- unsigned Diffs = 0;\r
- unsigned Cols = 0;\r
- for (const char *p = Path; *p; ++p)\r
- {\r
- ++Cols;\r
- char c = *p;\r
- if (c == 'M')\r
- {\r
- byte a = toupper(A[PosA]);\r
- byte b = toupper(B[PosB]);\r
- if (g_IsChar[a] && g_IsChar[b])\r
- {\r
- if (a == b)\r
- ++Ids;\r
- else\r
- ++Diffs;\r
- }\r
- else\r
- --Cols;\r
- }\r
- if (c == 'D' || c == 'I')\r
- ++Diffs;\r
- if (c == 'M' || c == 'D')\r
- ++PosA;\r
- if (c == 'M' || c == 'I')\r
- ++PosB;\r
- }\r
-\r
- double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
- if (ptrDesc != 0)\r
- sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
- return FractId;\r
- }\r
-\r
-static double GetFractIdGivenPathInternalDiffs(const byte *A, const byte *B,\r
- const char *Path, char *ptrDesc)\r
- {\r
- unsigned i = 0;\r
- unsigned FirstM = UINT_MAX;\r
- unsigned LastM = UINT_MAX;\r
- for (const char *p = Path; *p; ++p)\r
- {\r
- if (*p == 'M')\r
- {\r
- if (FirstM == UINT_MAX)\r
- FirstM = i;\r
- LastM = i;\r
- }\r
- ++i;\r
- }\r
- if (FirstM == UINT_MAX)\r
- {\r
- if (ptrDesc != 0)\r
- strcpy(ptrDesc, "(no matches)");\r
- return 0.0;\r
- }\r
-\r
- unsigned PosA = 0;\r
- unsigned PosB = 0;\r
- unsigned Ids = 0;\r
- unsigned Diffs = 0;\r
- unsigned Cols = 0;\r
- for (unsigned i = 0; i < FirstM; ++i)\r
- {\r
- char c = Path[i];\r
- if (c == 'M' || c == 'D')\r
- ++PosA;\r
- if (c == 'M' || c == 'I')\r
- ++PosB;\r
- }\r
-\r
- for (unsigned i = FirstM; i <= LastM; ++i)\r
- {\r
- ++Cols;\r
- char c = Path[i];\r
- if (c == 'M')\r
- {\r
- byte a = toupper(A[PosA]);\r
- byte b = toupper(B[PosB]);\r
- if (g_IsChar[a] && g_IsChar[b])\r
- {\r
- if (a == b)\r
- ++Ids;\r
- else\r
- ++Diffs;\r
- }\r
- else\r
- --Cols;\r
- }\r
- if (c == 'D' || c == 'I')\r
- ++Diffs;\r
- if (c == 'M' || c == 'D')\r
- ++PosA;\r
- if (c == 'M' || c == 'I')\r
- ++PosB;\r
- }\r
-\r
- double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
- if (ptrDesc != 0)\r
- sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
- return FractId;\r
- }\r
-\r
-static double GetFractIdGivenPathMBL(const byte *A, const byte *B, const char *Path,\r
- char *ptrDesc)\r
- {\r
- unsigned PosA = 0;\r
- unsigned PosB = 0;\r
- unsigned Mismatches = 0;\r
- unsigned Gaps = 0;\r
- for (const char *p = Path; *p; ++p)\r
- {\r
- char c = *p;\r
- if (c == 'M' && toupper(A[PosA]) != toupper(B[PosB]))\r
- ++Mismatches;\r
- if (c == 'D' || c == 'I' && (p == Path || p[-1] == 'M'))\r
- ++Gaps;\r
- if (c == 'M' || c == 'D')\r
- ++PosA;\r
- if (c == 'M' || c == 'I')\r
- ++PosB;\r
- }\r
- unsigned Diffs = Gaps + Mismatches;\r
- double FractDiffs = (PosB == 0 ? 0.0 : double(Diffs)/double(PosB));\r
- if (ptrDesc != 0)\r
- sprintf(ptrDesc, "Gap opens %u, Id=1 - [(diffs=%u)/(target_length=%u)]",\r
- Gaps, Diffs, PosB);\r
- double FractId = 1.0 - FractDiffs;\r
- if (FractId < 0.0)\r
- return 0.0;\r
- return FractId;\r
- }\r
-\r
-static double GetFractIdGivenPathBLAST(const byte *A, const byte *B, const char *Path,\r
- char *ptrDesc)\r
- {\r
- unsigned PosA = 0;\r
- unsigned PosB = 0;\r
- unsigned Ids = 0;\r
- unsigned Wilds = 0;\r
- unsigned Cols = 0;\r
- for (const char *p = Path; *p; ++p)\r
- {\r
- ++Cols;\r
- char c = *p;\r
- if (c == 'M')\r
- {\r
- byte a = toupper(A[PosA]);\r
- byte b = toupper(B[PosB]);\r
- if (g_IsChar[a] && g_IsChar[b])\r
- {\r
- if (a == b)\r
- ++Ids;\r
- }\r
- else\r
- ++Wilds;\r
- }\r
- if (c == 'M' || c == 'D')\r
- ++PosA;\r
- if (c == 'M' || c == 'I')\r
- ++PosB;\r
- }\r
- asserta(Cols >= Wilds);\r
- Cols -= Wilds;\r
- double FractId = Cols == 0 ? 0.0f : float(Ids)/float(Cols);\r
- if (ptrDesc != 0)\r
- sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
- return FractId;\r
- }\r
-\r
-static double GetFractIdGivenPathDefault(const byte *A, const byte *B, const char *Path,\r
- char *ptrDesc)\r
- {\r
- unsigned PosA = 0;\r
- unsigned PosB = 0;\r
- unsigned Ids = 0;\r
- unsigned Wilds = 0;\r
- for (const char *p = Path; *p; ++p)\r
- {\r
- char c = *p;\r
- if (c == 'M')\r
- {\r
- byte a = toupper(A[PosA]);\r
- byte b = toupper(B[PosB]);\r
- if (g_IsChar[a] && g_IsChar[b])\r
- {\r
- if (a == b)\r
- ++Ids;\r
- }\r
- else\r
- ++Wilds;\r
- }\r
- if (c == 'M' || c == 'D')\r
- ++PosA;\r
- if (c == 'M' || c == 'I')\r
- ++PosB;\r
- }\r
- unsigned MinLen = min(PosA, PosB) - Wilds;\r
- double FractId = (MinLen == 0 ? 0.0 : double(Ids)/double(MinLen));\r
- if (ptrDesc != 0)\r
- sprintf(ptrDesc, "(ids=%u/shorter_length=%u)", Ids, MinLen);\r
- return FractId;\r
- }\r
-\r
-double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
- bool Nucleo, char *ptrDesc, unsigned IdDef)\r
- {\r
- if (Nucleo)\r
- g_IsChar = g_IsACGTU;\r
- else\r
- g_IsChar = g_IsAminoChar;\r
-\r
- if (Path == 0)\r
- {\r
- if (ptrDesc != 0)\r
- strcpy(ptrDesc, "(NULL path)");\r
- return 0.0;\r
- }\r
-\r
- unsigned ColCount = (unsigned) strlen(Path);\r
- if (ColCount == 0)\r
- return 0.0;\r
-\r
- if (opt_leftjust)\r
- {\r
- if (Path[0] != 'M' || Path[ColCount-1] == 'D')\r
- {\r
- if (ptrDesc != 0)\r
- strcpy(ptrDesc, "(leftjust)");\r
- return 0.0;\r
- }\r
- }\r
-\r
- if (opt_rightjust)\r
- {\r
- if (Path[0] == 'D' || Path[ColCount-1] != 'M')\r
- {\r
- if (ptrDesc != 0)\r
- strcpy(ptrDesc, "(rightjust)");\r
- return 0.0;\r
- }\r
- }\r
-\r
- double FractId = 0.0;\r
- //if (opt_idprefix > 0)\r
- // {\r
- // for (unsigned i = 0; i < opt_idprefix; ++i)\r
- // {\r
- // char c = Path[i];\r
- // if (c != 'M' || toupper(A[i]) != toupper(B[i]))\r
- // {\r
- // if (ptrDesc != 0)\r
- // sprintf(ptrDesc, "Prefix ids %u < idprefix(%u)",\r
- // i, opt_idprefix);\r
- // return 0.0;\r
- // }\r
- // }\r
- // }\r
-\r
- //if (opt_idsuffix > 0)\r
- // {\r
- // unsigned Cols = strlen(Path);\r
- // for (unsigned i = 0; i < opt_idsuffix && i > Cols; ++i)\r
- // {\r
- // unsigned k = Cols - 1 - i;\r
- // char c = Path[k];\r
- // if (c != 'M' || toupper(A[k]) != toupper(B[k]))\r
- // {\r
- // if (ptrDesc != 0)\r
- // sprintf(ptrDesc, "Suffix ids %u < idsuffix(%u)",\r
- // i, opt_idsuffix);\r
- // return 0.0;\r
- // }\r
- // }\r
- // }\r
-\r
- if (opt_maxqgap > 0 || opt_maxtgap > 0)\r
- {\r
- unsigned L = 0;\r
- const char *LastM = 0;\r
- for (const char *p = Path; *p; ++p)\r
- if (*p == 'M')\r
- LastM = p;\r
-\r
-// g_MaxL = 0;\r
- for (const char *p = Path; *p && p != LastM; ++p)\r
- {\r
- char c = *p;\r
- switch (c)\r
- {\r
- case 'M':\r
- if (L > 0)\r
- {\r
- if (p[-1] == 'D')\r
- {\r
- if (L > opt_maxtgap)\r
- {\r
- if (ptrDesc != 0)\r
- sprintf(ptrDesc, "(maxtgap)");\r
- return 0.0;\r
- }\r
- }\r
- else if (p[-1] == 'I')\r
- {\r
- if (L > opt_maxqgap)\r
- {\r
- if (ptrDesc != 0)\r
- sprintf(ptrDesc, "(maxqgap)");\r
- return 0.0;\r
- }\r
- }\r
- else\r
- asserta(false);\r
- }\r
- L = 0;\r
- break;\r
-\r
- case 'D':\r
- case 'I':\r
- ++L;\r
- //if (L > g_MaxL)\r
- // g_MaxL = L;\r
- break;\r
-\r
- default:\r
- asserta(false);\r
- }\r
- }\r
- }\r
-\r
- switch (IdDef)\r
- {\r
- case 0:\r
- FractId = GetFractIdGivenPathDefault(A, B, Path, ptrDesc);\r
- break;\r
-\r
- case 1:\r
- FractId = GetFractIdGivenPathAllDiffs(A, B, Path, ptrDesc);\r
- break;\r
-\r
- case 2:\r
- FractId = GetFractIdGivenPathInternalDiffs(A, B, Path, ptrDesc);\r
- break;\r
-\r
- case 3:\r
- FractId = GetFractIdGivenPathMBL(A, B, Path, ptrDesc);\r
- break;\r
-\r
- case 4:\r
- FractId = GetFractIdGivenPathBLAST(A, B, Path, ptrDesc);\r
- break;\r
-\r
- case 5:\r
- FractId = GetFractIdGivenPathDerep(A, B, Path, ptrDesc);\r
- break;\r
-\r
- default:\r
- Die("--iddef %u invalid", opt_iddef);\r
- }\r
-\r
- return FractId;\r
- }\r
-\r
-double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
- bool Nucleo, char *ptrDesc)\r
- {\r
- return GetFractIdGivenPath(A, B, Path, Nucleo, ptrDesc, opt_iddef);\r
- }\r
-\r
-double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo)\r
- {\r
- return GetFractIdGivenPath(A, B, Path, Nucleo, (char *) 0);\r
- }\r
-\r
-double GetFractIdGivenPath(const byte *A, const byte *B, const string &Path)\r
- {\r
- return GetFractIdGivenPath(A, B, Path.c_str(), true);\r
- }\r
-\r
-double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path)\r
- {\r
- return GetFractIdGivenPath(A, B, Path, true);\r
- }\r
+++ /dev/null
-#include "myutils.h"\r
-#include "chime.h"\r
-#include "ultra.h"\r
-#include <set>\r
-\r
-void AddTargets(Ultra &U, const SeqData &Query, set<unsigned> &TargetIndexes);\r
-\r
-void GetChunkInfo(unsigned L, unsigned &Length, vector<unsigned> &Los)\r
- {\r
- Los.clear();\r
-\r
- if (L <= opt_minchunk)\r
- {\r
- Length = L;\r
- Los.push_back(0);\r
- return;\r
- }\r
-\r
- Length = (L - 1)/opt_chunks + 1;\r
- if (Length < opt_minchunk)\r
- Length = opt_minchunk;\r
-\r
- unsigned Lo = 0;\r
- for (;;)\r
- {\r
- if (Lo + Length >= L)\r
- {\r
- Lo = L - Length - 1;\r
- Los.push_back(Lo);\r
- return;\r
- }\r
- Los.push_back(Lo);\r
- Lo += Length;\r
- }\r
- }\r
-\r
-void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,\r
- vector<unsigned> &Parents)\r
- {\r
- Parents.clear();\r
-\r
- set<unsigned> TargetIndexes;\r
-\r
- unsigned QL = QSD.L;\r
-\r
- SeqData QuerySD = QSD;\r
-\r
- unsigned ChunkLength;\r
- vector<unsigned> ChunkLos;\r
- GetChunkInfo(QL, ChunkLength, ChunkLos);\r
- unsigned ChunkCount = SIZE(ChunkLos);\r
- for (unsigned ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)\r
- {\r
- unsigned Lo = ChunkLos[ChunkIndex];\r
- asserta(Lo + ChunkLength <= QL);\r
-\r
- const byte *Chunk = QSD.Seq + Lo;\r
-\r
- // THIS MESSES UP --self!!\r
- //char Prefix[32];\r
- //sprintf(Prefix, "%u|", Lo);\r
- //string ChunkLabel = string(Prefix) + string(QSD.Label);\r
-\r
- //QuerySD.Label = ChunkLabel.c_str();\r
- QuerySD.Seq = Chunk;\r
- QuerySD.L = ChunkLength;\r
-\r
- AddTargets(U, QuerySD, TargetIndexes);\r
-\r
- Lo += ChunkLength;\r
- }\r
-\r
- for (set<unsigned>::const_iterator p = TargetIndexes.begin();\r
- p != TargetIndexes.end(); ++p)\r
- {\r
- unsigned TargetIndex = *p;\r
- bool Accept = true;\r
- if (AbQ > 0.0f)\r
- {\r
- const char *TargetLabel = U.GetSeedLabel(TargetIndex);\r
- float AbT = GetAbFromLabel(string(TargetLabel));\r
- if (AbT > 0.0f && AbT < opt_abskew*AbQ)\r
- Accept = false;\r
- }\r
-\r
- if (Accept)\r
- Parents.push_back(TargetIndex);\r
- }\r
- }\r
+++ /dev/null
-//#if UCHIMES\r
-\r
-#include "dp.h"\r
-#include "seq.h"\r
-\r
-static AlnParams g_AP;\r
-static bool g_APInitDone = false;\r
-\r
-bool GlobalAlign(const SeqData &Query, const SeqData &Target, PathData &PD)\r
- {\r
- if (!g_APInitDone)\r
- {\r
- g_AP.InitFromCmdLine(true);\r
- g_APInitDone = true;\r
- }\r
-\r
- ViterbiFast(Query.Seq, Query.L, Target.Seq, Target.L, g_AP, PD);\r
- return true;\r
- }\r
-\r
-bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path)\r
- {\r
- PathData PD;\r
- GlobalAlign(Query, Target, PD);\r
- Path = string(PD.Start);\r
- return true;\r
- }\r
-\r
-bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &/*AP*/,\r
- const AlnHeuristics &AH, HSPFinder &/*HF*/, float /*MinFractId*/, float &/*HSPId*/, PathData &PD)\r
- {\r
- PD.Clear();\r
- string Path;\r
- bool Found = GlobalAlign(Query, Target, Path);\r
- if (!Found)\r
- return false;\r
- unsigned n = SIZE(Path);\r
- PD.Alloc(n+1);\r
- memcpy(PD.Front, Path.c_str(), n);\r
- PD.Start = PD.Front;\r
- PD.Start[n] = 0;\r
- return true;\r
- }\r
-\r
-//#endif // UCHIMES\r
+++ /dev/null
-"\n"
-"Usage\n"
-"-----\n"
-"\n"
-"uchime --input query.fasta [--db db.fasta] [--uchimeout results.uchime]\n"
-" [--uchimealns results.alns]\n"
-"\n"
-"Options\n"
-"-------\n"
-"\n"
-"--input filename\n"
-" Query sequences in FASTA format.\n"
-" If the --db option is not specificed, uchime uses de novo\n"
-" detection. In de novo mode, relative abundance must be given\n"
-" by a string /ab=xxx/ somewhere in the label, where xxx is a\n"
-" floating-point number, e.g. >F00QGH67HG/ab=1.2/.\n"
-"\n"
-"--db filename\n"
-" Reference database in FASTA format.\n"
-" Optional, if not specified uchime uses de novo mode.\n"
-"\n"
-" ***WARNING*** The database is searched ONLY on the plus strand.\n"
-" You MUST include reverse-complemented sequences in the database\n"
-" if you want both strands to be searched.\n"
-"\n"
-"--abskew x\n"
-" Minimum abundance skew. Default 1.9. De novo mode only.\n"
-" Abundance skew is:\n"
-" min [ abund(parent1), abund(parent2) ] / abund(query).\n"
-"\n"
-"--uchimeout filename\n"
-" Output in tabbed format with one record per query sequence.\n"
-" First field is score (h), second field is query label.\n"
-" For details, see manual.\n"
-"\n"
-"--uchimealns filename\n"
-" Multiple alignments of query sequences to parents in human-\n"
-" readable format. Alignments show columns with differences\n"
-" that support or contradict a chimeric model.\n"
-"\n"
-"--minh h\n"
-" Mininum score to report chimera. Default 0.3. Values from 0.1\n"
-" to 5 might be reasonable. Lower values increase sensitivity\n"
-" but may report more false positives. If you decrease --xn,\n"
-" you may need to increase --minh, and vice versa.\n"
-"\n"
-"--mindiv div\n"
-" Minimum divergence ratio, default 0.5. Div ratio is 100%% - \n"
-" %%identity between query sequence and the closest candidate for\n"
-" being a parent. If you don't care about very close chimeras,\n"
-" then you could increase --mindiv to, say, 1.0 or 2.0, and\n"
-" also decrease --min h, say to 0.1, to increase sensitivity.\n"
-" How well this works will depend on your data. Best is to\n"
-" tune parameters on a good benchmark.\n"
-"\n"
-"--xn beta\n"
-" Weight of a no vote, also called the beta parameter. Default 8.0.\n"
-" Decreasing this weight to around 3 or 4 may give better\n"
-" performance on denoised data.\n"
-"\n"
-"--dn n\n"
-" Pseudo-count prior on number of no votes. Default 1.4. Probably\n"
-" no good reason to change this unless you can retune to a good\n"
-" benchmark for your data. Reasonable values are probably in the\n"
-" range from 0.2 to 2.\n"
-"\n"
-"--xa w\n"
-" Weight of an abstain vote. Default 1. So far, results do not\n"
-" seem to be very sensitive to this parameter, but if you have\n"
-" a good training set might be worth trying. Reasonable values\n"
-" might range from 0.1 to 2.\n"
-"\n"
-"--chunks n\n"
-" Number of chunks to extract from the query sequence when searching\n"
-" for parents. Default 4.\n"
-"\n"
-"--[no]ovchunks\n"
-" [Do not] use overlapping chunks. Default do not.\n"
-"\n"
-"--minchunk n\n"
-" Minimum length of a chunk. Default 64.\n"
-"\n"
-"--idsmoothwindow w\n"
-" Length of id smoothing window. Default 32.\n"
-"\n"
-"--minsmoothid f\n"
-" Minimum factional identity over smoothed window of candidate parent.\n"
-" Default 0.95.\n"
-"\n"
-"--maxp n\n"
-" Maximum number of candidate parents to consider. Default 2. In tests so\n"
-" far, increasing --maxp gives only a very small improvement in sensivity\n"
-" but tends to increase the error rate quite a bit.\n"
-"\n"
-"--[no]skipgaps\n"
-"--[no]skipgaps2\n"
-" These options control how gapped columns affect counting of diffs.\n"
-" If --skipgaps is specified, columns containing gaps do not found as diffs.\n"
-" If --skipgaps2 is specified, if column is immediately adjacent to\n"
-" a column containing a gap, it is not counted as a diff.\n"
-" Default is --skipgaps --skipgaps2.\n"
-"\n"
-"--minlen L\n"
-"--maxlen L\n"
-" Minimum and maximum sequence length. Defaults 10, 10000.\n"
-" Applies to both query and reference sequences.\n"
-"\n"
-"--ucl\n"
-" Use local-X alignments. Default is global-X. On tests so far, global-X\n"
-" is always better; this option is retained because it just might work\n"
-" well on some future type of data.\n"
-"\n"
-"--queryfract f\n"
-" Minimum fraction of the query sequence that must be covered by a local-X\n"
-" alignment. Default 0.5. Applies only when --ucl is specified.\n"
-"\n"
-"--quiet\n"
-" Do not display progress messages on stderr.\n"
-"\n"
-"--log filename\n"
-" Write miscellaneous information to the log file. Mostly of interest\n"
-" to me (the algorithm developer). Use --verbose to get more info.\n"
-"\n"
-"--self\n"
-" In reference database mode, exclude a reference sequence if it has\n"
-" the same label as the query. This is useful for benchmarking by using\n"
-" the ref db as a query to test for false positives.\n"
+++ /dev/null
-#ifndef hsp_h\r
-#define hsp_h 1\r
-\r
-struct HSPData\r
- {\r
- unsigned Loi;\r
- unsigned Loj;\r
- unsigned Leni;\r
- unsigned Lenj;\r
- float Score;\r
- unsigned User;\r
-\r
- unsigned GetLength() const\r
- {\r
- if (Leni != Lenj)\r
- Die("HSP::GetLength(): Leni %u, Lenj %u, Loi %u, Loj %u, Score %.1f",\r
- Leni, Lenj, Loi, Loj, Score);\r
-\r
- return Leni;\r
- }\r
-\r
- unsigned GetHii() const\r
- {\r
- assert(Leni > 0);\r
- return Loi + Leni - 1;\r
- }\r
-\r
- unsigned GetHij() const\r
- {\r
- assert(Lenj > 0);\r
- return Loj + Lenj - 1;\r
- }\r
-\r
- bool LeftA() const\r
- {\r
- return Loi == 0;\r
- }\r
-\r
- bool LeftB() const\r
- {\r
- return Loj == 0;\r
- }\r
-\r
- bool RightA(unsigned LA) const\r
- {\r
- return Loi + Leni == LA;\r
- }\r
-\r
- bool RightB(unsigned LB) const\r
- {\r
- return Loj + Lenj == LB;\r
- }\r
-\r
- unsigned GetIdCount(const byte *A, const byte *B) const\r
- {\r
- unsigned Count = 0;\r
- unsigned K = GetLength();\r
- for (unsigned k = 0; k < K; ++k)\r
- {\r
- byte a = A[Loi+k];\r
- byte b = B[Loj+k];\r
- if (toupper(a) == toupper(b))\r
- Count++;\r
- }\r
- return Count;\r
- }\r
-\r
- double OverlapFract(const HSPData &HSP) const\r
- {\r
- if (Leni == 0 || Lenj == 0)\r
- return 0.0;\r
-\r
- unsigned MaxLoi = max(Loi, HSP.Loi);\r
- unsigned MaxLoj = max(Loj, HSP.Loj);\r
- unsigned MinHii = min(GetHii(), HSP.GetHii());\r
- unsigned MinHij = min(GetHij(), HSP.GetHij());\r
-\r
- unsigned Ovi = (MinHii < MaxLoi) ? 0 : MinHii - MaxLoi;\r
- unsigned Ovj = (MinHij < MaxLoj) ? 0 : MinHij - MaxLoj;\r
-\r
- asserta(Ovi <= Leni && Ovj <= Lenj);\r
- return double(Ovi*Ovj)/double(Leni*Lenj);\r
- }\r
-\r
- bool operator<(const HSPData &rhs) const\r
- {\r
- return Loi < rhs.Loi;\r
- }\r
-\r
- void LogMe() const\r
- {\r
- Log("Loi=%u Loj=%u Li=%u Lj=%u Score=%.1f\n", Loi, Loj, Leni, Lenj, Score);\r
- }\r
-\r
- void LogMe2() const\r
- {\r
- Log("(%u-%u,%u-%u/%.1f)", Loi, GetHii(), Loj, GetHij(), Score);\r
- }\r
- };\r
-\r
-// Bendpoint\r
-struct BPData\r
- {\r
- unsigned Pos;\r
- bool IsLo;\r
- unsigned Index;\r
-\r
- void LogMe() const\r
- {\r
- Log("BP%s Pos %u Ix %u", (IsLo ? "lo" : "hi"), Pos, Index);\r
- }\r
- };\r
-\r
-#endif // hsp_h\r
+++ /dev/null
-#ifndef hspfinder_h
-#define hspfinder_h
-
-#include "seq.h"
-
-class HSPFinder
- {
-public:
- void SetA(const SeqData &/*SD*/) {}
- void SetB(const SeqData &/*SD*/) {}
- };
-
-#endif // hspfinder_h
+++ /dev/null
-#include "myutils.h"\r
-#include "sfasta.h"\r
-#include "path.h"\r
-#include "dp.h"\r
-\r
-void Make3Way(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
- const string &PathQA, const string &PathQB,\r
- string &Q3, string &A3, string &B3)\r
- {\r
- Q3.clear();\r
- A3.clear();\r
- B3.clear();\r
-\r
-#if DEBUG\r
- {\r
- unsigned QLen = 0;\r
- unsigned ALen = 0;\r
- for (unsigned i = 0; i < SIZE(PathQA); ++i)\r
- {\r
- char c = PathQA[i];\r
- if (c == 'M' || c == 'D')\r
- ++QLen;\r
- if (c == 'M' || c == 'I')\r
- ++ALen;\r
- }\r
- asserta(QLen == QSD.L);\r
- asserta(ALen == ASD.L);\r
- }\r
- {\r
- unsigned QLen = 0;\r
- unsigned BLen = 0;\r
- for (unsigned i = 0; i < SIZE(PathQB); ++i)\r
- {\r
- char c = PathQB[i];\r
- if (c == 'M' || c == 'D')\r
- ++QLen;\r
- if (c == 'M' || c == 'I')\r
- ++BLen;\r
- }\r
- asserta(QLen == QSD.L);\r
- asserta(BLen == BSD.L);\r
- }\r
-#endif\r
-\r
- const byte *Q = QSD.Seq;\r
- const byte *A = ASD.Seq;\r
- const byte *B = BSD.Seq;\r
-\r
- unsigned LQ = QSD.L;\r
- unsigned LA = ASD.L;\r
- unsigned LB = BSD.L;\r
-\r
- vector<unsigned> InsertCountsA(LQ+1, 0);\r
- unsigned QPos = 0;\r
- for (unsigned i = 0; i < SIZE(PathQA); ++i)\r
- {\r
- char c = PathQA[i];\r
- if (c == 'M' || c == 'D')\r
- ++QPos;\r
- else\r
- {\r
- asserta(c == 'I');\r
- asserta(QPos <= LQ);\r
- ++(InsertCountsA[QPos]);\r
- }\r
- }\r
-\r
- vector<unsigned> InsertCountsB(LQ+1, 0);\r
- QPos = 0;\r
- for (unsigned i = 0; i < SIZE(PathQB); ++i)\r
- {\r
- char c = PathQB[i];\r
- if (c == 'M' || c == 'D')\r
- ++QPos;\r
- else\r
- {\r
- asserta(c == 'I');\r
- asserta(QPos <= LQ);\r
- ++(InsertCountsB[QPos]);\r
- }\r
- }\r
-\r
- vector<unsigned> InsertCounts;\r
- for (unsigned i = 0; i <= LQ; ++i)\r
- {\r
- unsigned is = max(InsertCountsA[i], InsertCountsB[i]);\r
- InsertCounts.push_back(is);\r
- }\r
-\r
- for (unsigned i = 0; i < LQ; ++i)\r
- {\r
- for (unsigned k = 0; k < InsertCounts[i]; ++k)\r
- Q3.push_back('-');\r
- asserta(i < LQ);\r
- Q3.push_back(toupper(Q[i]));\r
- }\r
- for (unsigned k = 0; k < InsertCounts[LQ]; ++k)\r
- Q3.push_back('-');\r
-\r
-// A\r
- QPos = 0;\r
- unsigned APos = 0;\r
- unsigned is = 0;\r
- for (unsigned i = 0; i < SIZE(PathQA); ++i)\r
- {\r
- char c = PathQA[i];\r
- if (c == 'M' || c == 'D')\r
- {\r
- unsigned isq = InsertCounts[QPos];\r
- asserta(is <= isq);\r
- for (unsigned i = 0; i < InsertCounts[QPos]-is; ++i)\r
- A3.push_back('-');\r
- is = 0;\r
- ++QPos;\r
- }\r
- if (c == 'M')\r
- {\r
- asserta(APos < LA);\r
- A3.push_back(toupper(A[APos++]));\r
- }\r
- else if (c == 'D')\r
- A3.push_back('-');\r
- else if (c == 'I')\r
- {\r
- ++is;\r
- asserta(APos < LA);\r
- A3.push_back(toupper(A[APos++]));\r
- }\r
- }\r
- asserta(is <= InsertCounts[LQ]);\r
- for (unsigned k = 0; k < InsertCounts[LQ]-is; ++k)\r
- A3.push_back('-');\r
- asserta(QPos == LQ);\r
- asserta(APos == LA);\r
-\r
-// B\r
- QPos = 0;\r
- unsigned BPos = 0;\r
- is = 0;\r
- for (unsigned i = 0; i < SIZE(PathQB); ++i)\r
- {\r
- char c = PathQB[i];\r
- if (c == 'M' || c == 'D')\r
- {\r
- asserta(is <= InsertCounts[QPos]);\r
- for (unsigned i = 0; i < InsertCounts[QPos]-is; ++i)\r
- B3.push_back('-');\r
- is = 0;\r
- ++QPos;\r
- }\r
- if (c == 'M')\r
- {\r
- asserta(BPos < LB);\r
- B3.push_back(toupper(B[BPos++]));\r
- }\r
- else if (c == 'D')\r
- B3.push_back('-');\r
- else if (c == 'I')\r
- {\r
- ++is;\r
- asserta(BPos < LB);\r
- B3.push_back(toupper(B[BPos++]));\r
- }\r
- }\r
- asserta(is <= InsertCounts[LQ]);\r
- for (unsigned k = 0; k < InsertCounts[LQ]-is; ++k)\r
- B3.push_back('-');\r
- asserta(APos == LA);\r
- asserta(BPos == LB);\r
-\r
- asserta(SIZE(Q3) == SIZE(A3));\r
- asserta(SIZE(Q3) == SIZE(B3));\r
- }\r
#if you are a linux user use the following line
#CXXFLAGS += -mtune=native -march=native -m64
- CXXFLAGS += -DBIT_VERSION
+ CXXFLAGS += -DBIT_VERSION
FORTRAN_FLAGS = -m64
endif
+++ /dev/null
-#include "myutils.h"\r
-#include "mx.h"\r
-#include "seqdb.h"\r
-#include "seq.h"\r
-\r
-char ProbToChar(float p);\r
-\r
-list<MxBase *> *MxBase::m_Matrices = 0;\r
-unsigned MxBase::m_AllocCount;\r
-unsigned MxBase::m_ZeroAllocCount;\r
-unsigned MxBase::m_GrowAllocCount;\r
-double MxBase::m_TotalBytes;\r
-double MxBase::m_MaxBytes;\r
-\r
-static const char *LogizeStr(const char *s)\r
- {\r
- double d = atof(s);\r
- d = log(d);\r
- return TypeToStr<float>(float(d));\r
- }\r
-\r
-static const char *ExpizeStr(const char *s)\r
- {\r
- double d = atof(s);\r
- d = exp(d);\r
- return TypeToStr<float>(float(d));\r
- }\r
-\r
-void MxBase::OnCtor(MxBase *Mx)\r
- {\r
- if (m_Matrices == 0)\r
- m_Matrices = new list<MxBase *>;\r
- asserta(m_Matrices != 0);\r
- m_Matrices->push_front(Mx);\r
- }\r
-\r
-void MxBase::OnDtor(MxBase *Mx)\r
- {\r
- if (m_Matrices == 0)\r
- {\r
- Warning("MxBase::OnDtor, m_Matrices = 0");\r
- return;\r
- }\r
- for (list<MxBase*>::iterator p = m_Matrices->begin();\r
- p != m_Matrices->end(); ++p)\r
- {\r
- if (*p == Mx)\r
- {\r
- m_Matrices->erase(p);\r
- if (m_Matrices->empty())\r
- delete m_Matrices;\r
- return;\r
- }\r
- }\r
- Warning("MxBase::OnDtor, not found");\r
- }\r
-\r
-//float **MxBase::Getf(const string &Name)\r
-// {\r
-// Mx<float> *m = (Mx<float> *) Get(Name);\r
-// asserta(m->GetTypeSize() == sizeof(float));\r
-// return m->GetData();\r
-// }\r
-//\r
-//double **MxBase::Getd(const string &Name)\r
-// {\r
-// Mx<double> *m = (Mx<double> *) Get(Name);\r
-// asserta(m->GetTypeSize() == sizeof(double));\r
-// return m->GetData();\r
-// }\r
-//\r
-//char **MxBase::Getc(const string &Name)\r
-// {\r
-// Mx<char> *m = (Mx<char> *) Get(Name);\r
-// asserta(m->GetTypeSize() == sizeof(char));\r
-// return m->GetData();\r
-// }\r
-\r
-void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
- const SeqDB *DB, unsigned IdA, unsigned IdB)\r
- {\r
- Alloc(Name, RowCount, ColCount, DB, IdA, IdB, 0, 0);\r
- }\r
-\r
-void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
- const SeqData *SA, const SeqData *SB)\r
- {\r
- Alloc(Name, RowCount, ColCount, 0, UINT_MAX, UINT_MAX, SA, SB);\r
- }\r
-\r
-void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
- const SeqDB *DB, unsigned IdA, unsigned IdB, const SeqData *SA, const SeqData *SB)\r
- {\r
- StartTimer(MxBase_Alloc);\r
-\r
- ++m_AllocCount;\r
- if (m_AllocatedRowCount == 0)\r
- ++m_ZeroAllocCount;\r
-\r
- if (DB != 0)\r
- {\r
- asserta(IdA != UINT_MAX);\r
- asserta(IdB != UINT_MAX);\r
- asserta(RowCount >= DB->GetSeqLength(IdA) + 1);\r
- asserta(ColCount >= DB->GetSeqLength(IdB) + 1);\r
- }\r
- if (RowCount > m_AllocatedRowCount || ColCount > m_AllocatedColCount)\r
- {\r
- if (m_AllocatedRowCount > 0)\r
- {\r
- if (opt_logmemgrows)\r
- Log("MxBase::Alloc grow %s %u x %u -> %u x %u, %s bytes\n",\r
- Name, m_AllocatedRowCount, m_AllocatedColCount,\r
- RowCount, ColCount,\r
- IntToStr(GetBytes()));\r
- ++m_GrowAllocCount;\r
- }\r
-\r
- m_TotalBytes -= GetBytes();\r
-\r
- PauseTimer(MxBase_Alloc);\r
- StartTimer(MxBase_FreeData);\r
- FreeData();\r
- EndTimer(MxBase_FreeData);\r
- StartTimer(MxBase_Alloc);\r
-\r
- unsigned N = max(RowCount + 16, m_AllocatedRowCount);\r
- unsigned M = max(ColCount + 16, m_AllocatedColCount);\r
- N = max(N, M);\r
-\r
- PauseTimer(MxBase_Alloc);\r
- StartTimer(MxBase_AllocData);\r
- AllocData(N, N);\r
- EndTimer(MxBase_AllocData);\r
- StartTimer(MxBase_Alloc);\r
-\r
- m_TotalBytes += GetBytes();\r
- if (m_TotalBytes > m_MaxBytes)\r
- m_MaxBytes = m_TotalBytes;\r
- }\r
- \r
- unsigned n = sizeof(m_Name)-1;\r
- strncpy(m_Name, Name, n);\r
- m_Name[n] = 0;\r
- m_RowCount = RowCount;\r
- m_ColCount = ColCount;\r
- m_SeqDB = DB;\r
- m_IdA = IdA;\r
- m_IdB = IdB;\r
- m_SA = SA;\r
- m_SB = SB;\r
-\r
- EndTimer(MxBase_Alloc);\r
- }\r
-\r
-void MxBase::LogMe(bool WithData, int Opts) const\r
- {\r
- Log("\n");\r
- if (Opts & OPT_EXP)\r
- Log("Exp ");\r
- else if (Opts & OPT_LOG)\r
- Log("Log ");\r
- bool ZeroBased = ((Opts & OPT_ZERO_BASED) != 0);\r
- Log("%s(%p) Rows %u/%u, Cols %u/%u",\r
- m_Name, this,\r
- m_RowCount, m_AllocatedRowCount,\r
- m_ColCount, m_AllocatedColCount);\r
- if (m_SeqDB != 0 && m_IdA != UINT_MAX)\r
- Log(", A=%s", m_SeqDB->GetLabel(m_IdA));\r
- else if (m_SA != 0)\r
- Log(", A=%s", m_SA->Label);\r
- if (m_SeqDB != 0 && m_IdB != UINT_MAX)\r
- Log(", B=%s", m_SeqDB->GetLabel(m_IdB));\r
- else if (m_SB != 0)\r
- Log(", B=%s", m_SB->Label);\r
- Log("\n");\r
- if (!WithData || m_RowCount == 0 || m_ColCount == 0)\r
- return;\r
-\r
- const char *z = GetAsStr(0, 0);\r
- unsigned Width = strlen(z);\r
- unsigned Mod = 1;\r
- for (unsigned i = 0; i < Width; ++i)\r
- Mod *= 10;\r
-\r
- if (m_Alpha[0] != 0)\r
- {\r
- Log("// Alphabet=%s\n", m_Alpha);\r
- Log("// ");\r
- unsigned n = strlen(m_Alpha);\r
- for (unsigned j = 0; j < n; ++j)\r
- Log(" %*c", Width, m_Alpha[j]);\r
- Log("\n");\r
- for (unsigned i = 0; i < n; ++i)\r
- {\r
- Log("/* %c */ {", m_Alpha[i]);\r
- unsigned ci = m_Alpha[i];\r
- for (unsigned j = 0; j < n; ++j)\r
- {\r
- unsigned cj = m_Alpha[j];\r
- Log("%s,", GetAsStr(ci, cj));\r
- }\r
- Log("}, // %c\n", m_Alpha[i]);\r
- }\r
- return;\r
- }\r
- else if (m_Alpha2[0] != 0)\r
- {\r
- unsigned n = strlen(m_Alpha2);\r
- Log("// Alphabet=%s\n", m_Alpha2);\r
- Log("// ");\r
- for (unsigned j = 0; j < n; ++j)\r
- Log(" %*c", Width, m_Alpha2[j]);\r
- Log("\n");\r
- for (unsigned i = 0; i < n; ++i)\r
- {\r
- Log("/* %c */ {", m_Alpha2[i]);\r
- unsigned ci = m_Alpha2[i];\r
- for (unsigned j = 0; j < n; ++j)\r
- Log("%s,", GetAsStr(i, j));\r
- Log("}, // %c\n", m_Alpha2[i]);\r
- }\r
- return;\r
- }\r
-\r
- const byte *A = 0;\r
- const byte *B = 0;\r
- if (m_SeqDB != 0 && m_IdA != UINT_MAX)\r
- A = m_SeqDB->GetSeq(m_IdA);\r
- else if (m_SA != 0)\r
- A = m_SA->Seq;\r
- if (m_SeqDB != 0 && m_IdB != UINT_MAX)\r
- B = m_SeqDB->GetSeq(m_IdB);\r
- else if (m_SB != 0)\r
- B = m_SB->Seq;\r
-\r
- if (B != 0)\r
- {\r
- if (A != 0)\r
- Log(" ");\r
- Log("%5.5s", "");\r
- if (ZeroBased)\r
- for (unsigned j = 0; j < m_ColCount; ++j)\r
- Log("%*c", Width, B[j]);\r
- else\r
- for (unsigned j = 0; j < m_ColCount; ++j)\r
- Log("%*c", Width, j == 0 ? ' ' : B[j-1]);\r
- Log("\n");\r
- }\r
-\r
- if (A != 0)\r
- Log(" ");\r
- Log("%5.5s", "");\r
- for (unsigned j = 0; j < m_ColCount; ++j)\r
- Log("%*u", Width, j%Mod);\r
- Log("\n");\r
-\r
- for (unsigned i = 0; i < m_RowCount; ++i)\r
- {\r
- if (A != 0)\r
- {\r
- if (ZeroBased)\r
- Log("%c ", A[i]);\r
- else\r
- Log("%c ", i == 0 ? ' ' : A[i-1]);\r
- }\r
- Log("%4u ", i);\r
- \r
- for (unsigned j = 0; j < m_ColCount; ++j)\r
- {\r
- const char *s = GetAsStr(i, j);\r
- if (Opts & OPT_LOG)\r
- s = LogizeStr(s);\r
- else if (Opts & OPT_EXP)\r
- s = ExpizeStr(s);\r
- Log("%s", s);\r
- }\r
- Log("\n");\r
- }\r
- }\r
-static unsigned g_MatrixFileCount;\r
-\r
-void MxBase::LogCounts()\r
- {\r
- Log("\n");\r
- Log("MxBase::LogCounts()\n");\r
- Log(" What N\n");\r
- Log("---------- ----------\n");\r
- Log(" Allocs %10u\n", m_AllocCount);\r
- Log("ZeroAllocs %10u\n", m_ZeroAllocCount);\r
- Log(" Grows %10u\n", m_GrowAllocCount);\r
- Log(" Bytes %10.10s\n", MemBytesToStr(m_TotalBytes));\r
- Log(" Max bytes %10.10s\n", MemBytesToStr(m_MaxBytes));\r
- }\r
+++ /dev/null
-#ifndef mx_h\r
-#define mx_h\r
-\r
-#include <list>\r
-#include <limits.h>\r
-#include <math.h>\r
-#include "timing.h"\r
-#include "myutils.h"\r
-\r
-const int OPT_LOG = 0x01;\r
-const int OPT_EXP = 0x02;\r
-const int OPT_ZERO_BASED = 0x04;\r
-const float MINUS_INFINITY = -9e9f;\r
-const float UNINIT = -8e8f;\r
-\r
-struct SeqData;\r
-\r
-template<class T> const char *TypeToStr(T t)\r
- {\r
- Die("Unspecialised TypeToStr() called");\r
- ureturn(0);\r
- }\r
-\r
-template<> inline const char *TypeToStr<unsigned short>(unsigned short f)\r
- {\r
- static char s[16];\r
-\r
- sprintf(s, "%12u", f);\r
- return s;\r
- }\r
-\r
-template<> inline const char *TypeToStr<short>(short f)\r
- {\r
- static char s[16];\r
-\r
- sprintf(s, "%12d", f);\r
- return s;\r
- }\r
-\r
-template<> inline const char *TypeToStr<int>(int f)\r
- {\r
- static char s[16];\r
-\r
- sprintf(s, "%5d", f);\r
- return s;\r
- }\r
-\r
-template<> inline const char *TypeToStr<float>(float f)\r
- {\r
- static char s[16];\r
-\r
- if (f == UNINIT)\r
- sprintf(s, "%12.12s", "?");\r
- else if (f < MINUS_INFINITY/2)\r
- sprintf(s, "%12.12s", "*");\r
- else if (f == 0.0f)\r
- sprintf(s, "%12.12s", ".");\r
- else if (f >= -1e5 && f <= 1e5)\r
- sprintf(s, "%12.5f", f);\r
- else\r
- sprintf(s, "%12.4g", f);\r
- return s;\r
- }\r
-\r
-template<> inline const char *TypeToStr<double>(double f)\r
- {\r
- static char s[16];\r
-\r
- if (f < -1e9)\r
- sprintf(s, "%12.12s", "*");\r
- else if (f == 0.0f)\r
- sprintf(s, "%12.12s", ".");\r
- else if (f >= -1e-5 && f <= 1e5)\r
- sprintf(s, "%12.5f", f);\r
- else\r
- sprintf(s, "%12.4g", f);\r
- return s;\r
- }\r
-\r
-static inline const char *FloatToStr(float f, string &s)\r
- {\r
- s = TypeToStr<float>(f);\r
- return s.c_str();\r
- }\r
-\r
-template<> inline const char *TypeToStr<char>(char c)\r
- {\r
- static char s[2];\r
- s[0] = c;\r
- return s;\r
- }\r
-\r
-template<> inline const char *TypeToStr<byte>(byte c)\r
- {\r
- static char s[2];\r
- s[0] = c;\r
- return s;\r
- }\r
-\r
-template<> inline const char *TypeToStr<bool>(bool tof)\r
- {\r
- static char s[2];\r
- s[0] = tof ? 'T' : 'F';\r
- return s;\r
- }\r
-\r
-struct SeqDB;\r
-\r
-struct MxBase\r
- {\r
-private:\r
- MxBase(const MxBase &rhs);\r
- MxBase &operator=(const MxBase &rhs);\r
-\r
-public:\r
- char m_Name[32];\r
- char m_Alpha[32];\r
- char m_Alpha2[32];\r
- unsigned m_RowCount;\r
- unsigned m_ColCount;\r
- unsigned m_AllocatedRowCount;\r
- unsigned m_AllocatedColCount;\r
- const SeqDB *m_SeqDB;\r
- unsigned m_IdA;\r
- unsigned m_IdB;\r
- const SeqData *m_SA;\r
- const SeqData *m_SB;\r
-\r
- static list<MxBase *> *m_Matrices;\r
- //static MxBase *Get(const string &Name);\r
- //static float **Getf(const string &Name);\r
- //static double **Getd(const string &Name);\r
- //static char **Getc(const string &Name);\r
-\r
- static unsigned m_AllocCount;\r
- static unsigned m_ZeroAllocCount;\r
- static unsigned m_GrowAllocCount;\r
- static double m_TotalBytes;\r
- static double m_MaxBytes;\r
-\r
- static void OnCtor(MxBase *Mx);\r
- static void OnDtor(MxBase *Mx);\r
-\r
- MxBase()\r
- {\r
- m_AllocatedRowCount = 0;\r
- m_AllocatedColCount = 0;\r
- m_RowCount = 0;\r
- m_ColCount = 0;\r
- m_IdA = UINT_MAX;\r
- m_IdB = UINT_MAX;\r
- m_SeqDB = 0;\r
- OnCtor(this);\r
- }\r
- virtual ~MxBase()\r
- {\r
- OnDtor(this);\r
- }\r
-\r
- virtual unsigned GetTypeSize() const = 0;\r
- virtual unsigned GetBytes() const = 0;\r
-\r
- void Clear()\r
- {\r
- FreeData();\r
- m_AllocatedRowCount = 0;\r
- m_AllocatedColCount = 0;\r
- m_RowCount = 0;\r
- m_ColCount = 0;\r
- m_IdA = UINT_MAX;\r
- m_IdB = UINT_MAX;\r
- m_SA = 0;\r
- m_SB = 0;\r
- }\r
-\r
- bool Empty() const\r
- {\r
- return m_RowCount == 0;\r
- }\r
-\r
- virtual void AllocData(unsigned RowCount, unsigned ColCount) = 0;\r
- virtual void FreeData() = 0;\r
- virtual const char *GetAsStr(unsigned i, unsigned j) const = 0;\r
-\r
- void SetAlpha(const char *Alpha)\r
- {\r
- unsigned n = sizeof(m_Alpha);\r
- strncpy(m_Alpha, Alpha, n);\r
- m_Alpha[n] = 0;\r
- }\r
-\r
- void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
- const SeqDB *DB, unsigned IdA, unsigned IdB,\r
- const SeqData *SA, const SeqData *SB);\r
-\r
- void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
- const SeqDB *DB = 0, unsigned IdA = UINT_MAX, unsigned IdB = UINT_MAX);\r
-\r
- void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
- const SeqData *SA, const SeqData *SB);\r
-\r
- static void LogAll()\r
- {\r
- Log("\n");\r
- if (m_Matrices == 0)\r
- {\r
- Log("MxBase::m_Matrices=0\n");\r
- return;\r
- }\r
- Log("\n");\r
- Log("AllRows AllCols Sz MB Name\n");\r
- Log("------- ------- ---- -------- ----\n");\r
- double TotalMB = 0;\r
- for (list<MxBase *>::const_iterator p = m_Matrices->begin();\r
- p != m_Matrices->end(); ++p)\r
- {\r
- const MxBase *Mx = *p;\r
- if (Mx == 0)\r
- continue;\r
- //if (Mx->m_RowCount != 0 || ShowEmpty)\r
- // Mx->LogMe(WithData);\r
- unsigned ar = Mx->m_AllocatedRowCount;\r
- if (ar == 0)\r
- continue;\r
- unsigned ac = Mx->m_AllocatedColCount;\r
- unsigned sz = Mx->GetTypeSize();\r
- double MB = (double) ar*(double) ac*(double) sz/1e6;\r
- TotalMB += MB;\r
- Log("%7u %7u %4u %8.2f %s\n", ar, ac, sz, MB, Mx->m_Name);\r
- }\r
- Log(" --------\n");\r
- Log("%7.7s %7.7s %4.4s %8.2f\n", "", "", "", TotalMB);\r
- }\r
-\r
- void LogMe(bool WithData = true, int Opts = 0) const;\r
- static void LogCounts();\r
- };\r
-\r
-template<class T> struct Mx : public MxBase\r
- {\r
-// Disable unimplemented stuff\r
-private:\r
- Mx(Mx &rhs);\r
- Mx &operator=(Mx &rhs);\r
- // const Mx &operator=(const Mx &rhs) const;\r
-\r
-public:\r
- T **m_Data;\r
-\r
- Mx()\r
- {\r
- m_Data = 0;\r
- }\r
- \r
- ~Mx()\r
- {\r
- FreeData();\r
- }\r
-\r
- virtual void AllocData(unsigned RowCount, unsigned ColCount)\r
- {\r
- if (opt_logmemgrows)\r
- Log("MxBase::AllocData(%u,%u) %s bytes, Name=%s\n",\r
- RowCount, ColCount, IntToStr(GetBytes()), m_Name);\r
- // m_Data = myalloc<T *>(RowCount);\r
- m_Data = MYALLOC(T *, RowCount, Mx);\r
- for (unsigned i = 0; i < RowCount; ++i)\r
- // m_Data[i] = myalloc<T>(ColCount);\r
- m_Data[i] = MYALLOC(T, ColCount, Mx);\r
- AddBytes("Mx_AllocData", RowCount*sizeof(T *) + RowCount*ColCount*sizeof(T));\r
-\r
- m_AllocatedRowCount = RowCount;\r
- m_AllocatedColCount = ColCount;\r
- }\r
-\r
- virtual void FreeData()\r
- {\r
- for (unsigned i = 0; i < m_AllocatedRowCount; ++i)\r
- MYFREE(m_Data[i], m_AllocatedColCount, Mx);\r
- MYFREE(m_Data, m_AllocatedRowCount, Mx);\r
- SubBytes("Mx_AllocData",\r
- m_AllocatedRowCount*sizeof(T *) + m_AllocatedRowCount*m_AllocatedColCount*sizeof(T));\r
-\r
- m_Data = 0;\r
- m_RowCount = 0;\r
- m_ColCount = 0;\r
- m_AllocatedRowCount = 0;\r
- m_AllocatedColCount = 0;\r
- }\r
-\r
- T **GetData()\r
- {\r
- return (T **) m_Data;\r
- }\r
-\r
- T Get(unsigned i, unsigned j) const\r
- {\r
- assert(i < m_RowCount);\r
- assert(j < m_ColCount);\r
- return m_Data[i][j];\r
- }\r
-\r
- void Put(unsigned i, unsigned j, T x) const\r
- {\r
- assert(i < m_RowCount);\r
- assert(j < m_ColCount);\r
- m_Data[i][j] = x;\r
- }\r
-\r
- T GetOffDiagAvgs(vector<T> &Avgs) const\r
- {\r
- if (m_RowCount != m_ColCount)\r
- Die("GetOffDiagAvgs, not symmetrical");\r
- Avgs.clear();\r
- T Total = T(0);\r
- for (unsigned i = 0; i < m_RowCount; ++i)\r
- {\r
- T Sum = T(0);\r
- for (unsigned j = 0; j < m_ColCount; ++j)\r
- {\r
- if (j == i)\r
- continue;\r
- Sum += m_Data[i][j];\r
- }\r
- T Avg = Sum/(m_RowCount-1);\r
- Total += Avg;\r
- Avgs.push_back(Avg);\r
- }\r
- return m_RowCount == 0 ? T(0) : Total/m_RowCount;\r
- }\r
-\r
- unsigned GetTypeSize() const\r
- {\r
- return sizeof(T);\r
- }\r
-\r
- virtual unsigned GetBytes() const\r
- {\r
- return m_AllocatedRowCount*m_AllocatedColCount*GetTypeSize() +\r
- m_AllocatedRowCount*sizeof(T *);\r
- }\r
-\r
- const char *GetAsStr(unsigned i, unsigned j) const\r
- {\r
- return TypeToStr<T>(Get(i, j));\r
- }\r
-\r
- const T *const *const GetData() const\r
- {\r
- return (const T *const *) m_Data;\r
- }\r
-\r
- void Copy(const Mx<T> &rhs)\r
- {\r
- Alloc("Copy", rhs.m_RowCount, rhs.m_ColCount, rhs.m_SeqDB, rhs.m_IdA, rhs.m_IdB);\r
- const T * const *Data = rhs.GetData();\r
- for (unsigned i = 0; i < m_RowCount; ++i)\r
- for (unsigned j = 0; j < m_ColCount; ++j)\r
- m_Data[i][j] = Data[i][j];\r
- }\r
-\r
- void Assign(T v)\r
- {\r
- for (unsigned i = 0; i < m_RowCount; ++i)\r
- for (unsigned j = 0; j < m_ColCount; ++j)\r
- m_Data[i][j] = v;\r
- }\r
-\r
- bool Eq(const Mx &rhs, bool Bwd = false) const\r
- {\r
- if (rhs.m_ColCount != m_ColCount)\r
- return false;\r
- if (rhs.m_RowCount != m_RowCount)\r
- return false;\r
- const T * const*d = rhs.GetData();\r
- int i1 = Bwd ? m_RowCount : 0;\r
- int j1 = Bwd ? m_ColCount : 0;\r
- int i2 = Bwd ? -1 : m_RowCount;\r
- int j2 = Bwd ? -1 : m_ColCount;\r
- for (int i = i1; i != i2; Bwd ? --i : ++i)\r
- for (int j = j1; j != j2; Bwd ? --j : ++j)\r
- {\r
- float x = m_Data[i][j];\r
- float y = d[i][j];\r
- if (x < -1e10 && y < -1e10)\r
- continue;\r
- if (!feq(x, y))\r
- {\r
- Warning("%s[%d][%d] = %g, %s = %g",\r
- m_Name, i, j, x, rhs.m_Name, y);\r
- return false;\r
- }\r
- }\r
- return true;\r
- }\r
-\r
- bool EqMask(const Mx &rhs, const Mx<bool> &Mask) const\r
- {\r
- if (rhs.m_ColCount != m_ColCount)\r
- return false;\r
- if (rhs.m_RowCount != m_RowCount)\r
- return false;\r
-\r
- if (Mask.m_ColCount != m_ColCount)\r
- return false;\r
- if (Mask.m_RowCount != m_RowCount)\r
- return false;\r
-\r
- const T * const*d = rhs.GetData();\r
- bool Bwd = false;\r
- int i1 = Bwd ? m_RowCount : 0;\r
- int j1 = Bwd ? m_ColCount : 0;\r
- int i2 = Bwd ? -1 : m_RowCount;\r
- int j2 = Bwd ? -1 : m_ColCount;\r
- for (int i = i1; i != i2; Bwd ? --i : ++i)\r
- for (int j = j1; j != j2; Bwd ? --j : ++j)\r
- {\r
- if (!Mask.m_Data[i][j])\r
- continue;\r
- float x = m_Data[i][j];\r
- float y = d[i][j];\r
- if (x < -1e10 && y < -1e10)\r
- continue;\r
- if (!feq(x, y))\r
- {\r
- Warning("%s[%d][%d] = %g, %s = %g",\r
- m_Name, i, j, x, rhs.m_Name, y);\r
- return false;\r
- }\r
- }\r
- return true;\r
- }\r
-\r
- void Init(T v)\r
- {\r
- for (unsigned i = 0; i < m_RowCount; ++i)\r
- for (unsigned j = 0; j < m_ColCount; ++j)\r
- m_Data[i][j] = v;\r
- }\r
- };\r
-\r
-void WriteMx(const string &Name, Mx<float> &Mxf);\r
-\r
-template<class T> void ReserveMx(Mx<T> &Mxf, unsigned N = UINT_MAX)\r
- {\r
- if (Mxf.m_AllocatedRowCount > 0)\r
- return;\r
- extern unsigned g_MaxInputSeqLength;\r
- if (N == UINT_MAX)\r
- N = g_MaxInputSeqLength+1;\r
- Mxf.Alloc("(Reserved)", N, N);\r
- }\r
-\r
-#endif // mx_h\r
+++ /dev/null
-#ifndef MY_VERSION\r
-#define MY_VERSION "4.2"\r
-#endif\r
-\r
-STR_OPT( input, 0)\r
-STR_OPT( query, 0)\r
-STR_OPT( db, 0)\r
-STR_OPT( sort, 0)\r
-STR_OPT( output, 0)\r
-STR_OPT( uc, 0)\r
-STR_OPT( clstr2uc, 0)\r
-STR_OPT( uc2clstr, 0)\r
-STR_OPT( uc2fasta, 0)\r
-STR_OPT( uc2fastax, 0)\r
-STR_OPT( mergesort, 0)\r
-STR_OPT( tmpdir, ".")\r
-STR_OPT( staralign, 0)\r
-STR_OPT( sortuc, 0)\r
-STR_OPT( blastout, 0)\r
-STR_OPT( blast6out, 0)\r
-STR_OPT( fastapairs, 0)\r
-STR_OPT( idchar, "|")\r
-STR_OPT( diffchar, " ")\r
-STR_OPT( uchime, 0)\r
-STR_OPT( gapopen, 0)\r
-STR_OPT( gapext, 0)\r
-STR_OPT( uhire, 0)\r
-STR_OPT( ids, "99,98,95,90,85,80,70,50,35")\r
-STR_OPT( seeds, 0)\r
-STR_OPT( clump, 0)\r
-STR_OPT( clumpout, 0)\r
-STR_OPT( clump2fasta, 0)\r
-STR_OPT( clumpfasta, 0)\r
-STR_OPT( hireout, 0)\r
-STR_OPT( mergeclumps, 0)\r
-STR_OPT( alpha, 0)\r
-STR_OPT( hspalpha, 0)\r
-STR_OPT( probmx, 0)\r
-STR_OPT( matrix, 0)\r
-STR_OPT( tracestate, 0)\r
-STR_OPT( chainout, 0)\r
-STR_OPT( cluster, 0)\r
-STR_OPT( computekl, 0)\r
-STR_OPT( userout, 0)\r
-STR_OPT( userfields, 0)\r
-STR_OPT( seedsout, 0)\r
-STR_OPT( chainhits, 0)\r
-STR_OPT( findorfs, 0)\r
-STR_OPT( strand, 0)\r
-STR_OPT( getseqs, 0)\r
-STR_OPT( labels, 0)\r
-STR_OPT( doug, 0)\r
-STR_OPT( makeindex, 0)\r
-STR_OPT( indexstats, 0)\r
-STR_OPT( uchimeout, 0)\r
-STR_OPT( uchimealns, 0)\r
-STR_OPT( xframe, 0)\r
-STR_OPT( mkctest, 0)\r
-STR_OPT( allpairs, 0)\r
-STR_OPT( fastq2fasta, 0)\r
-STR_OPT( otusort, 0)\r
-STR_OPT( sparsedist, 0)\r
-STR_OPT( sparsedistparams, 0)\r
-STR_OPT( mcc, 0)\r
-STR_OPT( utax, 0)\r
-STR_OPT( simcl, 0)\r
-STR_OPT( absort, 0)\r
-STR_OPT( cc, 0)\r
-STR_OPT( uslink, 0)\r
-\r
-UNS_OPT( band, 16, 0, UINT_MAX)\r
-UNS_OPT( minlen, 10, 1, UINT_MAX)\r
-UNS_OPT( maxlen, 10000, 1, UINT_MAX)\r
-UNS_OPT( w, 0, 1, UINT_MAX)\r
-UNS_OPT( k, 0, 1, UINT_MAX)\r
-UNS_OPT( stepwords, 8, 0, UINT_MAX)\r
-UNS_OPT( maxaccepts, 1, 0, UINT_MAX)\r
-UNS_OPT( maxrejects, 8, 0, UINT_MAX)\r
-UNS_OPT( maxtargets, 0, 0, UINT_MAX)\r
-UNS_OPT( minhsp, 32, 1, UINT_MAX)\r
-UNS_OPT( bump, 50, 0, 100)\r
-UNS_OPT( rowlen, 64, 8, UINT_MAX)\r
-UNS_OPT( idprefix, 0, 0, UINT_MAX)\r
-UNS_OPT( idsuffix, 0, 0, UINT_MAX)\r
-UNS_OPT( chunks, 4, 2, UINT_MAX)\r
-UNS_OPT( minchunk, 64, 2, UINT_MAX)\r
-UNS_OPT( maxclump, 1000, 1, UINT_MAX)\r
-UNS_OPT( iddef, 0, 0, UINT_MAX)\r
-UNS_OPT( mincodons, 20, 1, UINT_MAX)\r
-UNS_OPT( maxovd, 8, 0, UINT_MAX)\r
-UNS_OPT( max2, 40, 0, UINT_MAX)\r
-UNS_OPT( querylen, 500, 0, UINT_MAX)\r
-UNS_OPT( targetlen, 500, 0, UINT_MAX)\r
-UNS_OPT( orfstyle, (1+2+4), 0, UINT_MAX)\r
-UNS_OPT( dbstep, 1, 1, UINT_MAX)\r
-UNS_OPT( randseed, 1, 0, UINT_MAX)\r
-UNS_OPT( maxp, 2, 2, UINT_MAX)\r
-UNS_OPT( idsmoothwindow, 32, 1, UINT_MAX)\r
-UNS_OPT( mindiffs, 3, 1, UINT_MAX)\r
-UNS_OPT( maxspan1, 24, 1, UINT_MAX)\r
-UNS_OPT( maxspan2, 24, 1, UINT_MAX)\r
-UNS_OPT( minorfcov, 16, 1, UINT_MAX)\r
-UNS_OPT( hashsize, 4195879, 1, UINT_MAX)\r
-UNS_OPT( maxpoly, 0, 0, UINT_MAX)\r
-UNS_OPT( droppct, 50, 0, 100)\r
-UNS_OPT( secs, 10, 0, UINT_MAX)\r
-UNS_OPT( maxqgap, 0, 0, UINT_MAX)\r
-UNS_OPT( maxtgap, 0, 0, UINT_MAX)\r
-\r
-INT_OPT( frame, 0, -3, +3)\r
-\r
-TOG_OPT( trace, false)\r
-TOG_OPT( logmemgrows, false)\r
-TOG_OPT( trunclabels, false)\r
-TOG_OPT( verbose, false)\r
-TOG_OPT( wordcountreject, true)\r
-TOG_OPT( rev, false)\r
-TOG_OPT( output_rejects, false)\r
-TOG_OPT( blast_termgaps, false)\r
-TOG_OPT( fastalign, true)\r
-TOG_OPT( flushuc, false)\r
-TOG_OPT( stable_sort, false)\r
-TOG_OPT( minus_frames, true)\r
-TOG_OPT( usort, true)\r
-TOG_OPT( nb, false)\r
-TOG_OPT( twohit, true)\r
-TOG_OPT( ssort, false)\r
-TOG_OPT( log_query, false)\r
-TOG_OPT( log_hothits, false)\r
-TOG_OPT( logwordstats, false)\r
-TOG_OPT( ucl, false)\r
-TOG_OPT( skipgaps2, true)\r
-TOG_OPT( skipgaps, true)\r
-TOG_OPT( denovo, false)\r
-TOG_OPT( cartoon_orfs, false)\r
-TOG_OPT( label_ab, false)\r
-TOG_OPT( wordweight, false)\r
-TOG_OPT( isort, false)\r
-TOG_OPT( selfid, false)\r
-TOG_OPT( leftjust, false)\r
-TOG_OPT( rightjust, false)\r
-\r
-FLT_OPT( id, 0.0, 0.0, 1.0)\r
-FLT_OPT( weak_id, 0.0, 0.0, 1.0)\r
-FLT_OPT( match, 1.0, 0.0, FLT_MAX)\r
-FLT_OPT( mismatch, -2.0, 0.0, FLT_MAX)\r
-FLT_OPT( split, 1000.0, 1.0, FLT_MAX)\r
-FLT_OPT( evalue, 10.0, 0.0, FLT_MAX)\r
-FLT_OPT( weak_evalue, 10.0, 0.0, FLT_MAX)\r
-FLT_OPT( evalue_g, 10.0, 0.0, FLT_MAX)\r
-FLT_OPT( chain_evalue, 10.0, 0.0, FLT_MAX)\r
-FLT_OPT( xdrop_u, 16.0, 0.0, FLT_MAX)\r
-FLT_OPT( xdrop_g, 32.0, 0.0, FLT_MAX)\r
-FLT_OPT( xdrop_ug, 16.0, 0.0, FLT_MAX)\r
-FLT_OPT( xdrop_nw, 16.0, 0.0, FLT_MAX)\r
-FLT_OPT( ka_gapped_lambda, 0.0, 0.0, FLT_MAX)\r
-FLT_OPT( ka_ungapped_lambda, 0.0, 0.0, FLT_MAX)\r
-FLT_OPT( ka_gapped_k, 0.0, 0.0, FLT_MAX)\r
-FLT_OPT( ka_ungapped_k, 0.0, 0.0, FLT_MAX)\r
-FLT_OPT( ka_dbsize, 0.0, 0.0, FLT_MAX)\r
-FLT_OPT( chain_targetfract, 0.0, 0.0, 1.0)\r
-FLT_OPT( targetfract, 0.0, 0.0, 1.0)\r
-FLT_OPT( queryfract, 0.0, 0.0, 1.0)\r
-FLT_OPT( fspenalty, 16.0, 0.0, FLT_MAX)\r
-FLT_OPT( sspenalty, 20.0, 0.0, FLT_MAX)\r
-FLT_OPT( seedt1, 13.0, 0.0, FLT_MAX)\r
-FLT_OPT( seedt2, 11.0, 0.0, FLT_MAX)\r
-FLT_OPT( lopen, 11.0, 0.0, FLT_MAX)\r
-FLT_OPT( lext, 1.0, 0.0, FLT_MAX)\r
-FLT_OPT( minh, 0.3, 0.0, FLT_MAX)\r
-FLT_OPT( xn, 8.0, 0.0, FLT_MAX)\r
-FLT_OPT( dn, 1.4, 0.0, FLT_MAX)\r
-FLT_OPT( xa, 1.0, 0.0, FLT_MAX)\r
-FLT_OPT( mindiv, 0.5, 0.0, 100.0)\r
-FLT_OPT( abskew, 2, 0.0, 100.0)\r
-FLT_OPT( abx, 8.0, 0.0, 100.0)\r
-FLT_OPT( minspanratio1, 0.7, 0.0, 1.0)\r
-FLT_OPT( minspanratio2, 0.7, 0.0, 1.0)\r
-\r
-FLAG_OPT( usersort)\r
-FLAG_OPT( exact)\r
-FLAG_OPT( optimal)\r
-FLAG_OPT( self)\r
-FLAG_OPT( ungapped)\r
-FLAG_OPT( global)\r
-FLAG_OPT( local)\r
-FLAG_OPT( xlat)\r
-FLAG_OPT( realign)\r
-FLAG_OPT( hash)\r
-FLAG_OPT( derep)\r
+++ /dev/null
-#include <time.h>\r
-#include <stdarg.h>\r
-#include <sys/stat.h>\r
-#include <errno.h>\r
-#include <string.h>\r
-#include <ctype.h>\r
-#include <string>\r
-#include <vector>\r
-#include <set>\r
-#include <map>\r
-#include <signal.h>\r
-#include <float.h>\r
-\r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
-#include <sys/time.h>\r
-#include <sys/resource.h>\r
-#include <unistd.h>\r
-#include <errno.h>\r
-#include <fcntl.h>\r
-#include <stdlib.h>\r
-#else\r
-//#include <crtdbg.h>\r
-#include <process.h>\r
-#include <windows.h>\r
-#include <psapi.h>\r
-#include <io.h>\r
-#endif\r
-\r
-#include "myutils.h"\r
-\r
-const char *SVN_VERSION =\r
-#include "svnversion.h"\r
-;\r
-\r
-#define TEST_UTILS 0\r
-\r
-using namespace std;\r
-\r
-const unsigned MY_IO_BUFSIZ = 32000;\r
-const unsigned MAX_FORMATTED_STRING_LENGTH = 64000;\r
-\r
-static char *g_IOBuffers[256];\r
-static time_t g_StartTime = time(0);\r
-static vector<string> g_Argv;\r
-static double g_PeakMemUseBytes;\r
-\r
-#if TEST_UTILS\r
-void TestUtils()\r
-{\r
- const int C = 100000000;\r
- for (int i = 0; i < C; ++i)\r
- ProgressStep(i, C, "something or other");\r
- \r
- Progress("\n");\r
- Progress("Longer message\r");\r
- Sleep(1000);\r
- Progress("Short\r");\r
- Sleep(1000);\r
- Progress("And longer again\r");\r
- Sleep(1000);\r
- Progress("Shrt\n");\r
- Sleep(1000);\r
- const unsigned N = 10;\r
- unsigned M = 10;\r
- for (unsigned i = 0; i < N; ++i)\r
- {\r
- ProgressStep(i, N, "Allocating 1MB blocks");\r
- for (unsigned j = 0; j < M; ++j)\r
- {\r
- ProgressStep(j, M, "Inner loop"); \r
- malloc(100000);\r
- Sleep(500);\r
- }\r
- }\r
-}\r
-#endif // TEST_UTILS\r
-\r
-static void AllocBuffer(FILE *f)\r
-{\r
- int fd = fileno(f);\r
- if (fd < 0 || fd >= 256)\r
- return;\r
- if (g_IOBuffers[fd] == 0)\r
- g_IOBuffers[fd] = myalloc(char, MY_IO_BUFSIZ);\r
- setvbuf(f, g_IOBuffers[fd], _IOFBF, MY_IO_BUFSIZ);\r
-}\r
-\r
-static void FreeBuffer(FILE *f)\r
-{\r
- int fd = fileno(f);\r
- if (fd < 0 || fd >= 256)\r
- return;\r
- if (g_IOBuffers[fd] == 0)\r
- return;\r
- myfree(g_IOBuffers[fd]);\r
- g_IOBuffers[fd] = 0;\r
-}\r
-\r
-unsigned GetElapsedSecs()\r
-{\r
- return (unsigned) (time(0) - g_StartTime);\r
-}\r
-\r
-static unsigned g_NewCalls;\r
-static unsigned g_FreeCalls;\r
-static double g_InitialMemUseBytes;\r
-static double g_TotalAllocBytes;\r
-static double g_TotalFreeBytes;\r
-static double g_NetBytes;\r
-static double g_MaxNetBytes;\r
-\r
-void LogAllocStats()\r
-{\r
- Log("\n");\r
- Log(" Allocs %u\n", g_NewCalls);\r
- Log(" Frees %u\n", g_FreeCalls);\r
- Log("Initial alloc %s\n", MemBytesToStr(g_InitialMemUseBytes));\r
- Log(" Total alloc %s\n", MemBytesToStr(g_TotalAllocBytes));\r
- Log(" Total free %s\n", MemBytesToStr(g_TotalFreeBytes));\r
- Log(" Net bytes %s\n", MemBytesToStr(g_NetBytes));\r
- Log("Max net bytes %s\n", MemBytesToStr(g_MaxNetBytes));\r
- Log(" Peak total %s\n", MemBytesToStr(g_MaxNetBytes + g_InitialMemUseBytes));\r
-}\r
-\r
-bool StdioFileExists(const string &FileName)\r
-{\r
- struct stat SD;\r
- int i = stat(FileName.c_str(), &SD);\r
- return i == 0;\r
-}\r
-\r
-void myassertfail(const char *Exp, const char *File, unsigned Line)\r
-{\r
- Die("%s(%u) assert failed: %s", File, Line, Exp);\r
-}\r
-\r
-bool myisatty(int fd)\r
-{\r
- return isatty(fd) != 0;\r
-}\r
-\r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
-#else\r
-//#ifdef BIT_VERSION\r
-//#include <io.h>\r
-//int fseeko(FILE *stream, off_t offset, int whence)\r
-//// {\r
-// off_t FilePos = _fseeki64(stream, offset, whence);\r
-// return (FilePos == -1L) ? -1 : 0;\r
-// }\r
-//#define ftello(fm) (off_t) _ftelli64(fm)\r
-//#else \r
-int fseeko(FILE *stream, off_t offset, int whence)\r
-{\r
- off_t FilePos = fseek(stream, offset, whence);\r
- return (FilePos == -1L) ? -1 : 0;\r
-}\r
-#define ftello(fm) (off_t) ftell(fm)\r
-//#endif\r
-#endif\r
-\r
-void LogStdioFileState(FILE *f)\r
-{\r
- unsigned long tellpos = (unsigned long) ftello(f);\r
- long fseek_pos = fseek(f, 0, SEEK_CUR);\r
- int fd = fileno(f);\r
- Log("FILE * %p\n", f);\r
- Log("fileno %d\n", fd);\r
- Log("feof %d\n", feof(f));\r
- Log("ferror %d\n", ferror(f));\r
- Log("ftell %ld\n", tellpos);\r
- Log("fseek %ld\n", fseek_pos);\r
-#if !defined(_GNU_SOURCE) && !defined(__APPLE_CC__)\r
- fpos_t fpos;\r
- int fgetpos_retval = fgetpos(f, &fpos);\r
- Log("fpos %ld (retval %d)\n", (long) fpos, fgetpos_retval);\r
- // Log("eof %d\n", _eof(fd));\r
-#endif\r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
-#else\r
-#ifdef BIT_VERSION\r
- __int64 pos64 = _ftelli64(f);\r
- Log("_ftelli64 %lld\n", pos64);\r
-#else\r
- __int32 pos32 = ftell(f);\r
- Log("ftell %lld\n", pos32);\r
- \r
-#endif\r
-#endif\r
-}\r
-\r
-FILE *OpenStdioFile(const string &FileName)\r
-{\r
- const char *Mode = "rb";\r
- FILE *f = fopen(FileName.c_str(), Mode);\r
- if (f == 0)\r
- {\r
- if (errno == EFBIG)\r
- {\r
- if (sizeof(off_t) == 4)\r
- Die("File too big, off_t is 32 bits, recompile needed");\r
- else\r
- Die("Cannot open '%s', file too big (off_t=%u bits)",\r
- FileName.c_str(), sizeof(off_t)*8);\r
- }\r
- Die("Cannot open %s, errno=%d %s",\r
- FileName.c_str(), errno, strerror(errno));\r
- }\r
- AllocBuffer(f);\r
- return f;\r
-}\r
-\r
-FILE *CreateStdioFile(const string &FileName)\r
-{\r
- FILE *f = fopen(FileName.c_str(), "wb+");\r
- if (0 == f)\r
- Die("Cannot create %s, errno=%d %s",\r
- FileName.c_str(), errno, strerror(errno));\r
- AllocBuffer(f);\r
- return f;\r
-}\r
-\r
-void SetStdioFilePos(FILE *f, off_t Pos)\r
-{\r
- if (0 == f)\r
- Die("SetStdioFilePos failed, f=NULL");\r
- int Ok = fseeko(f, Pos, SEEK_SET);\r
- off_t NewPos = ftello(f);\r
- if (Ok != 0 || Pos != NewPos)\r
- {\r
- LogStdioFileState(f);\r
- Die("SetStdioFilePos(%d) failed, Ok=%d NewPos=%d",\r
- (int) Pos, Ok, (int) NewPos);\r
- }\r
-}\r
-\r
-void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes)\r
-{\r
- if (0 == f)\r
- Die("ReadStdioFile failed, f=NULL");\r
- SetStdioFilePos(f, Pos);\r
- unsigned BytesRead = fread(Buffer, 1, Bytes, f);\r
- if (BytesRead != Bytes)\r
- {\r
- LogStdioFileState(f);\r
- Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d",\r
- (int) Bytes, (int) BytesRead, errno);\r
- }\r
-}\r
-\r
-void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes)\r
-{\r
- if (0 == f)\r
- Die("ReadStdioFile failed, f=NULL");\r
- unsigned BytesRead = fread(Buffer, 1, Bytes, f);\r
- if (BytesRead != Bytes)\r
- {\r
- LogStdioFileState(f);\r
- Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d",\r
- (int) Bytes, (int) BytesRead, errno);\r
- }\r
-}\r
-\r
-// Return values from functions like lseek, ftell, fgetpos are\r
-// "undefined" for files that cannot seek. Attempt to detect\r
-// whether a file can seek by checking for error returns.\r
-bool CanSetStdioFilePos(FILE *f)\r
-{\r
- // Common special cases\r
- if (f == stdin || f == stdout || f == stderr)\r
- return false;\r
- \r
- fpos_t CurrPos;\r
- int ok1 = fgetpos(f, &CurrPos);\r
- if (ok1 < 0)\r
- return false;\r
- int ok2 = fseek(f, 0, SEEK_END);\r
- if (ok2 < 0)\r
- return false;\r
- fpos_t EndPos;\r
- int ok3 = fgetpos(f, &EndPos);\r
- int ok4 = fsetpos(f, &CurrPos);\r
- if (!ok3 || !ok4)\r
- return false;\r
- return true;\r
-}\r
-\r
-byte *ReadAllStdioFile(FILE *f, unsigned &FileSize)\r
-{\r
- const unsigned BUFF_SIZE = 1024*1024;\r
- \r
- if (CanSetStdioFilePos(f))\r
- {\r
- off_t Pos = GetStdioFilePos(f);\r
- off_t FileSize = GetStdioFileSize(f);\r
- if (FileSize > UINT_MAX)\r
- Die("ReadAllStdioFile: file size > UINT_MAX");\r
- SetStdioFilePos(f, 0);\r
- byte *Buffer = myalloc(byte, unsigned(FileSize));\r
- ReadStdioFile(f, Buffer, unsigned(FileSize));\r
- SetStdioFilePos(f, Pos);\r
- FileSize = unsigned(FileSize);\r
- return Buffer;\r
- }\r
- \r
- // Can't seek, read one buffer at a time.\r
- FileSize = 0;\r
- \r
- // Just to initialize so that first call to realloc works.\r
- byte *Buffer = (byte *) malloc(4);\r
- if (Buffer == 0)\r
- Die("ReadAllStdioFile, out of memory");\r
- for (;;)\r
- {\r
- Buffer = (byte *) realloc(Buffer, FileSize + BUFF_SIZE);\r
- unsigned BytesRead = fread(Buffer + FileSize, 1, BUFF_SIZE, f);\r
- FileSize += BytesRead;\r
- if (BytesRead < BUFF_SIZE)\r
- {\r
- Buffer = (byte *) realloc(Buffer, FileSize);\r
- return Buffer;\r
- }\r
- }\r
-}\r
-\r
-byte *ReadAllStdioFile(const std::string &FileName, off_t &FileSize)\r
-{\r
-#if WIN32\r
- FILE *f = OpenStdioFile(FileName);\r
- FileSize = GetStdioFileSize(f);\r
- CloseStdioFile(f);\r
- \r
- HANDLE h = CreateFile(FileName.c_str(), GENERIC_READ, FILE_SHARE_READ,\r
- NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);\r
- if (h == INVALID_HANDLE_VALUE)\r
- Die("ReadAllStdioFile:Open(%s) failed", FileName.c_str());\r
- \r
- unsigned uFileSize = (unsigned) FileSize;\r
- if ((off_t) uFileSize != FileSize)\r
- Die("File too big (%.1f Gb): %s", double(FileSize)/1e9, FileName.c_str());\r
- \r
- byte *Buffer = myalloc(byte, uFileSize);\r
- DWORD BytesRead;\r
- ReadFile(h, Buffer, uFileSize, &BytesRead, NULL);\r
- if (FileSize != BytesRead)\r
- Die("ReadAllStdioFile:Error reading %s, attempted %u got %u",\r
- FileName.c_str(), FileSize, (unsigned) BytesRead);\r
- \r
- CloseHandle(h);\r
- return Buffer;\r
-#else\r
- int h = open(FileName.c_str(), O_RDONLY);\r
- if (h < 0)\r
- Die("ReadAllStdioFile:Cannot open %s", FileName.c_str());\r
- FileSize = lseek(h, 0, SEEK_END);\r
- if (FileSize == (off_t) (-1))\r
- Die("ReadAllStdioFile:Error seeking %s", FileName.c_str());\r
- // byte *Buffer = myalloc<byte>(FileSize);\r
- size_t stBytes = (size_t) FileSize;\r
- if ((off_t) stBytes != FileSize)\r
- Die("ReadAllStdioFile: off_t overflow");\r
- byte *Buffer = (byte *) malloc(stBytes);\r
- if (Buffer == 0)\r
- Die("ReadAllStdioFile: failed to allocate %s", MemBytesToStr(stBytes));\r
- lseek(h, 0, SEEK_SET);\r
- size_t n = read(h, Buffer, stBytes);\r
- if (n != FileSize)\r
- Die("ReadAllStdioFile, Error reading %s, attempted %g got %g",\r
- FileName.c_str(), (double) FileSize, (double) n);\r
- close(h);\r
- return Buffer;\r
-#endif\r
-}\r
-\r
-void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes)\r
-{\r
- if (0 == f)\r
- Die("WriteStdioFile failed, f=NULL");\r
- SetStdioFilePos(f, Pos);\r
- unsigned BytesWritten = fwrite(Buffer, 1, Bytes, f);\r
- if (BytesWritten != Bytes)\r
- {\r
- LogStdioFileState(f);\r
- Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d",\r
- (int) Bytes, (int) BytesWritten, errno);\r
- }\r
-}\r
-\r
-void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes)\r
-{\r
- if (0 == f)\r
- Die("WriteStdioFile failed, f=NULL");\r
- unsigned BytesWritten = fwrite(Buffer, 1, Bytes, f);\r
- if (BytesWritten != Bytes)\r
- {\r
- LogStdioFileState(f);\r
- Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d",\r
- (int) Bytes, (int) BytesWritten, errno);\r
- }\r
-}\r
-\r
-// Return false on EOF, true if line successfully read.\r
-bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes)\r
-{\r
- if (feof(f))\r
- return false;\r
- if ((int) Bytes < 0)\r
- Die("ReadLineStdioFile: Bytes < 0");\r
- char *RetVal = fgets(Line, (int) Bytes, f);\r
- if (NULL == RetVal)\r
- {\r
- if (feof(f))\r
- return false;\r
- if (ferror(f))\r
- Die("ReadLineStdioFile: errno=%d", errno);\r
- Die("ReadLineStdioFile: fgets=0, feof=0, ferror=0");\r
- }\r
- \r
- if (RetVal != Line)\r
- Die("ReadLineStdioFile: fgets != Buffer");\r
- unsigned n = strlen(Line);\r
- if (n < 1 || Line[n-1] != '\n')\r
- Die("ReadLineStdioFile: line too long or missing end-of-line");\r
- if (n > 0 && (Line[n-1] == '\r' || Line[n-1] == '\n'))\r
- Line[n-1] = 0;\r
- if (n > 1 && (Line[n-2] == '\r' || Line[n-2] == '\n'))\r
- Line[n-2] = 0;\r
- return true;\r
-}\r
-\r
-// Return false on EOF, true if line successfully read.\r
-bool ReadLineStdioFile(FILE *f, string &Line)\r
-{\r
- Line.clear();\r
- for (;;)\r
- {\r
- int c = fgetc(f);\r
- if (c == -1)\r
- {\r
- if (feof(f))\r
- {\r
- if (!Line.empty())\r
- return true;\r
- return false;\r
- }\r
- Die("ReadLineStdioFile, errno=%d", errno);\r
- }\r
- if (c == '\r')\r
- continue;\r
- if (c == '\n')\r
- return true;\r
- Line.push_back((char) c);\r
- }\r
-}\r
-\r
-// Copies all of fFrom regardless of current\r
-// file position, appends to fTo.\r
-void AppendStdioFileToFile(FILE *fFrom, FILE *fTo)\r
-{\r
- off_t SavedFromPos = GetStdioFilePos(fFrom);\r
- off_t FileSize = GetStdioFileSize(fFrom);\r
- const off_t BUFF_SIZE = 1024*1024;\r
- char *Buffer = myalloc(char, BUFF_SIZE);\r
- SetStdioFilePos(fFrom, 0);\r
- off_t BytesRemaining = FileSize;\r
- while (BytesRemaining > 0)\r
- {\r
- off_t BytesToRead = BytesRemaining;\r
- if (BytesToRead > BUFF_SIZE)\r
- BytesToRead = BUFF_SIZE;\r
- ReadStdioFile(fFrom, Buffer, (unsigned) BytesToRead);\r
- WriteStdioFile(fTo, Buffer, (unsigned) BytesToRead);\r
- BytesRemaining -= BytesToRead;\r
- }\r
- SetStdioFilePos(fFrom, SavedFromPos);\r
-}\r
-\r
-void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo)\r
-{\r
- int Ok = rename(FileNameFrom.c_str(), FileNameTo.c_str());\r
- if (Ok != 0)\r
- Die("RenameStdioFile(%s,%s) failed, errno=%d %s",\r
- FileNameFrom.c_str(), FileNameTo.c_str(), errno, strerror(errno));\r
-}\r
-\r
-void FlushStdioFile(FILE *f)\r
-{\r
- int Ok = fflush(f);\r
- if (Ok != 0)\r
- Die("fflush(%p)=%d,", f, Ok);\r
-}\r
-\r
-void CloseStdioFile(FILE *f)\r
-{\r
- if (f == 0)\r
- return;\r
- int Ok = fclose(f);\r
- if (Ok != 0)\r
- Die("fclose(%p)=%d", f, Ok);\r
- FreeBuffer(f);\r
-}\r
-\r
-off_t GetStdioFilePos(FILE *f)\r
-{\r
- off_t FilePos = ftello(f);\r
- if (FilePos < 0)\r
- Die("ftello=%d", (int) FilePos);\r
- return FilePos;\r
-}\r
-\r
-off_t GetStdioFileSize(FILE *f)\r
-{\r
- off_t CurrentPos = GetStdioFilePos(f);\r
- off_t zeroPos = 0;\r
- int Ok = fseeko(f, zeroPos, SEEK_END);\r
- if (Ok < 0)\r
- Die("fseek in GetFileSize");\r
- \r
- off_t Length = ftello(f);\r
- if (Length < 0)\r
- Die("ftello in GetFileSize");\r
- SetStdioFilePos(f, CurrentPos);\r
- return Length;\r
-}\r
-\r
-void DeleteStdioFile(const string &FileName)\r
-{\r
- int Ok = remove(FileName.c_str());\r
- if (Ok != 0)\r
- Die("remove(%s) failed, errno=%d %s", FileName.c_str(), errno, strerror(errno));\r
-}\r
-\r
-void myvstrprintf(string &Str, const char *Format, va_list ArgList)\r
-{\r
- static char szStr[MAX_FORMATTED_STRING_LENGTH];\r
- vsnprintf(szStr, MAX_FORMATTED_STRING_LENGTH-1, Format, ArgList);\r
- szStr[MAX_FORMATTED_STRING_LENGTH - 1] = '\0';\r
- Str.assign(szStr);\r
-}\r
-\r
-void myvstrprintf(string &Str, const char *Format, ...)\r
-{\r
- va_list ArgList;\r
- va_start(ArgList, Format);\r
- myvstrprintf(Str, Format, ArgList);\r
- va_end(ArgList);\r
-}\r
-\r
-FILE *g_fLog = 0;\r
-\r
-void SetLogFileName(const string &FileName)\r
-{\r
- if (g_fLog != 0)\r
- CloseStdioFile(g_fLog);\r
- g_fLog = 0;\r
- if (FileName.empty())\r
- return;\r
- g_fLog = CreateStdioFile(FileName);\r
-}\r
-\r
-void Log(const char *Format, ...)\r
-{\r
- if (g_fLog == 0)\r
- return;\r
- \r
- static bool InLog = false;\r
- if (InLog)\r
- return;\r
- \r
- InLog = true;\r
- va_list ArgList;\r
- va_start(ArgList, Format);\r
- vfprintf(g_fLog, Format, ArgList);\r
- va_end(ArgList);\r
- fflush(g_fLog);\r
- InLog = false;\r
-}\r
-\r
-void Die(const char *Format, ...)\r
-{\r
- static bool InDie = false;\r
- if (InDie)\r
- exit(1);\r
- InDie = true;\r
- string Msg;\r
- \r
- if (g_fLog != 0)\r
- setbuf(g_fLog, 0);\r
- va_list ArgList;\r
- va_start(ArgList, Format);\r
- myvstrprintf(Msg, Format, ArgList);\r
- va_end(ArgList);\r
- \r
- fprintf(stderr, "\n\n");\r
- Log("\n");\r
- time_t t = time(0);\r
- Log("%s", asctime(localtime(&t)));\r
- for (unsigned i = 0; i < g_Argv.size(); i++)\r
- {\r
- fprintf(stderr, (i == 0) ? "%s" : " %s", g_Argv[i].c_str());\r
- Log((i == 0) ? "%s" : " %s", g_Argv[i].c_str());\r
- }\r
- fprintf(stderr, "\n");\r
- Log("\n");\r
- \r
- time_t CurrentTime = time(0);\r
- unsigned ElapsedSeconds = unsigned(CurrentTime - g_StartTime);\r
- const char *sstr = SecsToStr(ElapsedSeconds);\r
- Log("Elapsed time: %s\n", sstr);\r
- \r
- const char *szStr = Msg.c_str();\r
- fprintf(stderr, "\n---Fatal error---\n%s\n", szStr);\r
- Log("\n---Fatal error---\n%s\n", szStr);\r
- \r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
-#else\r
- //if (IsDebuggerPresent())\r
- // __debugbreak();\r
- //_CrtSetDbgFlag(0);\r
-#endif\r
- \r
- exit(1);\r
-}\r
-\r
-void Warning(const char *Format, ...)\r
-{\r
- string Msg;\r
- \r
- va_list ArgList;\r
- va_start(ArgList, Format);\r
- myvstrprintf(Msg, Format, ArgList);\r
- va_end(ArgList);\r
- \r
- const char *szStr = Msg.c_str();\r
- \r
- fprintf(stderr, "\nWARNING: %s\n", szStr);\r
- if (g_fLog != stdout)\r
- {\r
- Log("\nWARNING: %s\n", szStr);\r
- fflush(g_fLog);\r
- }\r
-}\r
-\r
-#if defined linux || __linux__\r
-double GetMemUseBytes()\r
-{\r
- static char statm[64];\r
- static int PageSize = 1;\r
- if (0 == statm[0])\r
- {\r
- PageSize = sysconf(_SC_PAGESIZE);\r
- pid_t pid = getpid();\r
- sprintf(statm, "/proc/%d/statm", (int) pid);\r
- }\r
- \r
- int fd = open(statm, O_RDONLY);\r
- if (-1 == fd)\r
- return 1000000;\r
- char Buffer[64];\r
- int n = read(fd, Buffer, sizeof(Buffer) - 1);\r
- close(fd);\r
- fd = -1;\r
- \r
- if (n <= 0)\r
- return 1000000;\r
- \r
- Buffer[n] = 0;\r
- double Pages = atof(Buffer);\r
- \r
- double Bytes = Pages*PageSize;\r
- if (Bytes > g_PeakMemUseBytes)\r
- g_PeakMemUseBytes = Bytes;\r
- return Bytes;\r
-}\r
-#elif defined(__APPLE__) || (__MACH__)\r
-#include <memory.h>\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-#include <unistd.h>\r
-#include <sys/types.h>\r
-#include <sys/sysctl.h>\r
-#include <sys/socket.h>\r
-#include <sys/gmon.h>\r
-#include <mach/vm_param.h>\r
-#include <netinet/in.h>\r
-#include <netinet/icmp6.h>\r
-#include <sys/vmmeter.h>\r
-#include <sys/proc.h>\r
-#include <mach/task_info.h>\r
-#include <mach/task.h>\r
-#include <mach/mach_init.h>\r
-#include <mach/vm_statistics.h>\r
-\r
-#define DEFAULT_MEM_USE 100000000.0\r
-\r
-double GetMemUseBytes()\r
-{\r
- task_t mytask = mach_task_self();\r
- struct task_basic_info ti;\r
- memset((void *) &ti, 0, sizeof(ti));\r
- mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;\r
- kern_return_t ok = task_info(mytask, TASK_BASIC_INFO, (task_info_t) &ti, &count);\r
- if (ok == KERN_INVALID_ARGUMENT)\r
- return DEFAULT_MEM_USE;\r
- \r
- if (ok != KERN_SUCCESS)\r
- return DEFAULT_MEM_USE;\r
- \r
- double Bytes = (double ) ti.resident_size;\r
- if (Bytes > g_PeakMemUseBytes)\r
- g_PeakMemUseBytes = Bytes;\r
- return Bytes;\r
-}\r
-#else\r
-double GetMemUseBytes()\r
-{\r
- return 0;\r
-}\r
-#endif\r
-\r
-double GetPeakMemUseBytes()\r
-{\r
- return g_PeakMemUseBytes;\r
-}\r
-\r
-const char *SecsToHHMMSS(int Secs)\r
-{\r
- int HH = Secs/3600;\r
- int MM = (Secs - HH*3600)/60;\r
- int SS = Secs%60;\r
- static char Str[16];\r
- if (HH == 0)\r
- sprintf(Str, "%02d:%02d", MM, SS);\r
- else\r
- sprintf(Str, "%02d:%02d:%02d", HH, MM, SS);\r
- return Str;\r
-}\r
-\r
-const char *SecsToStr(double Secs)\r
-{\r
- if (Secs >= 10.0)\r
- return SecsToHHMMSS((int) Secs);\r
- \r
- static char Str[16];\r
- if (Secs < 1e-6)\r
- sprintf(Str, "%.2gs", Secs);\r
- else if (Secs < 1e-3)\r
- sprintf(Str, "%.2fms", Secs*1e3);\r
- else\r
- sprintf(Str, "%.3fs", Secs);\r
- return Str;\r
-}\r
-\r
-const char *MemBytesToStr(double Bytes)\r
-{\r
- static char Str[32];\r
- \r
- if (Bytes < 1e6)\r
- sprintf(Str, "%.1fkb", Bytes/1e3);\r
- else if (Bytes < 10e6)\r
- sprintf(Str, "%.1fMb", Bytes/1e6);\r
- else if (Bytes < 1e9)\r
- sprintf(Str, "%.0fMb", Bytes/1e6);\r
- else if (Bytes < 10e9)\r
- sprintf(Str, "%.1fGb", Bytes/1e9);\r
- else if (Bytes < 100e9)\r
- sprintf(Str, "%.0fGb", Bytes/1e9);\r
- else\r
- sprintf(Str, "%.3gb", Bytes);\r
- return Str;\r
-}\r
-\r
-const char *IntToStr(unsigned i)\r
-{\r
- static char Str[32];\r
- \r
- double d = (double) i;\r
- if (i < 10000)\r
- sprintf(Str, "%u", i);\r
- else if (i < 1e6)\r
- sprintf(Str, "%.1fk", d/1e3);\r
- else if (i < 10e6)\r
- sprintf(Str, "%.1fM", d/1e6);\r
- else if (i < 1e9)\r
- sprintf(Str, "%.0fM", d/1e6);\r
- else if (i < 10e9)\r
- sprintf(Str, "%.1fG", d/1e9);\r
- else if (i < 100e9)\r
- sprintf(Str, "%.0fG", d/1e9);\r
- else\r
- sprintf(Str, "%.3g", d);\r
- return Str;\r
-}\r
-\r
-const char *FloatToStr(double d)\r
-{\r
- static char Str[32];\r
- \r
- double a = fabs(d);\r
- if (a < 0.01)\r
- sprintf(Str, "%.3g", a);\r
- else if (a >= 0.01 && a < 1)\r
- sprintf(Str, "%.3f", a);\r
- else if (a <= 10 && a >= 1)\r
- {\r
- double intpart;\r
- if (modf(a, &intpart) < 0.05)\r
- sprintf(Str, "%.0f", d);\r
- else\r
- sprintf(Str, "%.1f", d);\r
- }\r
- else if (a > 10 && a < 10000)\r
- sprintf(Str, "%.0f", d);\r
- else if (a < 1e6)\r
- sprintf(Str, "%.1fk", d/1e3);\r
- else if (a < 10e6)\r
- sprintf(Str, "%.1fM", d/1e6);\r
- else if (a < 1e9)\r
- sprintf(Str, "%.0fM", d/1e6);\r
- else if (a < 10e9)\r
- sprintf(Str, "%.1fG", d/1e9);\r
- else if (a < 100e9)\r
- sprintf(Str, "%.0fG", d/1e9);\r
- else\r
- sprintf(Str, "%.3g", d);\r
- return Str;\r
-}\r
-\r
-bool opt_quiet = false;\r
-bool opt_version = false;\r
-bool opt_logopts = false;\r
-bool opt_compilerinfo = false;\r
-bool opt_help = false;\r
-string opt_log = "";\r
-\r
-bool optset_quiet = false;\r
-bool optset_version = false;\r
-bool optset_logopts = false;\r
-bool optset_compilerinfo = false;\r
-bool optset_help = false;\r
-bool optset_log = false;\r
-\r
-static string g_CurrentProgressLine;\r
-static string g_ProgressDesc;\r
-static unsigned g_ProgressIndex;\r
-static unsigned g_ProgressCount;\r
-\r
-static unsigned g_CurrProgressLineLength;\r
-static unsigned g_LastProgressLineLength;\r
-static unsigned g_CountsInterval;\r
-static unsigned g_StepCalls;\r
-static time_t g_TimeLastOutputStep;\r
-\r
-static string &GetProgressPrefixStr(string &s)\r
-{\r
- double Bytes = GetMemUseBytes();\r
- unsigned Secs = GetElapsedSecs();\r
- s = string(SecsToHHMMSS(Secs));\r
- if (Bytes > 0)\r
- {\r
- s.push_back(' ');\r
- char Str[32];\r
- sprintf(Str, "%5.5s", MemBytesToStr(Bytes));\r
- s += string(Str);\r
- }\r
- s.push_back(' ');\r
- return s;\r
-}\r
-\r
-void ProgressLog(const char *Format, ...)\r
-{\r
- string Str;\r
- va_list ArgList;\r
- va_start(ArgList, Format);\r
- myvstrprintf(Str, Format, ArgList);\r
- va_end(ArgList);\r
- \r
- Log("%s", Str.c_str());\r
- Progress("%s", Str.c_str());\r
-}\r
-\r
-void Progress(const char *Format, ...)\r
-{\r
- if (opt_quiet)\r
- return;\r
- \r
- string Str;\r
- va_list ArgList;\r
- va_start(ArgList, Format);\r
- myvstrprintf(Str, Format, ArgList);\r
- va_end(ArgList);\r
- \r
-#if 0\r
- Log("Progress(");\r
- for (unsigned i = 0; i < Str.size(); ++i)\r
- {\r
- char c = Str[i];\r
- if (c == '\r')\r
- Log("\\r");\r
- else if (c == '\n')\r
- Log("\\n");\r
- else\r
- Log("%c", c);\r
- }\r
- Log(")\n");\r
-#endif //0\r
- \r
- for (unsigned i = 0; i < Str.size(); ++i)\r
- {\r
- if (g_CurrProgressLineLength == 0)\r
- {\r
- string s;\r
- GetProgressPrefixStr(s);\r
- for (unsigned j = 0; j < s.size(); ++j)\r
- {\r
- fputc(s[j], stderr);\r
- ++g_CurrProgressLineLength;\r
- }\r
- }\r
- \r
- char c = Str[i];\r
- if (c == '\n' || c == '\r')\r
- {\r
- for (unsigned j = g_CurrProgressLineLength; j < g_LastProgressLineLength; ++j)\r
- fputc(' ', stderr);\r
- if (c == '\n')\r
- g_LastProgressLineLength = 0;\r
- else\r
- g_LastProgressLineLength = g_CurrProgressLineLength;\r
- g_CurrProgressLineLength = 0;\r
- fputc(c, stderr);\r
- }\r
- else\r
- {\r
- fputc(c, stderr);\r
- ++g_CurrProgressLineLength;\r
- }\r
- }\r
-}\r
-\r
-void ProgressExit()\r
-{\r
- time_t Now = time(0);\r
- struct tm *t = localtime(&Now);\r
- const char *s = asctime(t);\r
- unsigned Secs = GetElapsedSecs();\r
- \r
- Log("\n");\r
- Log("Finished %s", s); // there is a newline in s\r
- Log("Elapsed time %s\n", SecsToHHMMSS((int) Secs));\r
- Log("Max memory %s\n", MemBytesToStr(g_PeakMemUseBytes));\r
-#if WIN32 && DEBUG\r
- // Skip exit(), which can be very slow in DEBUG build\r
- // VERY DANGEROUS practice, because it skips global destructors.\r
- // But if you know the rules, you can break 'em, right?\r
- //ExitProcess(0);\r
-#endif\r
-}\r
-\r
-const char *PctStr(double x, double y)\r
-{\r
- if (y == 0)\r
- {\r
- if (x == 0)\r
- return "100%";\r
- else\r
- return "inf%";\r
- }\r
- static char Str[16];\r
- double p = x*100.0/y;\r
- sprintf(Str, "%5.1f%%", p);\r
- return Str;\r
-}\r
-\r
-string &GetProgressLevelStr(string &s)\r
-{\r
- unsigned Index = g_ProgressIndex;\r
- unsigned Count = g_ProgressCount;\r
- if (Count == UINT_MAX)\r
- {\r
- if (Index == UINT_MAX)\r
- s = "100%";\r
- else\r
- {\r
- char Tmp[16];\r
- sprintf(Tmp, "%u", Index); \r
- s = Tmp;\r
- }\r
- }\r
- else\r
- s = string(PctStr(Index+1, Count));\r
- s += string(" ") + g_ProgressDesc;\r
- return s;\r
-}\r
-\r
-void ProgressStep(unsigned i, unsigned N, const char *Format, ...)\r
-{\r
- if (opt_quiet)\r
- return;\r
- \r
- if (i == 0)\r
- {\r
- string Str;\r
- va_list ArgList;\r
- va_start(ArgList, Format);\r
- myvstrprintf(Str, Format, ArgList);\r
- va_end(ArgList);\r
- g_ProgressDesc = Str;\r
- g_ProgressIndex = 0;\r
- g_ProgressCount = N;\r
- g_CountsInterval = 1;\r
- g_StepCalls = 0;\r
- g_TimeLastOutputStep = 0;\r
- if (g_CurrProgressLineLength > 0)\r
- Progress("\n");\r
- }\r
- \r
- if (i >= N && i != UINT_MAX)\r
- Die("ProgressStep(%u,%u)", i, N);\r
- bool IsLastStep = (i == UINT_MAX || i + 1 == N);\r
- if (!IsLastStep)\r
- {\r
- ++g_StepCalls;\r
- if (g_StepCalls%g_CountsInterval != 0)\r
- return;\r
- \r
- time_t Now = time(0);\r
- if (Now == g_TimeLastOutputStep)\r
- {\r
- if (g_CountsInterval < 128)\r
- g_CountsInterval = (g_CountsInterval*3)/2;\r
- else\r
- g_CountsInterval += 64;\r
- return;\r
- }\r
- else\r
- {\r
- time_t Secs = Now - g_TimeLastOutputStep;\r
- if (Secs > 1)\r
- g_CountsInterval = unsigned(g_CountsInterval/(Secs*8));\r
- }\r
- \r
- if (g_CountsInterval < 1)\r
- g_CountsInterval = 1;\r
- \r
- g_TimeLastOutputStep = Now;\r
- }\r
- \r
- g_ProgressIndex = i;\r
- \r
- if (i > 0)\r
- {\r
- va_list ArgList;\r
- va_start(ArgList, Format);\r
- myvstrprintf(g_ProgressDesc, Format, ArgList);\r
- }\r
- \r
- string LevelStr;\r
- GetProgressLevelStr(LevelStr);\r
- Progress(" %s\r", LevelStr.c_str());\r
- \r
- if (IsLastStep)\r
- {\r
- g_CountsInterval = 1;\r
- fputc('\n', stderr);\r
- }\r
-}\r
-\r
-enum OptType\r
-{\r
- OT_Flag,\r
- OT_Tog,\r
- OT_Int,\r
- OT_Uns,\r
- OT_Str,\r
- OT_Float,\r
- OT_Enum\r
-};\r
-\r
-struct OptInfo\r
-{\r
- void *Value;\r
- bool *OptSet;\r
- string LongName;\r
- OptType Type;\r
- int iMin;\r
- int iMax;\r
- unsigned uMin;\r
- unsigned uMax;\r
- double dMin;\r
- double dMax;\r
- map<string, unsigned> EnumValues;\r
- \r
- bool bDefault;\r
- int iDefault;\r
- unsigned uDefault;\r
- double dDefault;\r
- string strDefault;\r
- \r
- string Help;\r
- \r
- bool operator<(const OptInfo &rhs) const\r
- {\r
- return LongName < rhs.LongName;\r
- }\r
-};\r
-\r
-static set<OptInfo> g_Opts;\r
-\r
-void Help()\r
-{\r
- printf("\n");\r
- \r
- void Usage();\r
- Usage();\r
- \r
- for (set<OptInfo>::const_iterator p = g_Opts.begin(); p != g_Opts.end(); ++p)\r
- {\r
- const OptInfo &Opt = *p;\r
- \r
- printf("\n");\r
- string LongName = Opt.LongName.c_str();\r
- if (Opt.Type == OT_Tog)\r
- LongName = string("[no]") + LongName;\r
- printf(" --%s ", LongName.c_str());\r
- \r
- switch (Opt.Type)\r
- {\r
- case OT_Flag:\r
- break;\r
- case OT_Tog:\r
- break;\r
- case OT_Int:\r
- printf("<int>");\r
- break;\r
- case OT_Uns:\r
- printf("<uint>");\r
- break;\r
- case OT_Str:\r
- printf("<str>");\r
- break;\r
- case OT_Float:\r
- printf("<float>");\r
- break;\r
- case OT_Enum:\r
- printf("<enum>");\r
- break;\r
- default:\r
- printf("??type");\r
- break;\r
- }\r
- \r
- printf(" ");\r
- const string &s = Opt.Help;\r
- for (string::const_iterator q = s.begin(); q != s.end(); ++q)\r
- {\r
- char c = *q;\r
- if (c == '\n')\r
- printf("\n ");\r
- else\r
- printf("%c", c);\r
- }\r
- printf("\n");\r
- }\r
- printf("\n");\r
- exit(0);\r
-}\r
-\r
-void CmdLineErr(const char *Format, ...)\r
-{\r
- va_list ArgList;\r
- va_start(ArgList, Format);\r
- string Str;\r
- myvstrprintf(Str, Format, ArgList);\r
- va_end(ArgList);\r
- fprintf(stderr, "\n");\r
- fprintf(stderr, "Invalid command line\n");\r
- fprintf(stderr, "%s\n", Str.c_str());\r
- fprintf(stderr, "For list of command-line options use --help.\n");\r
- fprintf(stderr, "\n");\r
- exit(1);\r
-}\r
-\r
-static set<OptInfo>::iterator GetOptInfo(const string &LongName,\r
- bool ErrIfNotFound)\r
-{\r
- for (set<OptInfo>::iterator p = g_Opts.begin();\r
- p != g_Opts.end(); ++p)\r
- {\r
- const OptInfo &Opt = *p;\r
- if (Opt.LongName == LongName)\r
- return p;\r
- if (Opt.Type == OT_Tog && "no" + Opt.LongName == LongName)\r
- return p;\r
- }\r
- if (ErrIfNotFound)\r
- CmdLineErr("Option --%s is invalid", LongName.c_str());\r
- return g_Opts.end();\r
-}\r
-\r
-static void AddOpt(const OptInfo &Opt)\r
-{\r
- if (GetOptInfo(Opt.LongName, false) != g_Opts.end())\r
- Die("Option --%s defined twice", Opt.LongName.c_str());\r
- g_Opts.insert(Opt);\r
-}\r
-\r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
-#else\r
-#pragma warning(disable: 4505) // unreferenced local function\r
-#endif\r
-\r
-static void DefineFlagOpt(const string &LongName, const string &Help,\r
- void *Value, bool *OptSet)\r
-{\r
- *(bool *) Value = false;\r
- \r
- OptInfo Opt;\r
- Opt.Value = Value;\r
- Opt.OptSet = OptSet;\r
- Opt.LongName = LongName;\r
- Opt.bDefault = false;\r
- Opt.Help = Help;\r
- Opt.Type = OT_Flag;\r
- AddOpt(Opt);\r
-}\r
-\r
-static void DefineTogOpt(const string &LongName, bool Default, const string &Help,\r
- void *Value, bool *OptSet)\r
-{\r
- *(bool *) Value = Default;\r
- \r
- OptInfo Opt;\r
- Opt.Value = Value;\r
- Opt.OptSet = OptSet;\r
- Opt.LongName = LongName;\r
- Opt.bDefault = Default;\r
- Opt.Help = Help;\r
- Opt.Type = OT_Tog;\r
- AddOpt(Opt);\r
-}\r
-\r
-static void DefineIntOpt(const string &LongName, int Default, int Min, int Max,\r
- const string &Help, void *Value, bool *OptSet)\r
-{\r
- *(int *) Value = Default;\r
- \r
- OptInfo Opt;\r
- Opt.Value = Value;\r
- Opt.OptSet = OptSet;\r
- Opt.LongName = LongName;\r
- Opt.iDefault = Default;\r
- Opt.iMin = Min;\r
- Opt.iMax = Max;\r
- Opt.Help = Help;\r
- Opt.Type = OT_Int;\r
- AddOpt(Opt);\r
-}\r
-\r
-static void DefineUnsOpt(const string &LongName, unsigned Default, unsigned Min,\r
- unsigned Max, const string &Help, void *Value, bool *OptSet)\r
-{\r
- *(unsigned *) Value = Default;\r
- \r
- OptInfo Opt;\r
- Opt.Value = Value;\r
- Opt.OptSet = OptSet;\r
- Opt.LongName = LongName;\r
- Opt.uDefault = Default;\r
- Opt.uMin = Min;\r
- Opt.uMax = Max;\r
- Opt.Help = Help;\r
- Opt.Type = OT_Uns;\r
- AddOpt(Opt);\r
-}\r
-\r
-static void DefineFloatOpt(const string &LongName, double Default, double Min,\r
- double Max, const string &Help, void *Value, bool *OptSet)\r
-{\r
- *(double *) Value = Default;\r
- \r
- OptInfo Opt;\r
- Opt.Value = Value;\r
- Opt.OptSet = OptSet;\r
- Opt.LongName = LongName;\r
- Opt.dDefault = Default;\r
- Opt.dMin = Min;\r
- Opt.dMax = Max;\r
- Opt.Help = Help;\r
- Opt.Type = OT_Float;\r
- AddOpt(Opt);\r
-}\r
-\r
-static void DefineStrOpt(const string &LongName, const char *Default,\r
- const string &Help, void *Value, bool *OptSet)\r
-{\r
- *(string *) Value = (Default == 0 ? "" : string(Default));\r
- \r
- OptInfo Opt;\r
- Opt.Value = Value;\r
- Opt.OptSet = OptSet;\r
- Opt.LongName = LongName;\r
- Opt.strDefault = (Default == 0 ? "" : string(Default));\r
- Opt.Help = Help;\r
- Opt.Type = OT_Str;\r
- AddOpt(Opt);\r
-}\r
-\r
-static void ParseEnumValues(const string &Values, map<string, unsigned> &EnumValues)\r
-{\r
- EnumValues.clear();\r
- \r
- string Name;\r
- string Value;\r
- bool Eq = false;\r
- for (string::const_iterator p = Values.begin(); ; ++p)\r
- {\r
- char c = (p == Values.end() ? '|' : *p);\r
- if (isspace(c))\r
- ;\r
- else if (c == '|')\r
- {\r
- if (EnumValues.find(Name) != EnumValues.end())\r
- Die("Invalid enum values, '%s' defined twice: '%s'",\r
- Name.c_str(), Values.c_str());\r
- if (Name.empty() || Value.empty())\r
- Die("Invalid enum values, empty name or value: '%s'",\r
- Values.c_str());\r
- \r
- EnumValues[Name] = atoi(Value.c_str());\r
- Name.clear();\r
- Value.clear();\r
- Eq = false;\r
- }\r
- else if (c == '=')\r
- Eq = true;\r
- else if (Eq)\r
- Value.push_back(c);\r
- else\r
- Name.push_back(c);\r
- if (p == Values.end())\r
- return;\r
- }\r
-}\r
-\r
-static void DefineEnumOpt(const string &LongName, const string &ShortName,\r
- int Default, const string &Values, const string &Help, void *Value)\r
-{\r
- *(int *) Value = Default;\r
- \r
- OptInfo Opt;\r
- Opt.Value = Value;\r
- Opt.LongName = LongName;\r
- Opt.iDefault = Default;\r
- Opt.Help = Help;\r
- Opt.Type = OT_Enum;\r
- ParseEnumValues(Values, Opt.EnumValues);\r
- AddOpt(Opt);\r
-}\r
-#undef FLAG_OPT\r
-#undef TOG_OPT\r
-#undef INT_OPT\r
-#undef UNS_OPT\r
-#undef FLT_OPT\r
-#undef STR_OPT\r
-#undef ENUM_OPT\r
-#define FLAG_OPT(LongName) bool opt_##LongName; bool optset_##LongName;\r
-#define TOG_OPT(LongName, Default) bool opt_##LongName; bool optset_##LongName;\r
-#define INT_OPT(LongName, Default, Min, Max) int opt_##LongName; bool optset_##LongName;\r
-#define UNS_OPT(LongName, Default, Min, Max) unsigned opt_##LongName; bool optset_##LongName;\r
-#define FLT_OPT(LongName, Default, Min, Max) double opt_##LongName; bool optset_##LongName;\r
-#define STR_OPT(LongName, Default) string opt_##LongName; bool optset_##LongName;\r
-#define ENUM_OPT(LongName, Values, Default) int opt_##LongName; bool optset_##LongName;\r
-#include "myopts.h"\r
-\r
-static int EnumStrToInt(const OptInfo &Opt, const string &Value)\r
-{\r
- const map<string, unsigned> &e = Opt.EnumValues;\r
- string s;\r
- for (map<string, unsigned>::const_iterator p = e.begin(); p != e.end(); ++p)\r
- {\r
- if (Value == p->first)\r
- return p->second;\r
- s += " " + p->first;\r
- }\r
- CmdLineErr("--%s %s not recognized, valid are: %s",\r
- Opt.LongName.c_str(), Value.c_str(), s.c_str());\r
- ureturn(-1);\r
-}\r
-\r
-static void SetOpt(OptInfo &Opt, const string &Value)\r
-{\r
- *Opt.OptSet = true;\r
- switch (Opt.Type)\r
- {\r
- case OT_Int:\r
- {\r
- *(int *) Opt.Value = atoi(Value.c_str());\r
- break;\r
- }\r
- case OT_Uns:\r
- {\r
- unsigned uValue = 0;\r
- int n = sscanf(Value.c_str(), "%u", &uValue);\r
- if (n != 1)\r
- CmdLineErr("Invalid value '%s' for --%s",\r
- Value.c_str(), Opt.LongName.c_str());\r
- *(unsigned *) Opt.Value = uValue;\r
- break;\r
- }\r
- case OT_Float:\r
- {\r
- *(double *) Opt.Value = atof(Value.c_str());\r
- break;\r
- }\r
- case OT_Str:\r
- {\r
- *(string *) Opt.Value = Value;\r
- break;\r
- }\r
- case OT_Enum:\r
- {\r
- *(int *) Opt.Value = EnumStrToInt(Opt, Value);\r
- break;\r
- }\r
- default:\r
- asserta(false);\r
- }\r
-}\r
-\r
-void LogOpts()\r
-{\r
- for (set<OptInfo>::const_iterator p = g_Opts.begin(); p != g_Opts.end(); ++p)\r
- {\r
- const OptInfo &Opt = *p;\r
- Log("%s = ", Opt.LongName.c_str());\r
- switch (Opt.Type)\r
- {\r
- case OT_Flag:\r
- Log("%s", (*(bool *) Opt.Value) ? "yes" : "no");\r
- break;\r
- case OT_Tog:\r
- Log("%s", (*(bool *) Opt.Value) ? "on" : "off");\r
- break;\r
- case OT_Int:\r
- Log("%d", *(int *) Opt.Value);\r
- break;\r
- case OT_Uns:\r
- Log("%u", *(unsigned *) Opt.Value);\r
- break;\r
- case OT_Float:\r
- {\r
- double Value = *(double *) Opt.Value;\r
- if (Value == FLT_MAX)\r
- Log("*");\r
- else\r
- Log("%g", Value);\r
- break;\r
- }\r
- case OT_Str:\r
- Log("%s", (*(string *) Opt.Value).c_str());\r
- break;\r
- case OT_Enum:\r
- Log("%d", *(int *) Opt.Value);\r
- break;\r
- default:\r
- asserta(false);\r
- }\r
- Log("\n");\r
- }\r
-}\r
-\r
-static void CompilerInfo()\r
-{\r
-#ifdef _FILE_OFFSET_BITS\r
- printf("_FILE_OFFSET_BITS=%d\n", _FILE_OFFSET_BITS);\r
-#else\r
- printf("_FILE_OFFSET_BITS not defined\n");\r
-#endif\r
- \r
-#define x(t) printf("sizeof(" #t ") = %d\n", (int) sizeof(t));\r
- x(int)\r
- x(long)\r
- x(float)\r
- x(double)\r
- x(void *)\r
- x(off_t)\r
-#undef x\r
- exit(0);\r
-}\r
-\r
-void Split(const string &Str, vector<string> &Fields, char Sep)\r
-{\r
- Fields.clear();\r
- const unsigned Length = (unsigned) Str.size();\r
- string s;\r
- for (unsigned i = 0; i < Length; ++i)\r
- {\r
- char c = Str[i];\r
- if ((Sep == 0 && isspace(c)) || c == Sep)\r
- {\r
- if (!s.empty() || Sep != 0)\r
- Fields.push_back(s);\r
- s.clear();\r
- }\r
- else\r
- s.push_back(c);\r
- }\r
- if (!s.empty())\r
- Fields.push_back(s);\r
-}\r
-\r
-static void GetArgsFromFile(const string &FileName, vector<string> &Args)\r
-{\r
- Args.clear();\r
- \r
- FILE *f = OpenStdioFile(FileName);\r
- string Line;\r
- while (ReadLineStdioFile(f, Line))\r
- {\r
- size_t n = Line.find('#');\r
- if (n != string::npos)\r
- Line = Line.substr(0, n);\r
- vector<string> Fields;\r
- Split(Line, Fields);\r
- Args.insert(Args.end(), Fields.begin(), Fields.end());\r
- }\r
- CloseStdioFile(f);\r
-}\r
-\r
-void MyCmdLine(int argc, char **argv)\r
-{\r
- g_Opts.clear(); g_Argv.clear();\r
- static unsigned RecurseDepth = 0;\r
- ++RecurseDepth;\r
- \r
- DefineFlagOpt("compilerinfo", "Write info about compiler types and #defines to stdout.",\r
- (void *) &opt_compilerinfo, &optset_compilerinfo);\r
- DefineFlagOpt("quiet", "Turn off progress messages.", (void *) &opt_quiet, &optset_quiet);\r
- DefineFlagOpt("version", "Show version and exit.", (void *) &opt_version, &optset_version);\r
- DefineFlagOpt("logopts", "Log options.", (void *) &opt_logopts, &optset_logopts);\r
- DefineFlagOpt("help", "Display command-line options.", (void *) &opt_help, &optset_help);\r
- DefineStrOpt("log", "", "Log file name.", (void *) &opt_log, &optset_log);\r
- \r
-#undef FLAG_OPT\r
-#undef TOG_OPT\r
-#undef INT_OPT\r
-#undef UNS_OPT\r
-#undef FLT_OPT\r
-#undef STR_OPT\r
-#undef ENUM_OPT\r
-#define FLAG_OPT(LongName) DefineFlagOpt(#LongName, "help", (void *) &opt_##LongName, &optset_##LongName);\r
-#define TOG_OPT(LongName, Default) DefineTogOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName);\r
-#define INT_OPT(LongName, Default, Min, Max) DefineIntOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);\r
-#define UNS_OPT(LongName, Default, Min, Max) DefineUnsOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);\r
-#define FLT_OPT(LongName, Default, Min, Max) DefineFloatOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);\r
-#define STR_OPT(LongName, Default) DefineStrOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName);\r
-#define ENUM_OPT(LongName, Values, Default) DefineEnumOpt(#LongName, Values, Default, "help", (void *) &opt_##LongName, &optset_##LongName);\r
-#include "myopts.h"\r
- \r
- if (RecurseDepth == 0)\r
- g_Argv.clear();\r
- \r
- for (int i = 0; i < argc; ++i) \r
- g_Argv.push_back(string(argv[i]));\r
- \r
- \r
- int i = 1;\r
- for (;;)\r
- {\r
- if (i >= argc)\r
- break;\r
- const string &Arg = g_Argv[i];\r
- \r
- if (Arg.empty())\r
- continue;\r
- else if (Arg == "file:" && i + 1 < argc)\r
- {\r
- const string &FileName = g_Argv[i+1];\r
- vector<string> Args;\r
- GetArgsFromFile(FileName, Args);\r
- for (vector<string>::const_iterator p = Args.begin();\r
- p != Args.end(); ++p)\r
- {\r
- g_Argv.push_back(*p);\r
- ++argc;\r
- }\r
- i += 2;\r
- continue;\r
- }\r
- else if (Arg.size() > 1 && Arg[0] == '-')\r
- {\r
- string LongName = (Arg.size() > 2 && Arg[1] == '-' ? Arg.substr(2) : Arg.substr(1));\r
- OptInfo Opt = *GetOptInfo(LongName, true);\r
- *Opt.OptSet = true;\r
- if (Opt.Type == OT_Flag)\r
- {\r
- g_Opts.erase(Opt);\r
- *(bool *) Opt.Value = true;\r
- g_Opts.insert(Opt);\r
- ++i;\r
- continue;\r
- }\r
- else if (Opt.Type == OT_Tog)\r
- {\r
- g_Opts.erase(Opt);\r
- if (string("no") + Opt.LongName == LongName)\r
- *(bool *) Opt.Value = false;\r
- else\r
- {\r
- asserta(Opt.LongName == LongName);\r
- *(bool *) Opt.Value = true;\r
- }\r
- g_Opts.insert(Opt);\r
- ++i;\r
- continue;\r
- }\r
- \r
- ++i;\r
- if (i >= argc)\r
- CmdLineErr("Missing value for option --%s", LongName.c_str());\r
- \r
- string Value = g_Argv[i];\r
- SetOpt(Opt, Value);\r
- \r
- ++i;\r
- continue;\r
- }\r
- else\r
- CmdLineErr("Expected -option_name or --option_name, got '%s'", Arg.c_str());\r
- }\r
- \r
- --RecurseDepth;\r
- if (RecurseDepth > 0)\r
- return;\r
- \r
- if (opt_help)\r
- Help();\r
- \r
- if (opt_compilerinfo)\r
- CompilerInfo();\r
- \r
- SetLogFileName(opt_log);\r
- \r
- if (opt_log != "")\r
- {\r
- for (int i = 0; i < argc; ++i)\r
- Log("%s%s", i == 0 ? "" : " ", g_Argv[i].c_str());\r
- Log("\n");\r
- time_t Now = time(0);\r
- struct tm *t = localtime(&Now);\r
- const char *s = asctime(t);\r
- Log("Started %s", s); // there is a newline in s\r
- Log("Version " MY_VERSION ".%s\n", SVN_VERSION);\r
- Log("\n");\r
- }\r
- \r
- if (opt_logopts)\r
- LogOpts();\r
-}\r
-\r
-double Pct(double x, double y)\r
-{\r
- if (y == 0.0f)\r
- return 0.0f;\r
- return (x*100.0f)/y;\r
-}\r
-\r
-void GetCmdLine(string &s)\r
-{\r
- s.clear();\r
- for (unsigned i = 0; i < SIZE(g_Argv); ++i)\r
- {\r
- if (i > 0)\r
- s += " ";\r
- s += g_Argv[i];\r
- }\r
-}\r
-\r
-char *mystrsave(const char *s)\r
-{\r
- unsigned n = unsigned(strlen(s));\r
- char *t = myalloc(char, n+1);\r
- memcpy(t, s, n+1);\r
- return t;\r
-}\r
-\r
-void Logu(unsigned u, unsigned w, unsigned prefixspaces)\r
-{\r
- for (unsigned i = 0; i < prefixspaces; ++i)\r
- Log(" ");\r
- if (u == UINT_MAX)\r
- Log("%*.*s", w, w, "*");\r
- else\r
- Log("%*u", w, u);\r
-}\r
-\r
-void Logf(float x, unsigned w, unsigned prefixspaces)\r
-{\r
- for (unsigned i = 0; i < prefixspaces; ++i)\r
- Log(" ");\r
- if (x == FLT_MAX)\r
- Log("%*.*s", w, w, "*");\r
- else\r
- Log("%*.2f", w, x);\r
-}\r
-\r
-static uint32 g_SLCG_state = 1;\r
-\r
-// Numerical values used by Microsoft C, according to wikipedia:\r
-// http://en.wikipedia.org/wiki/Linear_congruential_generator\r
-static uint32 g_SLCG_a = 214013;\r
-static uint32 g_SLCG_c = 2531011;\r
-\r
-// Simple Linear Congruential Generator\r
-// Bad properties; used just to initialize the better generator.\r
-static uint32 SLCG_rand()\r
-{\r
- g_SLCG_state = g_SLCG_state*g_SLCG_a + g_SLCG_c;\r
- return g_SLCG_state;\r
-}\r
-\r
-static void SLCG_srand(uint32 Seed)\r
-{\r
- g_SLCG_state = Seed;\r
- for (int i = 0; i < 10; ++i)\r
- SLCG_rand();\r
-}\r
-\r
-/***\r
- A multiply-with-carry random number generator, see:\r
- http://en.wikipedia.org/wiki/Multiply-with-carry\r
- \r
- The particular multipliers used here were found on\r
- the web where they are attributed to George Marsaglia.\r
- ***/\r
-\r
-static bool g_InitRandDone = false;\r
-static uint32 g_X[5];\r
-\r
-uint32 RandInt32()\r
-{\r
- InitRand();\r
- \r
- uint64 Sum = 2111111111*(uint64) g_X[3] + 1492*(uint64) g_X[2] +\r
- 1776*(uint64) g_X[1] + 5115*(uint64) g_X[0] + g_X[4];\r
- g_X[3] = g_X[2];\r
- g_X[2] = g_X[1];\r
- g_X[1] = g_X[0];\r
- g_X[4] = (uint32) (Sum >> 32);\r
- g_X[0] = (uint32) Sum;\r
- return g_X[0];\r
-}\r
-\r
-unsigned randu32()\r
-{\r
- return (unsigned) RandInt32();\r
-}\r
-\r
-void InitRand()\r
-{\r
- if (g_InitRandDone)\r
- return;\r
- // Do this first to avoid recursion\r
- g_InitRandDone = true;\r
- \r
- unsigned Seed = (optset_randseed ? opt_randseed : (unsigned) (time(0)*getpid()));\r
- Log("RandSeed=%u\n", Seed);\r
- SLCG_srand(Seed);\r
- \r
- for (unsigned i = 0; i < 5; i++)\r
- g_X[i] = SLCG_rand();\r
- \r
- for (unsigned i = 0; i < 100; i++)\r
- RandInt32();\r
-}\r
-\r
-// MUST COME AT END BECAUSE OF #undef\r
-#if RCE_MALLOC\r
-#undef mymalloc\r
-#undef myfree\r
-#undef myfree2\r
-void *mymalloc(unsigned bytes, const char *FileName, int Line)\r
-{\r
- void *rce_malloc(unsigned bytes, const char *FileName, int Line);\r
- return rce_malloc(bytes, FileName, Line);\r
-}\r
-\r
-void myfree(void *p, const char *FileName, int Line)\r
-{\r
- void rce_free(void *p, const char *FileName, int Line);\r
- rce_free(p, FileName, Line);\r
-}\r
-\r
-void myfree2(void *p, unsigned bytes, const char *FileName, int Line)\r
-{\r
- void rce_free(void *p, const char *FileName, int Line);\r
- rce_free(p, FileName, Line);\r
-}\r
-\r
-#else // RCE_MALLOC\r
-void *mymalloc(unsigned bytes)\r
-{\r
- ++g_NewCalls;\r
- if (g_InitialMemUseBytes == 0)\r
- g_InitialMemUseBytes = GetMemUseBytes();\r
- \r
- g_TotalAllocBytes += bytes;\r
- g_NetBytes += bytes;\r
- if (g_NetBytes > g_MaxNetBytes)\r
- {\r
- if (g_NetBytes > g_MaxNetBytes + 10000000)\r
- GetMemUseBytes();//to force update of peak\r
- g_MaxNetBytes = g_NetBytes;\r
- }\r
- void *p = malloc(bytes);\r
- //void *p = _malloc_dbg(bytes, _NORMAL_BLOCK, __FILE__, __LINE__);\r
- if (0 == p)\r
- {\r
- double b = GetMemUseBytes();\r
- fprintf(stderr, "\nOut of memory mymalloc(%u), curr %.3g bytes",\r
- (unsigned) bytes, b);\r
- void LogAllocs();\r
- LogAllocs();\r
-#if DEBUG && defined(_MSC_VER)\r
- asserta(_CrtCheckMemory());\r
-#endif\r
- Die("Out of memory, mymalloc(%u), curr %.3g bytes\n",\r
- (unsigned) bytes, b);\r
- }\r
- return p;\r
-}\r
-\r
-void myfree(void *p)\r
-{\r
- if (p == 0)\r
- return;\r
- free(p);\r
- //_free_dbg(p, _NORMAL_BLOCK);\r
-}\r
-\r
-void myfree2(void *p, unsigned bytes)\r
-{\r
- ++g_FreeCalls;\r
- g_TotalFreeBytes += bytes;\r
- g_NetBytes -= bytes;\r
- \r
- if (p == 0)\r
- return;\r
- free(p);\r
-}\r
-#endif\r
+++ /dev/null
-#ifndef myutils_h\r
-#define myutils_h\r
-\r
-#define RCE_MALLOC 0\r
-
-#include <stdio.h>\r
-#include <sys/types.h>\r
-#include <string>\r
-#include <string.h>\r
-#include <memory.h>\r
-#include <vector>\r
-#include <math.h>\r
-#include <stdarg.h>\r
-#include <cstdlib>\r
-#include <climits>\r
-\r
-#ifndef _MSC_VER\r
-#include <inttypes.h>\r
-#endif\r
-\r
-using namespace std;\r
-\r
-#ifdef _MSC_VER\r
-#include <crtdbg.h>\r
-#pragma warning(disable: 4996) // deprecated functions\r
-#define _CRT_SECURE_NO_DEPRECATE 1\r
-#endif\r
-\r
-#if defined(_DEBUG) && !defined(DEBUG)\r
-#define DEBUG 1\r
-#endif\r
-\r
-#if defined(DEBUG) && !defined(_DEBUG)\r
-#define _DEBUG 1\r
-#endif\r
-\r
-#ifndef NDEBUG\r
-#define DEBUG 1\r
-#define _DEBUG 1\r
-#endif\r
-\r
-typedef unsigned char byte;\r
-typedef unsigned short uint16;\r
-typedef unsigned uint32;\r
-typedef int int32;\r
-typedef double float32;\r
-typedef signed char int8;\r
-typedef unsigned char uint8;\r
-\r
-#ifdef _MSC_VER\r
-\r
-typedef __int64 int64;\r
-typedef unsigned __int64 uint64;\r
-\r
-#define INT64_PRINTF "lld"\r
-#define UINT64_PRINTF "llu"\r
-\r
-#define SIZE_T_PRINTF "u"\r
-#define OFF64_T_PRINTF "lld"\r
-\r
-#define INT64_PRINTFX "llx"\r
-#define UINT64_PRINTFX "llx"\r
-\r
-#define SIZE_T_PRINTFX "x"\r
-#define OFF64_T_PRINTFX "llx"\r
-\r
-#elif defined(__x86_64__)\r
-\r
-typedef long int64;\r
-typedef unsigned long uint64;\r
-\r
-#define INT64_PRINTF "ld"\r
-#define UINT64_PRINTF "lu"\r
-\r
-#define SIZE_T_PRINTF "lu"\r
-#define OFF64_T_PRINTF "ld"\r
-\r
-#define INT64_PRINTFX "lx"\r
-#define UINT64_PRINTFX "lx"\r
-\r
-#define SIZE_T_PRINTFX "lx"\r
-#define OFF64_T_PRINTFX "lx"\r
-\r
-#else\r
-\r
-typedef long long int64;\r
-typedef unsigned long long uint64;\r
-\r
-#define INT64_PRINTF "lld"\r
-#define UINT64_PRINTF "llu"\r
-\r
-#define SIZE_T_PRINTF "u"\r
-#define OFF64_T_PRINTF "lld"\r
-\r
-#define INT64_PRINTFX "llx"\r
-#define UINT64_PRINTFX "llx"\r
-\r
-#define SIZE_T_PRINTFX "x"\r
-#define OFF64_T_PRINTFX "llx"\r
-#endif\r
-\r
-#define d64 INT64_PRINTF\r
-#define u64 UINT64_PRINTF\r
-#define x64 UINT64_PRINTFX\r
-\r
-// const uint64 UINT64_MAX = (~((uint64) 0));\r
-\r
-void myassertfail(const char *Exp, const char *File, unsigned Line);\r
-#undef assert\r
-#ifdef NDEBUG\r
-#define assert(exp) ((void)0)\r
-#define myassert(exp) ((void)0)\r
-#else\r
-#define assert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )\r
-#define myassert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )\r
-#endif\r
-#define asserta(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )\r
-\r
-#define ureturn(x) return (x)\r
-\r
-#define NotUsed(v) ((void *) &v)\r
-\r
-// pom=plus or minus, tof=true or false\r
-static inline char pom(bool Plus) { return Plus ? '+' : '-'; }\r
-static inline char tof(bool x) { return x ? 'T' : 'F'; }\r
-static inline char yon(bool x) { return x ? 'Y' : 'N'; }\r
-unsigned GetElapsedSecs();\r
-\r
-#if RCE_MALLOC\r
-\r
-void *rce_malloc(unsigned bytes, const char *FileName, int Line);\r
-void rce_free(void *p, const char *FileName, int LineNr);\r
-void rce_chkmem();\r
-\r
-void rce_dumpmem_(const char *FileName, int LineNr);\r
-#define rce_dumpmem() rce_dumpmem_(__FILE__, __LINE__)\r
-\r
-void rce_assertvalidptr_(void *p, const char *FileName, int LineNr);\r
-#define rce_assertvalidptr(p) rce_assertvalidptr_(p, __FILE__, __LINE__)\r
-\r
-void rce_dumpptr_(void *p, const char *FileName, int LineNr);\r
-#define rce_dumpptr(p) rce_dumpptr_(p, __FILE__, __LINE__)\r
-\r
-#define mymalloc(n) rce_malloc((n), __FILE__, __LINE__)\r
-#define myfree(p) rce_free(p, __FILE__, __LINE__)\r
-#define myfree2(p,n) rce_free(p, __FILE__, __LINE__)\r
-#define myalloc(t, n) (t *) rce_malloc((n)*sizeof(t), __FILE__, __LINE__)\r
-\r
-#else // RCE_MALLOC\r
-void *mymalloc(unsigned bytes);\r
-void myfree2(void *p, unsigned Bytes);\r
-void myfree(void *p);\r
-#define rce_chkmem() /* empty */\r
-#define myalloc(t, n) (t *) mymalloc((n)*sizeof(t))\r
-#endif // RCE_MALLOC\r
-\r
-#define SIZE(c) unsigned((c).size())\r
-\r
-bool myisatty(int fd);\r
-\r
-#ifdef _MSC_VER\r
-#define off_t __int64\r
-#endif\r
-\r
-FILE *OpenStdioFile(const string &FileName);\r
-FILE *CreateStdioFile(const string &FileName);\r
-bool CanSetStdioFilePos(FILE *f);\r
-void CloseStdioFile(FILE *f);\r
-void SetStdioFilePos(FILE *f, off_t Pos);\r
-void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes);\r
-void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes);\r
-void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes);\r
-void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes);\r
-bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes);\r
-bool ReadLineStdioFile(FILE *f, string &Line);\r
-byte *ReadAllStdioFile(FILE *f, off_t &FileSize);\r
-byte *ReadAllStdioFile(const string &FileName, off_t &FileSize);\r
-void AppendStdioFileToFile(FILE *fFrom, FILE *fTo);\r
-void FlushStdioFile(FILE *f);\r
-bool StdioFileExists(const string &FileName);\r
-off_t GetStdioFilePos(FILE *f);\r
-off_t GetStdioFileSize(FILE *f);\r
-void LogStdioFileState(FILE *f);\r
-void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo);\r
-void DeleteStdioFile(const string &FileName);\r
-\r
-void myvstrprintf(string &Str, const char *szFormat, va_list ArgList);\r
-void myvstrprintf(string &Str, const char *szFormat, ...);\r
-\r
-void SetLogFileName(const string &FileName);\r
-void Log(const char *szFormat, ...);\r
-\r
-void Die(const char *szFormat, ...);\r
-void Warning(const char *szFormat, ...);\r
-\r
-void ProgressStep(unsigned i, unsigned N, const char *Format, ...);\r
-void Progress(const char *szFormat, ...);\r
-void Progress(const string &Str);\r
-void ProgressLog(const char *szFormat, ...);\r
-void ProgressExit();\r
-\r
-char *mystrsave(const char *s);\r
-\r
-double GetPeakMemUseBytes();\r
-\r
-// Are two floats equal to within epsilon?\r
-const double epsilon = 0.01;\r
-inline bool feq(double x, double y, double epsilon)\r
- {\r
- if (fabs(x) > 10000)\r
- epsilon = fabs(x)/10000;\r
- if (fabs(x - y) > epsilon)\r
- return false;\r
- return true;\r
- }\r
-\r
-inline bool feq(double x, double y)\r
- {\r
- if (x < -1e6 && y < -1e6)\r
- return true;\r
- double e = epsilon;\r
- if (fabs(x) > 10000)\r
- e = fabs(x)/10000;\r
- if (fabs(x - y) > e)\r
- return false;\r
- return true;\r
- }\r
-\r
-#define asserteq(x, y) assert(feq(x, y))\r
-#define assertaeq(x, y) asserta(feq(x, y))\r
-\r
-#define zero(a, n) memset(a, 0, n*sizeof(a[0]))\r
-\r
-void InitRand();\r
-unsigned randu32();\r
-void Split(const string &Str, vector<string> &Fields, char Sep = 0);\r
-double Pct(double x, double y);\r
-double GetMemUseBytes();\r
-const char *MemBytesToStr(double Bytes);\r
-const char *IntToStr(unsigned i);\r
-const char *FloatToStr(double d);\r
-const char *SecsToStr(double Secs);\r
-void Logu(unsigned u, unsigned w, unsigned prefixspaces = 2);\r
-void Logf(float x, unsigned w, unsigned prefixspaces = 2);\r
-const char *SecsToHHMMSS(int Secs);\r
-\r
-void MyCmdLine(int argc, char **argv);\r
-void CmdLineErr(const char *Format, ...);\r
-void Help();\r
-void GetCmdLine(string &s);\r
-\r
-#define FLAG_OPT(LongName) extern bool opt_##LongName; extern bool optset_##LongName;\r
-#define TOG_OPT(LongName, Default) extern bool opt_##LongName; extern bool optset_##LongName;\r
-#define INT_OPT(LongName, Default, Min, Max) extern int opt_##LongName; extern bool optset_##LongName;\r
-#define UNS_OPT(LongName, Default, Min, Max) extern unsigned opt_##LongName; extern bool optset_##LongName;\r
-#define FLT_OPT(LongName, Default, Min, Max) extern double opt_##LongName; extern bool optset_##LongName;\r
-#define STR_OPT(LongName, Default) extern string opt_##LongName; extern bool optset_##LongName;\r
-#define ENUM_OPT(LongName, Default, Values) extern int opt_##LongName; extern bool optset_##LongName;\r
-#include "myopts.h"\r
-#undef FLAG_OPT\r
-#undef TOG_OPT\r
-#undef INT_OPT\r
-#undef UNS_OPT\r
-#undef FLT_OPT\r
-#undef STR_OPT\r
-#undef ENUM_OPT\r
-\r
-extern const char *SVN_VERSION;\r
-extern const char *SVN_MODS;\r
-extern bool opt_quiet;
-extern bool opt_version;
-extern FILE *g_fLog;
-\r
-#endif // myutils_h\r
+++ /dev/null
-#ifndef orf_h\r
-#define orf_h\r
-\r
-#include "alpha.h"\r
-\r
-struct ORFData\r
- {\r
- const byte *NucSeq;\r
- const byte *AminoSeq;\r
- int Frame;\r
- unsigned NucL;\r
- unsigned AminoL;\r
- unsigned NucLo;\r
- unsigned NucHi;\r
- ORFData *Next;\r
-\r
- unsigned GetNucPosFirstBase() const;\r
- unsigned GetAAPos(unsigned NucPos) const;\r
- unsigned GetCodex(unsigned NucPos) const;\r
- unsigned GetNucLo(unsigned AALo, unsigned AAHi) const;\r
- unsigned GetNucHi(unsigned AALo, unsigned AAHi) const;\r
- unsigned GetAALo(unsigned NucLo, unsigned NucHi) const;\r
- unsigned GetAAHi(unsigned NucLo, unsigned NucHi) const;\r
- unsigned GetNucPosFirstBaseInCodon(unsigned AAPos) const;\r
- unsigned GetNucPosLastBaseInCodon(unsigned AAPos) const;\r
- unsigned RoundToCodonLo(unsigned NucPos) const;\r
- unsigned RoundToCodonHi(unsigned NucPos) const;\r
- void LogMe() const;\r
- void LogMe2() const;\r
- };\r
-\r
-const byte ORFEND = '.';\r
-\r
-void GetORFs(const byte *NucSeq, unsigned NucL, vector<ORFData> &ORFs,\r
- unsigned ORFStyle, int FindFrame, int Sign);\r
-\r
-#endif // orf_h\r
+++ /dev/null
-#ifndef out_h\r
-#define out_h\r
-\r
-#include "seq.h"\r
-#include "hsp.h"\r
-#include "orf.h"\r
-#include "path.h"\r
-#include <float.h>\r
-\r
-struct AlnData\r
- {\r
-/***\r
-SA.Seq and SB.Seq align.\r
-Reverse strand stuff for nucleotides is handled like this:\r
- SA.RevComp must be false.\r
- If SB.RevComp is true, then SA.Seq is r.c.'d relative to the sequence in\r
- the input file (query or db). If so, coordinates in HSP refer to SB.Seq\r
- so are also r.c.'d relative to the original sequence.\r
-***/\r
- SeqData SA;\r
- SeqData SB;\r
- HSPData HSP;\r
- const char *Path;\r
- char IdDesc[256];\r
-\r
- float FractId;\r
- float RawScore;\r
- float BitScore;\r
- float Evalue;\r
-\r
- void LogMe() const\r
- {\r
- Log("AD: ");\r
- HSP.LogMe();\r
- Log(" %s,%s\n", SA.Label, SB.Label);\r
- }\r
- };\r
-\r
-bool OnDerepHit(const SeqData &SA, const SeqData &SB);\r
-\r
-bool OnLocalUngappedHit(const SeqData &SA, const SeqData &SB,\r
- const HSPData &HSP, float &Evalue, float &FractId);\r
-\r
-bool OnLocalGappedHit(const SeqData &SA, const SeqData &SB,\r
- const HSPData &HSP, const PathData &PD, float &Evalue, float &FractId);\r
-\r
-bool OnGlobalHit(const SeqData &SA, const SeqData &SB, const PathData &PD,\r
- float &FractId);\r
-\r
-void OnReject(const SeqData &SA, const SeqData &SB, double FractId,\r
- const char *Path);\r
-\r
-void OnNotMatched(const char *Label, unsigned L);\r
-void OnNewCluster(unsigned ClusterIndex, const char *Label, unsigned L);\r
-void OnNewLibCluster(unsigned ClusterIndex, const char *Label, unsigned L);\r
-void OnLibCluster(unsigned ClusterIndex, unsigned Size, double AvgId,\r
- const char *Label);\r
-void OnNewCluster(unsigned ClusterIndex, unsigned Size, double AvgId,\r
- const char *Label);\r
-void OnChainCov(const SeqData &NucleoSD, const SeqData &TargetSD,\r
- float Score, float ChainCov);\r
-\r
-void SetUserFieldIndexes(const string &s);\r
-\r
-void BlastOut(FILE *f, const AlnData &AD);\r
-void Blast6Out(FILE *f, const AlnData &AD);\r
-void FastaPairOut(FILE *f, const AlnData &AD);\r
-void UserOut(FILE *f, const AlnData &AD);\r
-\r
-void BlastOutORF(FILE *f, const AlnData &AD);\r
-\r
-void OpenOutputFiles();\r
-void CloseOutputFiles();\r
-void SetLibSeedCount(unsigned DBSeqCount);\r
-const char *UserFieldIndexToStr(unsigned i);\r
-\r
-extern float **g_SubstMx;\r
-\r
-static char g_IdChar = '|';\r
-static char g_DiffChar = ' ';\r
-\r
-static inline char GetSymN(byte Letter1, byte Letter2)\r
- {\r
- Letter1 = toupper(Letter1);\r
- Letter2 = toupper(Letter2);\r
- if (Letter1 == Letter2)\r
- return g_IdChar;\r
- return g_DiffChar;\r
- }\r
-\r
-static inline char GetSymA(byte Letter1, byte Letter2)\r
- {\r
- Letter1 = toupper(Letter1);\r
- Letter2 = toupper(Letter2);\r
- if (Letter1 == Letter2)\r
- return '|';\r
-\r
- float Score = g_SubstMx[Letter1][Letter2];\r
- if (Score >= 2.0f)\r
- return ':';\r
- if (Score > 0.0f)\r
- return '.';\r
- return ' ';\r
- }\r
-\r
-static inline char GetSym(byte Letter1, byte Letter2, bool Nucleo)\r
- {\r
- if (Nucleo)\r
- return GetSymN(Letter1, Letter2);\r
- else\r
- return GetSymA(Letter1, Letter2);\r
- }\r
-\r
-static unsigned GetNDig(unsigned n)\r
- {\r
- if (n < 10)\r
- return 1;\r
- if (n < 100)\r
- return 2;\r
- if (n < 1000)\r
- return 3;\r
- if (n < 10000)\r
- return 4;\r
- if (n < 100000)\r
- return 5;\r
- if (n < 1000000)\r
- return 6;\r
- return 10;\r
- }\r
-\r
-extern unsigned *g_UserFieldIndexes;\r
-extern unsigned g_UserFieldCount;\r
-\r
-#endif // out_h\r
+++ /dev/null
-#include "myutils.h"\r
-#include "path.h"\r
-#include "timing.h"\r
-\r
-#define TRACE 0\r
-\r
-const unsigned PathMagic = 0x9A783A16;\r
-\r
-struct PathBuffer\r
- {\r
- unsigned Magic;\r
- char *Buffer;\r
- unsigned Size;\r
- bool InUse;\r
- };\r
-\r
-static PathBuffer **g_PathBuffers;\r
-static unsigned g_PathBufferSize;\r
-\r
-static char *AllocBuffer(unsigned Size)\r
- {\r
- if (Size == 0)\r
- return 0;\r
-\r
-// Is a free buffer that is big enough?\r
- for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
- {\r
- PathBuffer *PB = g_PathBuffers[i];\r
- asserta(PB->Magic == PathMagic);\r
- if (!PB->InUse)\r
- {\r
- if (PB->Size >= Size)\r
- {\r
- PB->InUse = true;\r
- return PB->Buffer;\r
- }\r
- if (PB->Buffer == 0)\r
- {\r
- unsigned Size2 = Size + 1024;\r
- PB->Buffer = MYALLOC(char, Size2, Path);\r
- PB->Size = Size2;\r
- PB->InUse = true;\r
- return PB->Buffer;\r
- }\r
- }\r
- }\r
-\r
-// No available buffer, must expand g_PathBuffers[]\r
- unsigned NewPathBufferSize = g_PathBufferSize + 1024;\r
- PathBuffer **NewPathBuffers = MYALLOC(PathBuffer *, NewPathBufferSize, Path);\r
- \r
- for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
- NewPathBuffers[i] = g_PathBuffers[i];\r
-\r
- for (unsigned i = g_PathBufferSize; i < NewPathBufferSize; ++i)\r
- {\r
- PathBuffer *PB = MYALLOC(PathBuffer, 1, Path);\r
- PB->Magic = PathMagic;\r
- PB->Buffer = 0;\r
- PB->Size = 0;\r
- PB->InUse = false;\r
- NewPathBuffers[i] = PB;\r
- }\r
-\r
- PathBuffer *PB = NewPathBuffers[g_PathBufferSize];\r
-\r
- MYFREE(g_PathBuffers, g_PathBufferSize, Path);\r
- g_PathBuffers = NewPathBuffers;\r
- g_PathBufferSize = NewPathBufferSize;\r
-\r
- asserta(!PB->InUse && PB->Buffer == 0);\r
-\r
- unsigned Size2 = Size + 1024;\r
- PB->Buffer = MYALLOC(char, Size2, Path);\r
- PB->Size = Size2;\r
- PB->InUse = true;\r
- return PB->Buffer;\r
- }\r
-\r
-static void FreeBuffer(char *Buffer)\r
- {\r
- if (Buffer == 0)\r
- return;\r
-\r
- for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
- {\r
- PathBuffer *PB = g_PathBuffers[i];\r
- if (PB->Buffer == Buffer)\r
- {\r
- asserta(PB->InUse);\r
- PB->InUse = false;\r
- return;\r
- }\r
- }\r
-\r
- Die("FreeBuffer, not found");\r
- }\r
-\r
-void PathData::Alloc(unsigned MaxLen)\r
- {\r
- if (MaxLen < Bytes)\r
- return;\r
-\r
- StartTimer(PathAlloc);\r
- if (Bytes > 0)\r
- {\r
- FreeBuffer(Front);\r
- }\r
-\r
- Bytes = MaxLen + 1;\r
- Front = AllocBuffer(Bytes);\r
- Back = Front + Bytes - 1;\r
- Start = 0;\r
- EndTimer(PathAlloc);\r
- }\r
-\r
-void PathData::Free()\r
- {\r
- FreeBuffer(Front);\r
- Front = 0;\r
- Start = 0;\r
- Back = 0;\r
- }\r
-\r
-void PathData::Copy(const PathData &rhs)\r
- {\r
- Alloc(rhs.Bytes);\r
- strcpy(Front, rhs.Front);\r
- Start = Front + (rhs.Start - rhs.Front);\r
- }\r
-\r
-void PathData::FromStr(const char *PathStr)\r
- {\r
- asserta(PathStr != 0);\r
- unsigned NeededBytes = (unsigned) strlen(PathStr) + 1;\r
- Alloc(NeededBytes);\r
- strcpy(Front, PathStr);\r
- Start = Front;\r
- }\r
-\r
-void LogPathStats()\r
- {\r
- Log("\n");\r
- unsigned Bytes = 0;\r
- for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
- {\r
- const PathBuffer *PB = g_PathBuffers[i];\r
- Bytes += PB->Size;\r
- }\r
- Log("%u paths allocated, total memory %u bytes\n", g_PathBufferSize, Bytes);\r
- }\r
+++ /dev/null
-#ifndef path_h\r
-#define path_h\r
-\r
-struct PathData\r
- {\r
-private:\r
- PathData(PathData &);\r
- PathData &operator=(PathData &);\r
-\r
-public:\r
- char *Start;\r
- char *Front;\r
- char *Back;\r
- unsigned Bytes;\r
-\r
-public:\r
- PathData()\r
- {\r
- Clear(true);\r
- }\r
- ~PathData()\r
- {\r
- Free();\r
- }\r
- void Free();\r
- void Alloc(unsigned MaxLen);\r
- void Clear(bool ctor = false)\r
- {\r
- Start = 0;\r
- if (ctor)\r
- {\r
- Front = 0;\r
- Back = 0;\r
- Bytes = 0;\r
- }\r
- else\r
- Free();\r
- }\r
- void Copy(const PathData &rhs);\r
- void FromStr(const char *PathStr);\r
- void Reverse()\r
- {\r
- asserta(Start != 0);\r
- unsigned L = (unsigned) strlen(Start);\r
- for (unsigned k = 0; k < L/2; ++k)\r
- {\r
- char c = Start[k];\r
- Start[k] = Start[L-k-1];\r
- Start[L-k-1] = c;\r
- }\r
- }\r
- void SetEmpty()\r
- {\r
- Start = 0;\r
- }\r
-\r
- bool IsEmpty() const\r
- {\r
- return Start == 0;\r
- }\r
- };\r
-\r
-#endif // path_h\r
+++ /dev/null
-#include "myutils.h"\r
-#include "ultra.h"\r
-#include "chime.h"\r
-#include "uc.h"\r
-#include "dp.h"\r
-#include <set>\r
-#include <algorithm>\r
-\r
-#define TRACE 0\r
-\r
-extern FILE *g_fUChime;\r
-\r
-void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,\r
- vector<unsigned> &Parents);\r
-\r
-void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
- const string &PathQA, const string &PathQB, ChimeHit2 &Hit);\r
-\r
-double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo);\r
-\r
-static void GetSmoothedIdVec(const SeqData &QSD, const SeqData &PSD, const string &Path,\r
- vector<unsigned> &IdVec, unsigned d)\r
- {\r
- IdVec.clear();\r
- const unsigned ColCount = SIZE(Path);\r
-\r
- const byte *Q = QSD.Seq;\r
- const byte *P = PSD.Seq;\r
-\r
- const unsigned QL = QSD.L;\r
- const unsigned PL = PSD.L;\r
-\r
- if (QL <= d)\r
- {\r
- IdVec.resize(QSD.L, 0);\r
- return;\r
- }\r
-\r
- unsigned QPos = 0;\r
- unsigned PPos = 0;\r
-\r
- vector<bool> SameVec;\r
- SameVec.reserve(QL);\r
- for (unsigned Col = 0; Col < ColCount; ++Col)\r
- {\r
- char c = Path[Col];\r
-\r
- bool Same = false;\r
- if (c == 'M')\r
- {\r
- byte q = Q[QPos];\r
- byte p = P[PPos];\r
- Same = (toupper(q) == toupper(p));\r
- }\r
-\r
- if (c == 'M' || c == 'D')\r
- {\r
- ++QPos;\r
- SameVec.push_back(Same);\r
- }\r
-\r
- if (c == 'M' || c == 'I')\r
- ++PPos;\r
- }\r
-\r
- asserta(SIZE(SameVec) == QL);\r
-\r
- unsigned n = 0;\r
- for (unsigned QPos = 0; QPos < d; ++QPos)\r
- {\r
- if (SameVec[QPos])\r
- ++n;\r
- IdVec.push_back(n);\r
- }\r
-\r
- for (unsigned QPos = d; QPos < QL; ++QPos)\r
- {\r
- if (SameVec[QPos])\r
- ++n;\r
- IdVec.push_back(n);\r
- if (SameVec[QPos-d])\r
- --n;\r
- }\r
- asserta(SIZE(IdVec) == QL);\r
-\r
-#if TRACE\r
- {\r
- Log("\n");\r
- Log("GetSmoothedIdVec\n");\r
- unsigned QPos = 0;\r
- unsigned PPos = 0;\r
- Log("Q P Same Id\n");\r
- Log("- - ---- -------\n");\r
- for (unsigned Col = 0; Col < ColCount; ++Col)\r
- {\r
- char c = Path[Col];\r
-\r
- bool Same = false;\r
- if (c == 'M')\r
- {\r
- byte q = Q[QPos];\r
- byte p = P[PPos];\r
- Same = (toupper(q) == toupper(p));\r
- Log("%c %c %4c %7d\n", q, p, tof(Same), IdVec[QPos]);\r
- }\r
-\r
- if (c == 'M' || c == 'D')\r
- ++QPos;\r
- if (c == 'M' || c == 'I')\r
- ++PPos;\r
- }\r
- }\r
-#endif\r
- }\r
-\r
-bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
- const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
- float MinFractId, ChimeHit2 &Hit)\r
- {\r
- Hit.Clear();\r
- Hit.QLabel = QSD.Label;\r
-\r
- if (opt_verbose)\r
- {\r
- Log("\n");\r
- Log("SearchChime()\n");\r
- Log("Query>%s\n", QSD.Label);\r
- }\r
-\r
- vector<unsigned> Parents;\r
- GetCandidateParents(U, QSD, QAb, Parents);\r
-\r
- unsigned ParentCount = SIZE(Parents);\r
- if (ParentCount <= 1)\r
- {\r
- if (opt_verbose)\r
- Log("%u candidate parents, done.\n", ParentCount);\r
- return false;\r
- }\r
-\r
- if (opt_fastalign)\r
- HF.SetA(QSD);\r
- HSPFinder *ptrHF = (opt_fastalign ? &HF : 0);\r
-\r
- unsigned ChunkLength;\r
- vector<unsigned> ChunkLos;\r
- GetChunkInfo(QSD.L, ChunkLength, ChunkLos);\r
- const unsigned ChunkCount = SIZE(ChunkLos);\r
-\r
- vector<unsigned> ChunkIndexToBestId(ChunkCount, 0);\r
- vector<unsigned> ChunkIndexToBestParentIndex(ChunkCount, UINT_MAX);\r
-\r
- vector<SeqData> PSDs;\r
- vector<string> Paths;\r
- double TopPctId = 0.0;\r
- unsigned TopParentIndex = UINT_MAX;\r
- unsigned QL = QSD.L;\r
- vector<unsigned> MaxIdVec(QL, 0);\r
- for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
- {\r
- unsigned ParentSeqIndex = Parents[ParentIndex];\r
-\r
- SeqData PSD;\r
- //PSD.Label = U.GetSeedLabel(ParentSeqIndex);\r
- //PSD.Seq = U.GetSeedSeq(ParentSeqIndex);\r
- //PSD.L = U.GetSeedLength(ParentSeqIndex);\r
- //PSD.Index = ParentSeqIndex;\r
- U.GetSeqData(ParentSeqIndex, PSD);\r
- PSDs.push_back(PSD);\r
-\r
- if (opt_fastalign)\r
- HF.SetB(PSD);\r
-\r
- PathData PD;\r
-\r
- float HSPId;\r
- bool Found = GlobalAlign(QSD, PSD, AP, AH, *ptrHF, MinFractId, HSPId, PD);\r
- if (!Found)\r
- {\r
- Paths.push_back(""); \r
- continue;\r
- }\r
-\r
- double PctId = 100.0*GetFractIdGivenPath(QSD.Seq, PSD.Seq, PD.Start, true);\r
- if (opt_selfid && PctId == 100.0)\r
- {\r
- Paths.push_back(""); \r
- continue;\r
- }\r
-\r
- if (PctId > TopPctId)\r
- {\r
- TopParentIndex = ParentIndex;\r
- TopPctId = PctId;\r
- if (TopPctId >= 100.0 - opt_mindiv)\r
- {\r
- if (opt_verbose)\r
- {\r
- Log(" %.1f%% >%s\n", TopPctId, PSD.Label);\r
- Log(" Top hit exceeds ctl threshold, done.\n");\r
- return false;\r
- }\r
- }\r
- }\r
-\r
- string Path = PD.Start;\r
- Paths.push_back(Path);\r
-\r
- vector<unsigned> IdVec;\r
- GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
-\r
- for (unsigned QPos = 0; QPos < QL; ++QPos)\r
- if (IdVec[QPos] > MaxIdVec[QPos])\r
- MaxIdVec[QPos] = IdVec[QPos];\r
- }\r
-\r
- vector<unsigned> BestParents;\r
- for (unsigned k = 0; k < opt_maxp; ++k)\r
- {\r
- unsigned BestParent = UINT_MAX;\r
- unsigned BestCov = 0;\r
- for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
- {\r
- const SeqData &PSD = PSDs[ParentIndex];\r
- const string &Path = Paths[ParentIndex];\r
- if (Path == "")\r
- continue;\r
-\r
- vector<unsigned> IdVec;\r
- GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
-\r
- unsigned Cov = 0;\r
- for (unsigned QPos = 0; QPos < QL; ++QPos)\r
- if (IdVec[QPos] == MaxIdVec[QPos])\r
- ++Cov;\r
-\r
- if (Cov > BestCov)\r
- {\r
- BestParent = ParentIndex;\r
- BestCov = Cov;\r
- }\r
- }\r
-\r
- if (BestParent == UINT_MAX)\r
- break;\r
-\r
- BestParents.push_back(BestParent);\r
- vector<unsigned> IdVec;\r
-\r
- const SeqData &PSD = PSDs[BestParent];\r
- const string &Path = Paths[BestParent];\r
- GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
- for (unsigned QPos = 0; QPos < QL; ++QPos)\r
- if (IdVec[QPos] == MaxIdVec[QPos])\r
- MaxIdVec[QPos] = UINT_MAX;\r
- }\r
-\r
- unsigned BestParentCount = SIZE(BestParents);\r
-\r
- if (opt_verbose)\r
- {\r
- Log("%u/%u best parents\n", BestParentCount, ParentCount);\r
- for (unsigned k = 0; k < BestParentCount; ++k)\r
- {\r
- unsigned i = BestParents[k];\r
- Log(" %s\n", PSDs[i].Label);\r
- }\r
- }\r
-\r
- bool Found = false;\r
- for (unsigned k1 = 0; k1 < BestParentCount; ++k1)\r
- {\r
- unsigned i1 = BestParents[k1];\r
- asserta(i1 < ParentCount);\r
-\r
- const SeqData &PSD1 = PSDs[i1];\r
- const string &Path1 = Paths[i1];\r
-\r
- for (unsigned k2 = k1 + 1; k2 < BestParentCount; ++k2)\r
- {\r
- unsigned i2 = BestParents[k2];\r
- asserta(i2 < ParentCount);\r
- asserta(i2 != i1);\r
-\r
- const SeqData &PSD2 = PSDs[i2];\r
- const string &Path2 = Paths[i2];\r
-\r
- ChimeHit2 Hit2;\r
- AlignChime(QSD, PSD1, PSD2, Path1, Path2, Hit2);\r
- Hit2.PctIdQT = TopPctId;\r
-\r
- if (Hit2.Accept())\r
- Found = true;\r
-\r
- if (Hit2.Score > Hit.Score)\r
- Hit = Hit2;\r
-\r
- if (opt_verbose)\r
- Hit2.LogMe();\r
- }\r
- }\r
-\r
- return Found;\r
- }\r
+++ /dev/null
-#ifndef seq_h\r
-#define seq_h\r
-\r
-struct ORFData;\r
-\r
-struct SeqData\r
- {\r
- const char *Label;\r
- const byte *Seq;\r
- unsigned L;\r
- unsigned Index;\r
-\r
-// RevComp means that SeqData.Seq is reverse-complemented relative\r
-// to the sequence in the input file (query or db). Coordinates in\r
-// a hit (e.g., AlnData) will be relative to SeqData.Seq, so both\r
-// the sequence and the coordinates should be r.c.'d for output.\r
- bool RevComp;\r
- bool Nucleo;\r
- const ORFData *ORFParent;\r
-\r
- SeqData()\r
- {\r
- Clear();\r
- }\r
-\r
- void Clear()\r
- {\r
- Label = 0;\r
- Seq = 0;\r
- L = 0;\r
- Index = UINT_MAX;\r
- RevComp = false;\r
- Nucleo = false;\r
- ORFParent = 0;\r
- }\r
- };\r
-\r
-#endif // seq_h\r
+++ /dev/null
-#include "myutils.h"\r
-#include "seqdb.h"\r
-#include "alpha.h"\r
-#include "timing.h"\r
-#include "sfasta.h"\r
-#include "seq.h"\r
-\r
-void SeqToFasta(FILE *f, const char *Label, const byte *Seq, unsigned L)\r
- {\r
- const unsigned ROWLEN = 80;\r
- if (Label != 0)\r
- fprintf(f, ">%s\n", Label);\r
- unsigned BlockCount = (L + ROWLEN - 1)/ROWLEN;\r
- for (unsigned BlockIndex = 0; BlockIndex < BlockCount; ++BlockIndex)\r
- {\r
- unsigned From = BlockIndex*ROWLEN;\r
- unsigned To = From + ROWLEN;\r
- if (To >= L)\r
- To = L;\r
- for (unsigned Pos = From; Pos < To; ++Pos)\r
- fputc(Seq[Pos], f);\r
- fputc('\n', f);\r
- }\r
- }\r
-\r
-SeqDB::~SeqDB()\r
- {\r
- Clear();\r
- }\r
-\r
-SeqDB::SeqDB()\r
- {\r
- Clear(true);\r
- }\r
-\r
-void SeqDB::Clear(bool ctor)\r
- {\r
- if (!ctor)\r
- {\r
- for (unsigned i = 0; i < m_SeqCount; ++i)\r
- {\r
- unsigned n = strlen(m_Labels[i]);\r
- MYFREE(m_Labels[i], n, SeqDB);\r
- MYFREE(m_Seqs[i], m_SeqLengths[i], SeqDB);\r
- }\r
- MYFREE(m_Labels, m_Size, SeqDB);\r
- MYFREE(m_Seqs, m_Size, SeqDB);\r
- MYFREE(m_SeqLengths, m_Size, SeqDB);\r
- }\r
-\r
- m_FileName.clear();\r
- m_SeqCount = 0;\r
- m_Size = 0;\r
-\r
- m_Labels = 0;\r
- m_Seqs = 0;\r
- m_SeqLengths = 0;\r
-\r
- m_Aligned = false;\r
- m_IsNucleo = false;\r
- m_IsNucleoSet = false;\r
- }\r
-\r
-void SeqDB::InitEmpty(bool Nucleo)\r
- {\r
- Clear();\r
- m_IsNucleo = Nucleo;\r
- m_IsNucleoSet = true;\r
- }\r
-\r
-void SeqDB::FromFasta(const string &FileName, bool AllowGaps)\r
- {\r
- Clear();\r
- m_FileName = FileName;\r
- SFasta SF;\r
-\r
- SF.Open(FileName);\r
- SF.m_AllowGaps = AllowGaps;\r
-\r
- ProgressStep(0, 1000, "Reading %s", FileName.c_str());\r
- for (;;)\r
- {\r
- unsigned QueryPctDoneX10 = SF.GetPctDoneX10();\r
- ProgressStep(QueryPctDoneX10, 1000, "Reading %s", FileName.c_str());\r
- const byte *Seq = SF.GetNextSeq();\r
- if (Seq == 0)\r
- break;\r
-\r
- const char *Label = SF.GetLabel();\r
- unsigned L = SF.GetSeqLength();\r
- AddSeq(Label, Seq, L);\r
- }\r
- ProgressStep(999, 1000, "Reading %s", FileName.c_str());\r
-\r
- SetIsNucleo();\r
-\r
- Progress("%s sequences\n", IntToStr(GetSeqCount()));\r
- }\r
-\r
-void SeqDB::ToFasta(const string &FileName) const\r
- {\r
- FILE *f = CreateStdioFile(FileName);\r
- for (unsigned SeqIndex = 0; SeqIndex < GetSeqCount(); ++SeqIndex)\r
- ToFasta(f, SeqIndex);\r
- CloseStdioFile(f);\r
- }\r
-\r
-void SeqDB::SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel) const\r
- {\r
- if (WithLabel)\r
- fprintf(f, ">%s\n", GetLabel(SeqIndex));\r
-\r
- const unsigned ROWLEN = 80;\r
-\r
- unsigned L = GetSeqLength(SeqIndex);\r
- const byte *Seq = GetSeq(SeqIndex);\r
- unsigned BlockCount = (L + ROWLEN - 1)/ROWLEN;\r
- for (unsigned BlockIndex = 0; BlockIndex < BlockCount; ++BlockIndex)\r
- {\r
- unsigned From = BlockIndex*ROWLEN;\r
- unsigned To = From + ROWLEN;\r
- if (To >= L)\r
- To = L;\r
- for (unsigned Pos = From; Pos < To; ++Pos)\r
- fputc(Seq[Pos], f);\r
- fputc('\n', f);\r
- }\r
- }\r
-\r
-void SeqDB::ToFasta(FILE *f, unsigned SeqIndex) const\r
- {\r
- asserta(SeqIndex < m_SeqCount);\r
- fprintf(f, ">%s\n", GetLabel(SeqIndex));\r
- SeqToFasta(f, SeqIndex);\r
- }\r
-\r
-unsigned SeqDB::GetMaxLabelLength() const\r
- {\r
- const unsigned SeqCount = GetSeqCount();\r
- unsigned MaxL = 0;\r
- for (unsigned Index = 0; Index < SeqCount; ++Index)\r
- {\r
- unsigned L = (unsigned) strlen(m_Labels[Index]);\r
- if (L > MaxL)\r
- MaxL = L;\r
- }\r
- return MaxL;\r
- }\r
-\r
-unsigned SeqDB::GetMaxSeqLength() const\r
- {\r
- const unsigned SeqCount = GetSeqCount();\r
- unsigned MaxL = 0;\r
- for (unsigned Index = 0; Index < SeqCount; ++Index)\r
- {\r
- unsigned L = m_SeqLengths[Index];\r
- if (L > MaxL)\r
- MaxL = L;\r
- }\r
- return MaxL;\r
- }\r
-\r
-void SeqDB::LogMe() const\r
- {\r
- Log("\n");\r
- const unsigned SeqCount = GetSeqCount();\r
- Log("SeqDB %u seqs, aligned=%c\n", SeqCount, tof(m_Aligned));\r
- if (SeqCount == 0)\r
- return;\r
-\r
- Log("Index Label Length Seq\n");\r
- Log("----- ---------------- ------ ---\n");\r
- for (unsigned Index = 0; Index < SeqCount; ++Index)\r
- {\r
- Log("%5u", Index);\r
- Log(" %16.16s", m_Labels[Index]);\r
- unsigned L = m_SeqLengths[Index];\r
- Log(" %6u", L);\r
- Log(" %*.*s", L, L, m_Seqs[Index]);\r
- Log("\n");\r
- }\r
- }\r
-\r
-void SeqDB::GetSeqData(unsigned Id, SeqData &Buffer) const\r
- {\r
- asserta(Id < m_SeqCount);\r
- Buffer.Seq = m_Seqs[Id];\r
- Buffer.Label = m_Labels[Id];\r
- Buffer.L = m_SeqLengths[Id];\r
- Buffer.Index = Id;\r
- Buffer.ORFParent = 0;\r
- Buffer.RevComp = false;\r
- Buffer.Nucleo = IsNucleo();\r
- }\r
-\r
-void SeqDB::SetIsNucleo()\r
- {\r
- const unsigned SeqCount = GetSeqCount();\r
- unsigned N = 0;\r
- for (unsigned i = 0; i < 100; ++i)\r
- {\r
- unsigned SeqIndex = unsigned(rand()%SeqCount);\r
- const byte *Seq = GetSeq(SeqIndex);\r
- unsigned L = GetSeqLength(SeqIndex);\r
- const unsigned Pos = unsigned(rand()%L);\r
- byte c = Seq[Pos];\r
-\r
- if (g_IsNucleoChar[c])\r
- ++N;\r
- }\r
- m_IsNucleo = (N > 80);\r
- m_IsNucleoSet = true;\r
- }\r
-\r
-unsigned SeqDB::GetTotalLength() const\r
- {\r
- const unsigned SeqCount = GetSeqCount();\r
- unsigned TotalLength = 0;\r
- for (unsigned Id = 0; Id < SeqCount; ++Id)\r
- TotalLength += GetSeqLength(Id);\r
- return TotalLength;\r
- }\r
-\r
-unsigned SeqDB::AddSeq(const char *Label, const byte *Seq, unsigned L)\r
- {\r
- StartTimer(AddSeq);\r
- if (m_SeqCount >= m_Size)\r
- {\r
- unsigned NewSize = unsigned(m_Size*1.5) + 1024;\r
- char **NewLabels = MYALLOC(char *, NewSize, SeqDB);\r
- byte **NewSeqs = MYALLOC(byte *, NewSize, SeqDB);\r
- unsigned *NewSeqLengths = MYALLOC(unsigned, NewSize, SeqDB);\r
-\r
- for (unsigned i = 0; i < m_SeqCount; ++i)\r
- {\r
- NewLabels[i] = m_Labels[i];\r
- NewSeqs[i] = m_Seqs[i];\r
- NewSeqLengths[i] = m_SeqLengths[i];\r
- }\r
-\r
- MYFREE(m_Labels, m_SeqCount, SeqDB);\r
- MYFREE(m_Seqs, m_SeqCount, SeqDB);\r
- MYFREE(m_SeqLengths, m_SeqCount, SeqDB);\r
-\r
- m_Labels = NewLabels;\r
- m_Seqs = NewSeqs;\r
- m_SeqLengths = NewSeqLengths;\r
- m_Size = NewSize;\r
- }\r
-\r
- unsigned Index = m_SeqCount++;\r
- m_Seqs[Index] = MYALLOC(byte, L, SeqDB);\r
- memcpy(m_Seqs[Index], Seq, L);\r
-\r
- unsigned n = strlen(Label) + 1;\r
- m_Labels[Index] = MYALLOC(char, n, SeqDB);\r
- memcpy(m_Labels[Index], Label, n);\r
-\r
- if (Index == 0)\r
- m_Aligned = true;\r
- else\r
- m_Aligned = (m_Aligned && L == m_SeqLengths[0]);\r
-\r
- m_SeqLengths[Index] = L;\r
-\r
- EndTimer(AddSeq);\r
- return Index;\r
- }\r
-\r
-unsigned SeqDB::GetIndex(const char *Label) const\r
- {\r
- for (unsigned i = 0; i < m_SeqCount; ++i)\r
- if (strcmp(Label, m_Labels[i]) == 0)\r
- return i;\r
- Die("SeqDB::GetIndex(%s), not found", Label);\r
- return UINT_MAX;\r
- }\r
-\r
-void SeqDB::MakeLabelToIndex(map<string, unsigned> &LabelToIndex)\r
- {\r
- LabelToIndex.clear();\r
- for (unsigned i = 0; i < m_SeqCount; ++i)\r
- {\r
- const string &Label = string(GetLabel(i));\r
- if (LabelToIndex.find(Label) != LabelToIndex.end())\r
- Die("Duplicate label: %s", Label.c_str());\r
- LabelToIndex[Label] = i;\r
- }\r
- }\r
+++ /dev/null
-#ifndef seqdb_h\r
-#define seqdb_h\r
-\r
-#include <vector>\r
-#include <map>\r
-\r
-struct SeqData;\r
-\r
-using namespace std;\r
-\r
-struct SeqDB\r
- {\r
-private:\r
- SeqDB(const SeqDB &rhs);\r
- SeqDB &operator=(const SeqDB &rhs);\r
-\r
-public:\r
- string m_FileName;\r
- char **m_Labels;\r
- byte **m_Seqs;\r
- unsigned *m_SeqLengths;\r
- unsigned m_SeqCount;\r
- unsigned m_Size;\r
-\r
- bool m_Aligned;\r
- bool m_IsNucleo;\r
- bool m_IsNucleoSet;\r
-\r
-public:\r
- SeqDB();\r
- ~SeqDB();\r
- void Clear(bool ctor = false);\r
- void InitEmpty(bool Nucleo);\r
-\r
- unsigned AddSeq(const char *Label, const byte *Seq, unsigned L);\r
-\r
- byte *GetSeq(unsigned SeqIndex) const\r
- {\r
- asserta(SeqIndex < m_SeqCount);\r
- return m_Seqs[SeqIndex];\r
- }\r
-\r
- const char *GetLabel(unsigned SeqIndex) const\r
- {\r
- asserta(SeqIndex < m_SeqCount);\r
- return m_Labels[SeqIndex];\r
- }\r
-\r
- unsigned GetSeqLength(unsigned SeqIndex) const\r
- {\r
- asserta(SeqIndex < m_SeqCount);\r
- return m_SeqLengths[SeqIndex];\r
- }\r
-\r
- unsigned GetSeqCount() const\r
- {\r
- return m_SeqCount;\r
- }\r
-\r
- unsigned GetPairCount() const\r
- {\r
- unsigned SeqCount = GetSeqCount();\r
- return (SeqCount*(SeqCount - 1))/2;\r
- }\r
-\r
- unsigned GetPairIndex(unsigned SeqIndex1, unsigned SeqIndex2) const\r
- {\r
- if (SeqIndex1 > SeqIndex2)\r
- return (SeqIndex1*(SeqIndex1 - 1))/2 + SeqIndex2;\r
- return (SeqIndex2*(SeqIndex2 - 1))/2 + SeqIndex1;\r
- }\r
-\r
- unsigned GetColCount() const\r
- {\r
- if (!m_Aligned)\r
- Die("SeqDB::GetColCount, not aligned");\r
- if (m_SeqCount == 0)\r
- Die("SeqDB::GetColCount, empty");\r
- return m_SeqLengths[0];\r
- }\r
-\r
- bool IsNucleo() const\r
- {\r
- asserta(m_IsNucleoSet);\r
- return m_IsNucleo;\r
- }\r
-\r
- void GetSeqData(unsigned Id, SeqData &Buffer) const;\r
-\r
- unsigned GetMaxLabelLength() const;\r
- unsigned GetMaxSeqLength() const;\r
- void SetIsNucleo();\r
- unsigned GetIndex(const char *Label) const;\r
- void MakeLabelToIndex(map<string, unsigned> &LabelToIndex);\r
-\r
- void LogMe() const;\r
- void FromFasta(const string &FileName, bool AllowGaps = false);\r
-\r
- void ToFasta(const string &FileName) const;\r
- void ToFasta(FILE *f, unsigned SeqIndex) const;\r
- void SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel = false) const;\r
-\r
- unsigned GetTotalLength() const;\r
- };\r
-\r
-bool isgap(byte c);\r
-\r
-#endif\r
+++ /dev/null
-#include "myutils.h"
-#include "mx.h"
-
-Mx<float> g_SubstMxf;
-float **g_SubstMx;
-
-static const char Alphabet[] = "ACGTU";
-
-void SetNucSubstMx(double Match, double Mismatch)\r
- {\r
- static bool Done = false;\r
- if (Done)\r
- return;\r
- Done = true;\r
-\r
- if (Match <= 0.0)\r
- Die("Match score should be +ve");\r
- if (Mismatch >= 0.0)\r
- Die("Mismatch score should be -ve");\r
-\r
- unsigned N = unsigned(strlen(Alphabet));\r
-\r
- g_SubstMxf.Alloc("NUCMX", 256, 256);\r
- strcpy(g_SubstMxf.m_Alpha, "ACGT");\r
- g_SubstMxf.Init(0);\r
- g_SubstMx = g_SubstMxf.GetData();\r
- for (unsigned i = 0; i < N; ++i)\r
- {\r
- for (unsigned j = 0; j < N; ++j)\r
- {\r
- float v = float(i == j ? Match : Mismatch);\r
-\r
- byte ui = (byte) toupper(Alphabet[i]);\r
- byte uj = (byte) toupper(Alphabet[j]);\r
- byte li = (byte) tolower(ui);\r
- byte lj = (byte) tolower(uj);\r
- ui = (byte) toupper(ui);\r
- uj = (byte) toupper(uj);\r
-\r
- g_SubstMx[ui][uj] = v;\r
- g_SubstMx[uj][ui] = v;\r
-\r
- g_SubstMx[ui][lj] = v;\r
- g_SubstMx[uj][li] = v;\r
-\r
- g_SubstMx[li][uj] = v;\r
- g_SubstMx[lj][ui] = v;\r
-\r
- g_SubstMx[li][lj] = v;\r
- g_SubstMx[lj][li] = v;\r
- }\r
- }\r
-\r
- for (unsigned j = 0; j < N; ++j)\r
- {\r
- float v = 0.0f;\r
-\r
- byte ui = (byte) 'N';\r
- byte uj = (byte) toupper(Alphabet[j]);\r
- byte li = (byte) 'n';\r
- byte lj = (byte) tolower(uj);\r
- ui = (byte) toupper(ui);\r
- uj = (byte) toupper(uj);\r
-\r
- g_SubstMx[ui][uj] = v;\r
- g_SubstMx[uj][ui] = v;\r
-\r
- g_SubstMx[ui][lj] = v;\r
- g_SubstMx[uj][li] = v;\r
-\r
- g_SubstMx[li][uj] = v;\r
- g_SubstMx[lj][ui] = v;\r
-\r
- g_SubstMx[li][lj] = v;\r
- g_SubstMx[lj][li] = v;\r
- }\r
- }\r
+++ /dev/null
-#include "sfasta.h"\r
-#include "orf.h"\r
-#include "alpha.h"\r
-#include "timing.h"\r
-\r
-static inline bool isgap(byte c)\r
- {\r
- return c == '-' || c == '.';\r
- }\r
-\r
-const unsigned BufferSize = 16*1024*1024;\r
-\r
-static unsigned GetMaxPoly(const byte *Seq, unsigned L)\r
- {\r
- byte CurrChar = Seq[0];\r
- unsigned Start = 0;\r
- unsigned MaxLen = 1;\r
- for (unsigned i = 1; i < L; ++i)\r
- {\r
- char c = Seq[i];\r
- if (c != CurrChar || i+1 == L)\r
- {\r
- unsigned Len = i - Start;\r
- if (Len > MaxLen)\r
- MaxLen = Len;\r
- CurrChar = c;\r
- Start = i;\r
- }\r
- }\r
- return MaxLen;\r
- }\r
-\r
-SFasta::SFasta()\r
- {\r
- m_FileName = "";\r
- m_File = 0;\r
- m_Buffer = 0;\r
- m_BufferSize = 0;\r
- m_BufferOffset = 0;\r
- m_BufferBytes = 0;\r
- m_FilePos = 0;\r
- m_FileSize = 0;\r
- m_Label = 0;\r
- m_SeqLength = 0;\r
- m_TooShortCount = 0;\r
- m_TooLongCount = 0;\r
- m_ShortestLength = 0;\r
- m_LongestLength = 0;\r
- m_IsNucleo = false;\r
- m_IsNucleoSet = false;\r
- }\r
-\r
-SFasta::~SFasta()\r
- {\r
- Clear();\r
- }\r
-\r
-void SFasta::Clear()\r
- {\r
- MYFREE(m_Buffer, m_BufferSize, SFasta);\r
- if (m_File != 0)\r
- CloseStdioFile(m_File);\r
-\r
- m_FileName = "";\r
- m_File = 0;\r
- m_Buffer = 0;\r
- m_BufferSize = 0;\r
- m_BufferOffset = 0;\r
- m_BufferBytes = 0;\r
- m_FilePos = 0;\r
- m_FileSize = 0;\r
- m_Label = 0;\r
- m_SeqLength = 0;\r
- m_SeqIndex = UINT_MAX;\r
- m_AllowGaps = false;\r
- m_IsNucleo = false;\r
- m_IsNucleoSet = false;\r
- m_TooShortCount = 0;\r
- m_TooLongCount = 0;\r
- m_ShortestLength = 0;\r
- m_LongestLength = 0;\r
- m_TooPolyCount = 0;\r
- }\r
-\r
-void SFasta::LogMe() const\r
- {\r
- Log("\n");\r
- Log("SFasta::LogMe()\n");\r
- Log("FileName=%s\n", m_FileName.c_str());\r
- Log("FileSize=%u\n", (unsigned) m_FileSize);\r
- Log("FilePos=%u\n", (unsigned) m_FilePos);\r
- Log("BufferSize=%u\n", m_BufferSize);\r
- Log("BufferPos=%u\n", m_BufferOffset);\r
- Log("BufferBytes=%u\n", m_BufferBytes);\r
- if (m_Label == 0)\r
- Log("Label=NULL\n");\r
- else\r
- Log("Label=%s\n", m_Label);\r
- Log("SeqLength=%u\n", m_SeqLength);\r
- }\r
-\r
-const byte *SFasta::GetNextSeq()\r
- {\r
- for (;;)\r
- {\r
- const byte *Seq = GetNextSeqLo();\r
- if (Seq == 0)\r
- {\r
- if (m_TooShortCount > 0)\r
- Warning("%u short sequences (--minlen %u, shortest %u) discarded from %s",\r
- m_TooShortCount, opt_minlen, m_ShortestLength, m_FileName.c_str());\r
- if (m_TooLongCount > 0)\r
- Warning("%u long sequences (--maxlen %u, longest %u) discarded from %s",\r
- m_TooLongCount, opt_maxlen, m_LongestLength, m_FileName.c_str());\r
- if (m_TooPolyCount > 0)\r
- Warning("%u sequences with long homopolymers discarded (--maxpoly %u)",\r
- m_TooPolyCount, opt_maxpoly);\r
- return 0;\r
- }\r
- if (m_SeqLength < opt_minlen)\r
- {\r
- ++m_TooShortCount;\r
- if (m_ShortestLength == 0 || m_SeqLength < m_ShortestLength)\r
- m_ShortestLength = m_SeqLength;\r
- continue;\r
- }\r
- if (m_SeqLength > opt_maxlen && opt_maxlen != 0)\r
- {\r
- if (m_LongestLength == 0 || m_SeqLength > m_LongestLength)\r
- m_LongestLength = m_SeqLength;\r
- ++m_TooLongCount;\r
- continue;\r
- }\r
- return Seq;\r
- }\r
- }\r
-\r
-const byte *SFasta::GetNextSeqLo()\r
- {\r
-// End of cache?\r
- if (m_BufferOffset == m_BufferBytes)\r
- {\r
- // End of file?\r
- if (m_FilePos == m_FileSize)\r
- return 0;\r
- FillCache();\r
- }\r
-\r
- StartTimer(SF_GetNextSeq);\r
- asserta(m_Buffer[m_BufferOffset] == '>');\r
- m_Label = (char *) (m_Buffer + m_BufferOffset + 1);\r
- \r
-//// Scan to end-of-line.\r
-//// Use dubious library function strchr() in the hope\r
-//// that it uses fast machine code.\r
-// byte *ptr = (byte *) strchr(m_Label, '\n');\r
-// asserta(ptr != 0);\r
-// *ptr = 0;\r
-\r
- byte *ptr = 0;\r
- for (unsigned i = m_BufferOffset; i < m_BufferSize; ++i)\r
- {\r
- char c = m_Buffer[i];\r
- if (c == '\n' || c == '\r')\r
- {\r
- ptr = m_Buffer + i;\r
- break;\r
- }\r
- }\r
- asserta(ptr != 0);\r
-\r
- if (opt_trunclabels)\r
- {\r
- for (char *p = m_Label; *p; ++p)\r
- if (isspace(*p))\r
- {\r
- *p = 0;\r
- break;\r
- }\r
- }\r
- else\r
- {\r
- for (char *p = m_Label; *p; ++p)\r
- {\r
- if (*p == '\t')\r
- *p = ' ';\r
- else if (*p == '\r' || *p == '\n')\r
- {\r
- *p = 0;\r
- char NextChar = *(p+1);\r
- if (NextChar == '\r' || NextChar == '\n')\r
- ++p;\r
- break;\r
- }\r
- }\r
- }\r
-\r
-// ptr points to end-of-line.\r
-// Move to start of sequence data.\r
- byte *Seq = ++ptr;\r
-\r
-// Delete white space in-place\r
- byte *To = ptr;\r
- m_BufferOffset = (unsigned) (ptr - m_Buffer);\r
- while (m_BufferOffset < m_BufferBytes)\r
- {\r
- byte c = m_Buffer[m_BufferOffset];\r
- if (c == '>')\r
- {\r
- char prevc = '\n';\r
- if (m_BufferOffset > 0)\r
- prevc = m_Buffer[m_BufferOffset-1];\r
- if (prevc == '\n' || prevc == '\r')\r
- break;\r
- }\r
- ++m_BufferOffset;\r
- if (isalpha(c) || (isgap(c) && m_AllowGaps))\r
- *To++ = c;\r
- else if (c == '\n' || c == '\r')\r
- continue;\r
- else\r
- {\r
- const char *Label = (m_Label == 0 ? "" : m_Label);\r
- static bool WarningDone = false;\r
- if (!WarningDone)\r
- {\r
- if (isgap(c))\r
- Warning("Ignoring gaps in FASTA file '%s'",\r
- m_FileName.c_str());\r
- else if (isprint(c))\r
- Warning("Invalid FASTA file '%s', non-letter '%c' in sequence >%s",\r
- m_FileName.c_str(), c, Label);\r
- else\r
- Warning("Invalid FASTA file '%s', non-printing byte (hex %02x) in sequence >%s",\r
- m_FileName.c_str(), c, Label);\r
- WarningDone = true;\r
- }\r
- continue;\r
- }\r
- }\r
- m_SeqLength = unsigned(To - Seq);\r
-\r
- if (m_SeqIndex == UINT_MAX)\r
- m_SeqIndex = 0;\r
- else\r
- ++m_SeqIndex;\r
-\r
- EndTimer(SF_GetNextSeq);\r
- return Seq;\r
- }\r
-\r
-void SFasta::Open(const string &FileName)\r
- {\r
- Clear();\r
- m_FileName = FileName;\r
- m_File = OpenStdioFile(FileName);\r
- m_BufferSize = BufferSize;\r
- //m_Buffer = myalloc<byte>(m_BufferSize);\r
- m_Buffer = MYALLOC(byte, m_BufferSize, SFasta);\r
- m_FileSize = GetStdioFileSize(m_File);\r
- }\r
-\r
-void SFasta::Rewind()\r
- {\r
- m_BufferOffset = 0;\r
- m_BufferBytes = 0;\r
- m_FilePos = 0;\r
- }\r
-\r
-bool SFasta::SetIsNucleo()\r
- {\r
- if (m_FilePos != 0)\r
- Die("SFasta::IsNucleo, not at BOF");\r
-\r
- unsigned LetterCount = 0;\r
- unsigned NucleoLetterCount = 0;\r
- for (;;)\r
- {\r
- const byte *Seq = GetNextSeq();\r
- if (Seq == 0)\r
- break;\r
- unsigned L = GetSeqLength();\r
- for (unsigned i = 0; i < L; ++i)\r
- if (g_IsNucleoChar[Seq[i]])\r
- ++NucleoLetterCount;\r
- LetterCount += L;\r
- if (LetterCount > 256)\r
- break;\r
- }\r
- Rewind();\r
- if (LetterCount == 0)\r
- {\r
- m_IsNucleoSet = true;\r
- m_IsNucleo = true;\r
- return true;\r
- }\r
-\r
-// Nucleo if more than 90% nucleo letters AGCTUN\r
- m_IsNucleo = double(NucleoLetterCount)/LetterCount > 0.9;\r
- m_IsNucleoSet = true;\r
- return m_IsNucleo;\r
- }\r
-\r
-void SFasta::FillCache()\r
- {\r
- StartTimer(SF_FillCache);\r
- asserta(m_FilePos < m_FileSize);\r
-\r
-// off_t may be larger type than unsigned, e.g. 64- vs. 32-bit.\r
- off_t otBytesToRead = m_FileSize - m_FilePos;\r
-\r
- bool FinalBuffer = true;\r
- if (otBytesToRead > (off_t) m_BufferSize)\r
- {\r
- FinalBuffer = false;\r
- otBytesToRead = m_BufferSize;\r
- }\r
-\r
- unsigned BytesToRead = unsigned(otBytesToRead);\r
- asserta(BytesToRead > 0);\r
- asserta(BytesToRead <= m_BufferSize);\r
-\r
- SetStdioFilePos(m_File, m_FilePos);\r
- ReadStdioFile(m_File, m_Buffer, BytesToRead);\r
- if (m_Buffer[0] != '>')\r
- {\r
- if (m_FilePos == 0)\r
- Die("Input is not FASTA file");\r
- else\r
- Die("SFasta::FillCache() failed, expected '>'");\r
- }\r
-\r
- m_BufferOffset = 0;\r
-\r
-// If last buffer in file, done\r
- if (FinalBuffer)\r
- {\r
- m_BufferBytes = BytesToRead;\r
- m_FilePos += BytesToRead;\r
- EndTimer(SF_FillCache);\r
- return;\r
- }\r
-\r
-// If not last buffer, truncate any partial sequence\r
-// at end of buffer. Search backwards to find last '>'.\r
- byte *ptr = m_Buffer + BytesToRead - 1;\r
- while (ptr > m_Buffer)\r
- {\r
- if (ptr[0] == '>' && (ptr[-1] == '\n' || ptr[-1] == '\r'))\r
- break;\r
- --ptr;\r
- }\r
-\r
- if (ptr == m_Buffer)\r
- {\r
- LogMe();\r
- if (*ptr != '>')\r
- {\r
- // No '>' found.\r
- // This might techincally be legal FASTA if the entire\r
- // buffer is white space, but strange if not the last buffer\r
- // in the file, so quit anyway.\r
- Die("Failed to find '>' (pos=%u, bytes=%u)",\r
- (unsigned) m_FilePos, BytesToRead);\r
- }\r
- else\r
- {\r
- // Entire buffer is one sequence which may be truncated.\r
- Die("Sequence too long (pos=%u, bytes=%u)",\r
- (unsigned) m_FilePos, BytesToRead);\r
- }\r
- }\r
-\r
- asserta(*ptr == '>');\r
-\r
- m_BufferBytes = unsigned(ptr - m_Buffer);\r
- m_FilePos += m_BufferBytes;\r
-\r
- EndTimer(SF_FillCache);\r
- }\r
-\r
-unsigned SFasta::GetPctDoneX10() const\r
- {\r
- if (m_FilePos == 0 || m_FileSize == 0)\r
- return 0;\r
-\r
- assert(m_FilePos >= (off_t) m_BufferBytes);\r
- off_t BufferStart = m_FilePos - m_BufferBytes;\r
- off_t BufferPos = BufferStart + m_BufferOffset;\r
-\r
- unsigned iPctX10 = unsigned(10.0*double(BufferPos)*100.0/double(m_FileSize));\r
- if (iPctX10 == 0)\r
- return 1;\r
- if (iPctX10 >= 999)\r
- return 998;\r
- return iPctX10;\r
- }\r
-\r
-double SFasta::GetPctDone() const\r
- {\r
- if (m_FilePos == 0 || m_FileSize == 0)\r
- return 0;\r
-\r
- assert(m_FilePos >= (off_t) m_BufferBytes);\r
- off_t BufferStart = m_FilePos - m_BufferBytes;\r
- off_t BufferPos = BufferStart + m_BufferOffset;\r
-\r
- return double(BufferPos)*100.0/double(m_FileSize);\r
- }\r
-\r
-bool SFasta::GetNextSD(SeqData &SD)\r
- {\r
- SD.Seq = GetNextSeq();\r
- if (SD.Seq == 0)\r
- return false;\r
-\r
- SD.Label = GetLabel();\r
- SD.L = GetSeqLength();\r
- SD.Index = GetSeqIndex();\r
- SD.ORFParent = 0;\r
- SD.Nucleo = GetIsNucleo();\r
- SD.RevComp = false;\r
-\r
- return true;\r
- }\r
-\r
-#if TEST\r
-void TestSFasta()\r
- {\r
- SFasta SF;\r
- SF.Open(opt_input);\r
-\r
- if (opt_verbose)\r
- {\r
- Log(" Index Length Label\n");\r
- Log("------- ------- -----\n");\r
- }\r
-\r
- unsigned Index = 0;\r
- unsigned SeqCount = 0;\r
- double LetterCount = 0.0;\r
- ProgressStep(0, 1000, "Reading");\r
- for (;;)\r
- {\r
- const byte *Seq = SF.GetNextSeq();\r
- if (Seq == 0)\r
- break;\r
- ProgressStep(SF.GetPctDoneX10(), 1000, "Reading");\r
- const char *Label = SF.GetLabel();\r
- unsigned L = SF.GetSeqLength();\r
- ++SeqCount;\r
- LetterCount += L;\r
-\r
- if (opt_verbose)\r
- {\r
- Log(">%7u %7u '%s'\n", Index, L, Label);\r
- Log("+%7.7s %7.7s \"%*.*s\"\n", "", "", L, L, Seq);\r
- }\r
-\r
- ++Index;\r
- }\r
- ProgressStep(999, 1000, "Reading");\r
-\r
- Progress("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount));\r
- Log("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount));\r
- }\r
-#endif // TEST\r
+++ /dev/null
-#ifndef sfasta_h\r
-#define sfasta_h\r
-\r
-#include "myutils.h"\r
-#include "seq.h"\r
-\r
-typedef void (*ON_START_XSEQ)(const SeqData &SD);\r
-typedef void (*ON_END_XSEQ)(const SeqData &SD);\r
-\r
-// Sequential reader for FASTA file format.\r
-// Serves sequences in file order to save memory.\r
-// Caches biggish chunks to compromise memory vs. speed.\r
-class SFasta\r
- {\r
-public:\r
- string m_FileName;\r
- FILE *m_File;\r
- bool m_AllowGaps;\r
-\r
- off_t m_FileSize;\r
-\r
-// Position to start next read\r
- off_t m_FilePos;\r
-\r
-// Cached data.\r
- byte *m_Buffer;\r
-\r
-// Bytes allocated to m_Buffer\r
- unsigned m_BufferSize;\r
-\r
-// Current position in buffer, normally points to '>'\r
- unsigned m_BufferOffset;\r
-\r
-// File data in buffer <= m_BufferSize\r
- unsigned m_BufferBytes;\r
-\r
-// Current label\r
-// Points into m_Buffer, not a separate buffer.\r
- char *m_Label;\r
-\r
-// Current sequence length\r
- unsigned m_SeqLength;\r
-\r
-// Current seq index\r
- unsigned m_SeqIndex;\r
-\r
- unsigned m_ShortestLength;\r
- unsigned m_LongestLength;\r
- unsigned m_TooShortCount;\r
- unsigned m_TooLongCount;\r
- unsigned m_TooPolyCount;\r
-\r
-private:\r
- bool m_IsNucleoSet;\r
- bool m_IsNucleo;\r
-\r
-public:\r
- SFasta();\r
- ~SFasta();\r
-\r
- void Clear();\r
- void Open(const string &FileName);\r
- void Rewind();\r
- bool SetIsNucleo();\r
- bool GetIsNucleo() const { asserta(m_IsNucleoSet); return m_IsNucleo; };\r
-\r
-// Get next sequence.\r
-// Returns zero on end-of-file\r
- const byte *GetNextSeq();\r
-\r
-// Get next sequence as SeqData object, return false on end-of-file.\r
- bool GetNextSD(SeqData &SD);\r
-\r
-// Length of most recent sequence returned by GetNextSeq().\r
- unsigned GetSeqLength() const { return m_SeqLength; }\r
-\r
-// Label of most recent sequence returned by GetNextSeq().\r
- const char *GetLabel() const { return m_Label; }\r
-\r
-// Index of most recent sequence returned by GetNextSeq().\r
- unsigned GetSeqIndex() const { return m_SeqIndex; }\r
-\r
- unsigned GetPctDoneX10() const;\r
- double GetPctDone() const;\r
-\r
- void LogMe() const;\r
-\r
-private:\r
- void FillCache();\r
- const byte *GetNextSeqLo();\r
- };\r
-\r
-#endif // sfasta_h\r
+++ /dev/null
-"Path: .\n"
-"URL: file:///public/svn/usearch\n"
-"Repository Root: file:///public/svn/usearch\n"
-"Repository UUID: 58640331-1837-4c17-bc3e-636dc59aced1\n"
-"Revision: 34\n"
-"Node Kind: directory\n"
-"Schedule: normal\n"
-"Last Changed Author: bob\n"
-"Last Changed Rev: 34\n"
-"Last Changed Date: 2011-05-01 08:29:04 -0700 (Sun, 01 May 2011)\n"
-"\n"
-"? mk\n"
-"! svnmods.h\n"
-"M ungappedblastid.cpp\n"
-"M chaindisjointhits.cpp\n"
+++ /dev/null
-T(MxBase_Alloc)\r
-T(MxBase_FreeData)\r
-T(MxBase_AllocData)\r
-T(SortSeqIndexes)\r
-T(Alloc_Vectors)\r
-T(MainLoop_NotNW)\r
-T(WriteOutput)\r
-T(NWB)\r
-T(ReadAllStdioFile)\r
-T(Windex_Init)\r
-T(Windex_SetSeqIndex)\r
-T(SeqToWords)\r
-T(SeqToWordsStep)\r
-T(SeqToShortWords)\r
-T(SeqToShortWordsA)\r
-T(SeqToShortWordsB)\r
-T(GetFractIdB)\r
-T(Windex_UniqueWordsAlloc)\r
-T(Windex_UniqueWords)\r
-T(GetPctId)\r
-T(Windex_Reset)\r
-T(GetSig)\r
-T(NWEditDist)\r
-T(EditDist_Myers)\r
-T(EditDist_BlockTarget)\r
-T(NWBand)\r
-T(WordCounting)\r
-T(NWAff)\r
-T(NWAffBand)\r
-T(NWSimple)\r
-T(NWSimpleB)\r
-T(BandWrap)\r
-T(IncIdCounts)\r
-T(GetBestDiagB)\r
-T(GetBestDiagB1)\r
-T(GetBestDiagB2)\r
-T(ClusterInit)\r
-T(ClusterPrep)\r
-T(HotSort1)\r
-T(HotSort2)\r
-T(SortA)\r
-T(SortB)\r
-T(CountSort)\r
-T(AddWords)\r
-T(ClusterWindex)\r
-T(MainInit)\r
-T(Output)\r
-T(WindexTail)\r
-T(WindexExit)\r
-T(Sort)\r
-T(U_AllocSeqLength)\r
-T(U_AllocSeedCount)\r
-T(U_AddSeed)\r
-T(AddSeq)\r
-T(U_SetWordCounts)\r
-T(U_SetWordCountsHash)\r
-T(U_SetWordScores)\r
-T(U_SetHotHits)\r
-T(U_SetHotHitsHash)\r
-T(U_SetHotHitsScores)\r
-T(U_Search)\r
-T(U_SearchExact)\r
-T(WF_SeqToWords)\r
-T(WF_SeqToWordsA)\r
-T(WF_SeqToWordsB)\r
-T(WF_AllocLA)\r
-T(WF_AllocLB)\r
-T(WF_AllocDiags)\r
-T(WF_SetA)\r
-T(WF_SetA_Nb)\r
-T(WF_SetAZero)\r
-T(WF_SetA2)\r
-T(WF_SetB)\r
-T(WF_GetCommonWordCount)\r
-T(WF_GetBestDiag)\r
-T(GetFractIdGivenPath)\r
-T(WX_GetUniqueWords)\r
-T(CompressPath)\r
-T(GetHSPs1)\r
-T(GetHSPs2)\r
-T(AlignHSPs)\r
-T(WF_ResolveHSPs)\r
-T(WX_SetExcludes)\r
-T(ViterbiFast)\r
-T(ViterbiFastBand)\r
-T(ViterbiFastBand0)\r
-T(ViterbiFastBand1)\r
-T(ViterbiFastBand2)\r
-T(ViterbiFastBand3)\r
-T(ViterbiFastBand4)\r
-T(TraceBackBit)\r
-T(TraceBackBitSW)\r
-T(SF_GetNextSeq)\r
-T(SF_FillCache)\r
-T(OnGlobalAccept)\r
-T(UngappedBlast)\r
-T(UngappedBlastId)\r
-T(UngappedBlast2Hit)\r
-T(LogHSPs)\r
-T(BlastOutput)\r
-T(BlastLeft)\r
-T(BlastRight)\r
-T(Blast1)\r
-T(Blast2)\r
-T(Blast3)\r
-T(Blast4)\r
-T(GetBestSeg)\r
-T(SWLinearDP)\r
-T(SWLinearTB)\r
-T(SWLinearDP2)\r
-T(SWLinearTB2)\r
-T(Chain)\r
-T(XlatSeq)\r
-T(XlatSeqToLetters)\r
-T(XDropFwdSimple)\r
-T(XDropFwdFast)\r
-T(XDropFwdFastTB)\r
-T(XDropBwd)\r
-T(SWSimple)\r
-T(PathAlloc)\r
-T(SubPath)\r
-T(SWUngapped)\r
-T(SWFast)\r
-T(SWFastNTB)\r
-T(SWAT_CacheQuery)\r
-T(SWAT_AlignTarget)\r
-T(SWAT_CacheQueryNW)\r
-T(SWAT_AlignTargetNW)\r
-T(SeqDB_FromFasta)\r
-T(LocalUngappedHitToAD)\r
-T(LocalGappedHitToAD)\r
-T(GlobalHitToAD)\r
-T(ResolveOverlaps)\r
-T(GetORFs)\r
-T(ChainCov_AddHit)\r
-T(ChainCov_EndQuery)\r
-T(ChainCov_DoTarget)\r
-T(BuildNb)\r
-T(MakeIntSubstMx)\r
-T(UngappedExtendLeft)\r
-T(UngappedExtendRight)\r
-T(AlignSP)\r
-T(AlignHSP)\r
-\r
-// Background\r
-T(Bg_SearchLoop)\r
-T(Bg_MainInit)\r
-T(Bg_MainTerm)\r
-T(Bg_Other)\r
-T(Bg_1)\r
-T(Bg_2)\r
-T(Bg_3)\r
-T(Bg_4)\r
-T(Bg_5)\r
-T(Bg_6)\r
-T(Bg_7)\r
-T(Bg_8)\r
-T(Bg_9)\r
-T(Bg_XFrame2)\r
-T(Bg_Usearch1)\r
-T(Bg_Usearch2)\r
-T(Bg_Usearch3)\r
-T(Bg_Usearch4)\r
-T(Bg_Hot)\r
-\r
-// For Timer2\r
-T(Search_2)\r
-T(Search_Loop_2)\r
-T(Search_InnerLoop_2)\r
-T(OnHit_2)\r
-T(UngappedBlast_2)\r
-T(MainInit_2)\r
-T(MainTerm_2)\r
+++ /dev/null
-#define TIMING 0
-#ifndef timing_h
-#define timing_h
-
-#define BG_TIMING 0
-
-#if !TIMING
-#undef BG_TIMING
-#define BG_TIMING 0
-#endif
-
-#if UCHIMES
-#undef TIMING
-#define TIMING 0
-#endif
-
-#if TIMING
-
-enum TIMER
- {
- TIMER_None,
-#define T(x) TIMER_##x,
-#include "timers.h"
-#undef T
- };
-
-const unsigned TimerCount =
- 1 // TIMER_None
-#define T(x) +1
-#include "timers.h"
-#undef T
- ;
-
-enum COUNTER
- {
-#define C(x) COUNTER_##x,
-#include "counters.h"
-#undef C
- };
-
-enum ALLOCER
- {
-#define A(x) ALLOCER_##x,
-#include "allocs.h"
-#undef A
- };
-
-const unsigned CounterCount =
-#define C(x) +1
-#include "counters.h"
-#undef C
- ;
-
-const unsigned AllocerCount =
-#define A(x) +1
-#include "allocs.h"
-#undef A
- ;
-
-#ifdef _MSC_VER
-
-typedef unsigned __int64 TICKS;
-
-#pragma warning(disable:4035)
-inline TICKS GetClockTicks()
- {
- _asm
- {
- _emit 0x0f
- _emit 0x31
- }
- }
-
-#else // ifdef _MSC_VER
-
-typedef uint64_t TICKS;
-__inline__ uint64_t GetClockTicks()
- {
- uint32_t lo, hi;
- /* We cannot use "=A", since this would use %rax on x86_64 */
- __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
- return (uint64_t)hi << 32 | lo;
- }
-
-#endif // ifdef _MSC_VER
-
-//void AddTicks(const string &Name, TICKS Ticks1, TICKS Ticks2);
-//void AddBytes(const string &Name, double Bytes);
-//#define SubBytes(Name, Bytes) AddBytes(Name, -double(Bytes))
-
-const char *TimerToStr(TIMER t);
-
-extern TICKS g_BeginTicks[TimerCount];
-extern double g_TotalTicks[TimerCount];
-extern double g_TotalCounts[TimerCount];
-extern double g_Counters[CounterCount];
-extern unsigned g_AllocNewCount[AllocerCount];
-extern unsigned g_AllocFreeCount[AllocerCount];
-extern double g_AllocNewBytes[AllocerCount];
-extern double g_AllocFreeBytes[AllocerCount];
-extern double g_AllocNetBytes[AllocerCount];
-extern double g_AllocPeakBytes[AllocerCount];
-extern bool g_Timer2[TimerCount];
-extern TIMER g_CurrTimer;
-#if BG_TIMING
-extern TIMER g_BackgroundTimer;
-#endif
-
-#define MYALLOC(Type, N, Name) (Type *) MyAlloc_((N)*sizeof(Type), ALLOCER_##Name, __FILE__, __LINE__)
-#define MYFREE(Array, N, Name) MyFree_(Array, N*sizeof(Array[0]), ALLOCER_##Name, __FILE__, __LINE__)
-
-inline void *MyAlloc_(unsigned Bytes, unsigned a, const char *FileName, int Line)
- {
- ++g_AllocNewCount[a];
- g_AllocNewBytes[a] += Bytes;
- g_AllocNetBytes[a] += Bytes;
- if (g_AllocNetBytes[a] > g_AllocPeakBytes[a])
- g_AllocPeakBytes[a] = g_AllocNetBytes[a];
- return mymalloc(Bytes);
- }
-
-inline void MyFree_(void *p, unsigned Bytes, unsigned a, const char *FileName, int Line)
- {
- ++g_AllocFreeCount[a];
- g_AllocFreeBytes[a] += Bytes;
- g_AllocNetBytes[a] -= Bytes;
- myfree2(p, Bytes);
- }
-
-#if BG_TIMING
-inline void SetBackgroundTimer_(TIMER Timer)
- {
- TICKS Now = GetClockTicks();
- if (g_BeginTicks[g_BackgroundTimer] != 0)
- {
- ++g_TotalCounts[g_BackgroundTimer];
- g_TotalTicks[g_BackgroundTimer] += double(Now - g_BeginTicks[g_BackgroundTimer]);
- }
- g_BackgroundTimer = Timer;
- g_BeginTicks[Timer] = Now;
- }
-#else
-#define SetBackgroundTimer_(Timer) /* empty */
-#endif
-
-inline void StartTimer_(TIMER Timer)
- {
- if (g_CurrTimer != TIMER_None)
- Die("StartTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
-
- TICKS Now = GetClockTicks();
-#if BG_TIMING
- if (g_BeginTicks[g_BackgroundTimer] != 0)
- {
- ++g_TotalCounts[g_BackgroundTimer];
- g_TotalTicks[g_BackgroundTimer] += double(Now - g_BeginTicks[g_BackgroundTimer]);
- }
-#endif
- g_BeginTicks[Timer] = Now;
- g_CurrTimer = Timer;
- }
-
-inline void PauseTimer_(TIMER Timer)
- {
- if (Timer != g_CurrTimer)
- Die("PauseTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
-
- TICKS Now = GetClockTicks();
- g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]);
- g_BeginTicks[Timer] = Now;
- g_CurrTimer = TIMER_None;
- }
-
-inline void EndTimer_(TIMER Timer)
- {
- if (Timer != g_CurrTimer)
- Die("EndTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
-
- TICKS Now = GetClockTicks();
-#if BG_TIMING
- g_BeginTicks[g_BackgroundTimer] = Now;
-#endif
- g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]);
- ++g_TotalCounts[Timer];
- g_CurrTimer = TIMER_None;
- }
-
-inline void StartTimer2_(TIMER Timer)
- {
- g_Timer2[Timer] = true;
- g_BeginTicks[Timer] = GetClockTicks();
- }
-
-inline void EndTimer2_(TIMER Timer)
- {
- g_TotalTicks[Timer] += double(GetClockTicks() - g_BeginTicks[Timer]);
- ++g_TotalCounts[Timer];
- }
-
-#define AddCounter(x, N) g_Counters[COUNTER_##x] += N
-#define IncCounter(x) ++(g_Counters[COUNTER_##x])
-#define StartTimer(x) StartTimer_(TIMER_##x)
-#define PauseTimer(x) PauseTimer_(TIMER_##x)
-#define EndTimer(x) EndTimer_(TIMER_##x)
-#define StartTimer2(x) StartTimer2_(TIMER_##x)
-#define EndTimer2(x) EndTimer2_(TIMER_##x)
-
-#if BG_TIMING
-#define SetBackgroundTimer(x) SetBackgroundTimer_(TIMER_##x)
-#else
-#define SetBackgroundTimer(x) /* empty */
-#endif
-
-#else // if TIMING
-
-#define AddCounter(x, N) /* empty */
-#define IncCounter(x) /* empty */
-#define StartTimer(x) /* empty */
-#define PauseTimer(x) /* empty */
-#define EndTimer(x) /* empty */
-#define StartTimer2(x) /* empty */
-#define PauseTimer2(x) /* empty */
-#define EndTimer2(x) /* empty */
-#define SetBackgroundTimer(x) /* empty */
-#define MYALLOC(Type, N, Name) myalloc(Type, N)
-#define MYFREE(Array, N, Name) myfree(Array)
-
-#endif // if TIMING
-
-void LogMemStats();
-void LogTickStats();
-void LogStats();
-void LogAllocs();
-
-#define AddBytes(x, n) /* empty */
-#define SubBytes(x, n) /* empty */
-
-#endif // if timing_h
+++ /dev/null
-#include "dp.h"
-
-#define TRACE 0
-
-Mx<byte> g_Mx_TBBit;
-byte **g_TBBit;
-float *g_DPRow1;
-float *g_DPRow2;
-static float *g_DPBuffer1;
-static float *g_DPBuffer2;
-
-static unsigned g_CacheLB;
-
-void AllocBit(unsigned LA, unsigned LB)
- {
- g_Mx_TBBit.Alloc("TBBit", LA+1, LB+1);
- g_TBBit = g_Mx_TBBit.GetData();
- if (LB > g_CacheLB)
- {
- MYFREE(g_DPBuffer1, g_CacheLB, AllocBit);
- MYFREE(g_DPBuffer2, g_CacheLB, AllocBit);
-
- g_CacheLB = LB + 128;
-
- // Allow use of [-1]
- //g_DPBuffer1 = myalloc<float>(g_CacheLB+3);
- //g_DPBuffer2 = myalloc<float>(g_CacheLB+3);
- g_DPBuffer1 = MYALLOC(float, g_CacheLB+3, AllocBit);
- g_DPBuffer2 = MYALLOC(float, g_CacheLB+3, AllocBit);
- g_DPRow1 = g_DPBuffer1 + 1;
- g_DPRow2 = g_DPBuffer2 + 1;
- }
- }
-
-void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD)
- {
- PD.Alloc(LA+LB);
-
- StartTimer(TraceBackBit);
- char *PathPtr = PD.Back;
- *PathPtr = 0;
-
- byte **TB = g_TBBit;
-
-#if TRACE
- Log("\n");
- Log("TraceBackBit\n");
-#endif
-
- size_t i = LA;
- size_t j = LB;
- for (;;)
- {
-#if TRACE
- Log("i=%3d j=%3d state=%c\n", (int) i, (int) j, State);
-#endif
- if (i == 0 && j == 0)
- break;
-
- --PathPtr;
- *PathPtr = State;
-
- byte t;
- switch (State)
- {
- case 'M':
- asserta(i > 0 && j > 0);
- t = TB[i-1][j-1];
- if (t & TRACEBITS_DM)
- State = 'D';
- else if (t & TRACEBITS_IM)
- State = 'I';
- else
- State = 'M';
- --i;
- --j;
- break;
- case 'D':
- asserta(i > 0);
- t = TB[i-1][j];
- if (t & TRACEBITS_MD)
- State = 'M';
- else
- State = 'D';
- --i;
- break;
-
- case 'I':
- asserta(j > 0);
- t = TB[i][j-1];
- if (t & TRACEBITS_MI)
- State = 'M';
- else
- State = 'I';
- --j;
- break;
-
- default:
- Die("TraceBackBit, invalid state %c", State);
- }
- }
- PD.Start = PathPtr;
- EndTimer(TraceBackBit);
- }
-
-void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj,
- unsigned &Leni, unsigned &Lenj, PathData &PD)
- {
- PD.Alloc(LA+LB);
-
- StartTimer(TraceBackBitSW);
- char *PathPtr = PD.Back;
- *PathPtr = 0;
-
- byte **TB = g_TBBit;
-
-#if TRACE
- Log("\n");
- Log("TraceBackBitSW\n");
-#endif
-
- unsigned i = Besti;
- unsigned j = Bestj;
- char State = 'M';
- for (;;)
- {
-#if TRACE
- Log("i=%3d j=%3d state=%c\n", (int) i, (int) j, State);
-#endif
- --PathPtr;
- *PathPtr = State;
-
- byte t;
- switch (State)
- {
- case 'M':
- asserta(i > 0 && j > 0);
- t = TB[i-1][j-1];
- if (t & TRACEBITS_DM)
- State = 'D';
- else if (t & TRACEBITS_IM)
- State = 'I';
- else if (t & TRACEBITS_SM)
- {
- Leni = Besti - i + 1;
- Lenj = Bestj - j + 1;
- PD.Start = PathPtr;
- EndTimer(TraceBackBitSW);
- return;
- }
- else
- State = 'M';
- --i;
- --j;
- break;
- case 'D':
- asserta(i > 0);
- t = TB[i-1][j];
- if (t & TRACEBITS_MD)
- State = 'M';
- else
- State = 'D';
- --i;
- break;
-
- case 'I':
- asserta(j > 0);
- t = TB[i][j-1];
- if (t & TRACEBITS_MI)
- State = 'M';
- else
- State = 'I';
- --j;
- break;
-
- default:
- Die("TraceBackBitSW, invalid state %c", State);
- }
- }
- }
+++ /dev/null
-#ifndef uc_h\r
-#define uc_h\r
-\r
-#include "seqdb.h"\r
-#include "seq.h"\r
-#include "path.h"\r
-\r
-struct AlnData;\r
-\r
-int uchime_main(int, char**); \r
-\r
-class UCFile\r
- {\r
-public:\r
- FILE *m_File;\r
- byte *m_Data;\r
- vector<char> m_RecTypes;\r
- vector<float> m_PctIds;\r
- vector<const char *> m_Labels;\r
- vector<const char *> m_SeedLabels;\r
- vector<unsigned> m_SeedIndexes;\r
- vector<const char *> m_CompressedPaths;\r
- vector<unsigned> m_SeqLengths;\r
- vector<unsigned> m_SortOrder;\r
- vector<char> m_Strands;\r
- vector<unsigned> m_Los;\r
- vector<unsigned> m_SeedLos;\r
-\r
-public:\r
- UCFile();\r
- void Clear(bool ctor = false);\r
- void Close();\r
- void FromFile(const string &FileName);\r
- void FromClstr(const string &FileName);\r
- void ToFile(const string &FileName);\r
- unsigned GetRecordCount() const;\r
- void LogMe() const;\r
- void ToClstr(const string &FileName);\r
- void ToFasta(const string &FileName, const SeqDB &Input, bool Reformat);\r
- void Create(const string &FileName);\r
- void Sort();\r
- void Flush() const;\r
-\r
- void WriteNotMatched(unsigned L, const char *Label) const;\r
- void WriteLibSeed(unsigned SeedIndex, unsigned L, const char *Label) const;\r
- void WriteNewSeed(unsigned SeedIndex, unsigned L, const char *Label) const;\r
- void WriteHit(const SeqData &SA, const SeqData &SB, double FractId,\r
- const PathData &PD) const;\r
- void WriteReject(const SeqData &SA, const SeqData &SB, double FractId,\r
- const char *Path) const;\r
- void WriteHit(unsigned SeedIndex, unsigned L, double PctId,\r
- const char *CompressedPath, char Strand, unsigned Lo, unsigned SeedLo,\r
- const char *Label, const char *SeedLabel) const;\r
- void WriteHit(const AlnData &AD);\r
- void WriteLibCluster(unsigned SeedIndex, unsigned Size, double AvgId,\r
- const char *Label) const;\r
- void WriteNewCluster(unsigned SeedIndex, unsigned Size, double AvgId,\r
- const char *Label) const;\r
- void WriteSeqX(FILE *f, const byte *Seq, unsigned L, const char *CompressedPath) const;\r
- };\r
-\r
-#endif // uc_h\r
+++ /dev/null
-#include "myutils.h"\r
-#include "chime.h"\r
-#include "seqdb.h"\r
-#include "dp.h"\r
-#include "ultra.h"\r
-#include "hspfinder.h"\r
-#include <algorithm>\r
-#include <set>\r
-\r
-bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
- const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
- float MinFractId, ChimeHit2 &Hit);\r
-\r
-FILE *g_fUChime;\r
-FILE *g_fUChimeAlns;\r
-const vector<float> *g_SortVecFloat;\r
-bool g_UchimeDeNovo = false;\r
-\r
-void Usage()\r
- {\r
- printf("\n");\r
- printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);\r
- printf("http://www.drive5.com/uchime\n");\r
- printf("\n");\r
- printf("This software is donated to the public domain\n");\r
- printf("\n");\r
-\r
- printf(\r
-#include "help.h"\r
- );\r
- }\r
-\r
-void SetBLOSUM62()\r
- {\r
- Die("SetBLOSUM62 not implemented");\r
- }\r
-\r
-void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)\r
- {\r
- Die("ReadSubstMx not implemented");\r
- }\r
-\r
-void LogAllocs()\r
- {\r
- /*empty*/\r
- }\r
-\r
-static bool CmpDescVecFloat(unsigned i, unsigned j)\r
- {\r
- return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j];\r
- }\r
-\r
-void Range(vector<unsigned> &v, unsigned N)\r
- {\r
- v.clear();\r
- v.reserve(N);\r
- for (unsigned i = 0; i < N; ++i)\r
- v.push_back(i);\r
- }\r
-\r
-void SortDescending(const vector<float> &Values, vector<unsigned> &Order)\r
- {\r
- StartTimer(Sort);\r
- const unsigned N = SIZE(Values);\r
- Range(Order, N);\r
- g_SortVecFloat = &Values;\r
- sort(Order.begin(), Order.end(), CmpDescVecFloat);\r
- EndTimer(Sort);\r
- }\r
-\r
-float GetAbFromLabel(const string &Label)\r
- {\r
- vector<string> Fields;\r
- Split(Label, Fields, '/');\r
- const unsigned N = SIZE(Fields);\r
- for (unsigned i = 0; i < N; ++i)\r
- {\r
- const string &Field = Fields[i];\r
- if (Field.substr(0, 3) == "ab=")\r
- {\r
- string a = Field.substr(3, string::npos);\r
- return (float) atof(a.c_str());\r
- }\r
- }\r
- if (g_UchimeDeNovo)\r
- Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());\r
- return 0.0;\r
- }\r
-\r
-int uchime_main(int argc, char *argv[])\r
- {\r
- \r
- MyCmdLine(argc, argv);\r
-\r
- if (argc < 2)\r
- {\r
- Usage();\r
- return 0;\r
- }\r
-\r
- if (opt_version)\r
- {\r
- printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
- return 0;\r
- }\r
-\r
- printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
- printf("by Robert C. Edgar\n");\r
- printf("http://drive5.com/uchime\n");\r
- printf("This code is donated to the public domain.\n");\r
- printf("\n");\r
- if (!optset_w)\r
- opt_w = 8;\r
- \r
- float MinFractId = 0.95f;\r
- if (optset_id)\r
- MinFractId = (float) opt_id;\r
-\r
- Log("%8.2f minh\n", opt_minh);\r
- Log("%8.2f xn\n", opt_xn);\r
- Log("%8.2f dn\n", opt_dn);\r
- Log("%8.2f xa\n", opt_xa);\r
- Log("%8.2f mindiv\n", opt_mindiv);\r
- Log("%8u maxp\n", opt_maxp);\r
-\r
- if (opt_input == "" && opt_uchime != "")\r
- opt_input = opt_uchime;\r
-\r
- if (opt_input == "")\r
- Die("Missing --input");\r
-\r
- g_UchimeDeNovo = (opt_db == "");\r
-\r
- if (opt_uchimeout != "")\r
- g_fUChime = CreateStdioFile(opt_uchimeout);\r
-\r
- if (opt_uchimealns != "")\r
- g_fUChimeAlns = CreateStdioFile(opt_uchimealns);\r
-\r
- SeqDB Input;\r
- SeqDB DB;\r
-\r
- Input.FromFasta(opt_input);\r
- if (!Input.IsNucleo())\r
- Die("Input contains amino acid sequences");\r
-\r
- const unsigned QuerySeqCount = Input.GetSeqCount();\r
- vector<unsigned> Order;\r
- for (unsigned i = 0; i < QuerySeqCount; ++i)\r
- Order.push_back(i);\r
-\r
- if (g_UchimeDeNovo)\r
- {\r
- vector<float> Abs;\r
- for (unsigned i = 0; i < QuerySeqCount; ++i)\r
- {\r
- const char *Label = Input.GetLabel(i);\r
- float Ab = GetAbFromLabel(Label);\r
- Abs.push_back(Ab);\r
- }\r
- SortDescending(Abs, Order);\r
- DB.m_IsNucleoSet = true;\r
- DB.m_IsNucleo = true;\r
- }\r
- else\r
- {\r
- DB.FromFasta(opt_db);\r
- if (!DB.IsNucleo())\r
- Die("Database contains amino acid sequences");\r
- }\r
-\r
- vector<ChimeHit2> Hits;\r
- unsigned HitCount = 0;\r
- for (unsigned i = 0; i < QuerySeqCount; ++i)\r
- {\r
- unsigned QuerySeqIndex = Order[i];\r
-\r
- SeqData QSD;\r
- Input.GetSeqData(QuerySeqIndex, QSD);\r
-\r
- float QAb = -1.0;\r
- if (g_UchimeDeNovo)\r
- QAb = GetAbFromLabel(QSD.Label);\r
-\r
- ChimeHit2 Hit;\r
- AlnParams &AP = *(AlnParams *) 0;\r
- AlnHeuristics &AH = *(AlnHeuristics *) 0;\r
- HSPFinder &HF = *(HSPFinder *) 0;\r
- bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);\r
- if (Found)\r
- ++HitCount;\r
- else\r
- {\r
- if (g_UchimeDeNovo)\r
- DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);\r
- }\r
-\r
- WriteChimeHit(g_fUChime, Hit);\r
-\r
- ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));\r
- }\r
-\r
- Log("\n");\r
- Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
- opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r
-\r
- CloseStdioFile(g_fUChime);\r
- CloseStdioFile(g_fUChimeAlns);\r
-\r
- ProgressExit();\r
- return 0;\r
- }\r
+++ /dev/null
-#ifndef ultra_h
-#define ultra_h
-
-#include "seqdb.h"
-#define Ultra SeqDB
-#define GetSeedLabel GetLabel
-
-#endif // ultra_h
+++ /dev/null
-//#if UCHIMES\r
-\r
-#include "myutils.h"\r
-#include "seqdb.h"\r
-#include "seq.h"\r
-#include "alpha.h"\r
-\r
-void SortDescending(const vector<float> &Values, vector<unsigned> &Order);\r
-\r
-static byte *g_QueryHasWord;\r
-static unsigned g_WordCount;\r
-\r
-unsigned GetWord(const byte *Seq)\r
- {\r
- unsigned Word = 0;\r
- const byte *Front = Seq;\r
- for (unsigned i = 0; i < opt_w; ++i)\r
- {\r
- unsigned Letter = g_CharToLetterNucleo[*Front++];\r
- Word = (Word*4) + Letter;\r
- }\r
- return Word;\r
- }\r
-\r
-static void SetQuery(const SeqData &Query)\r
- {\r
- if (g_QueryHasWord == 0)\r
- {\r
- g_WordCount = 4;\r
- for (unsigned i = 1; i < opt_w; ++i)\r
- g_WordCount *= 4;\r
-\r
- g_QueryHasWord = myalloc(byte, g_WordCount);\r
- }\r
-\r
- memset(g_QueryHasWord, 0, g_WordCount);\r
-\r
- if (Query.L <= opt_w)\r
- return;\r
-\r
- const unsigned L = Query.L - opt_w + 1;\r
- const byte *Seq = Query.Seq;\r
- for (unsigned i = 0; i < L; ++i)\r
- {\r
- unsigned Word = GetWord(Seq++);\r
- g_QueryHasWord[Word] = 1;\r
- }\r
- }\r
-\r
-static unsigned GetUniqueWordsInCommon(const SeqData &Target)\r
- {\r
- if (Target.L <= opt_w)\r
- return 0;\r
-\r
- unsigned Count = 0;\r
- const unsigned L = Target.L - opt_w + 1;\r
- const byte *Seq = Target.Seq;\r
- for (unsigned i = 0; i < L; ++i)\r
- {\r
- unsigned Word = GetWord(Seq++);\r
- if (g_QueryHasWord[Word])\r
- ++Count;\r
- }\r
- return Count;\r
- }\r
-\r
-void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts, \r
- vector<unsigned> &Order)\r
- {\r
- WordCounts.clear();\r
- Order.clear();\r
-\r
- SetQuery(Query);\r
-\r
- const unsigned SeqCount = DB.GetSeqCount();\r
- for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)\r
- {\r
- SeqData Target;\r
- DB.GetSeqData(SeqIndex, Target);\r
- float WordCount = (float) GetUniqueWordsInCommon(Target);\r
- WordCounts.push_back(WordCount);\r
- }\r
- SortDescending(WordCounts, Order);\r
- }\r
-\r
-//#endif // UCHIMES\r
+++ /dev/null
-#include "dp.h"
-#include "out.h"
-#include "evalue.h"
-
-#define CMP_SIMPLE 0
-\r
-#if SAVE_FAST
-static Mx<float> g_MxDPM;
-static Mx<float> g_MxDPD;
-static Mx<float> g_MxDPI;
-
-static Mx<char> g_MxTBM;
-static Mx<char> g_MxTBD;
-static Mx<char> g_MxTBI;
-
-static float **g_DPM;
-static float **g_DPD;
-static float **g_DPI;
-
-static char **g_TBM;
-static char **g_TBD;
-static char **g_TBI;
-
-#if CMP_SIMPLE
-static Mx<float> *g_DPMSimpleMx;
-static Mx<float> *g_DPDSimpleMx;
-static Mx<float> *g_DPISimpleMx;
-static float **g_DPMSimple;
-static float **g_DPDSimple;
-static float **g_DPISimple;
-
-#define cmpm(i, j, x) { if (!feq(x, g_DPMSimple[i][j])) \
- { \
- Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
- __FILE__, __LINE__, x, i, j, g_DPMSimple[i][j]); \
- } \
- }
-
-#define cmpd(i, j, x) { if (!feq(x, g_DPDSimple[i][j])) \
- { \
- Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
- __FILE__, __LINE__, x, i, j, g_DPDSimple[i][j]); \
- } \
- }
-
-#define cmpi(i, j, x) { if (!feq(x, g_DPISimple[i][j])) \
- { \
- Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
- __FILE__, __LINE__, x, i, j, g_DPISimple[i][j]); \
- } \
- }
-
-#else
-
-#define cmpm(i, j, x) /* empty */
-#define cmpd(i, j, x) /* empty */
-#define cmpi(i, j, x) /* empty */
-
-#endif
-
-static void AllocSave(unsigned LA, unsigned LB)
- {
-#if CMP_SIMPLE
- GetSimpleDPMxs(&g_DPMSimpleMx, &g_DPDSimpleMx, &g_DPISimpleMx);
- g_DPMSimple = g_DPMSimpleMx->GetData();
- g_DPDSimple = g_DPDSimpleMx->GetData();
- g_DPISimple = g_DPISimpleMx->GetData();
-#endif
- g_MxDPM.Alloc("FastM", LA+1, LB+1);\r
- g_MxDPD.Alloc("FastD", LA+1, LB+1);\r
- g_MxDPI.Alloc("FastI", LA+1, LB+1);\r
-\r
- g_MxTBM.Alloc("FastTBM", LA+1, LB+1);\r
- g_MxTBD.Alloc("FastTBD", LA+1, LB+1);\r
- g_MxTBI.Alloc("FastTBI", LA+1, LB+1);\r
-\r
- g_DPM = g_MxDPM.GetData();\r
- g_DPD = g_MxDPD.GetData();\r
- g_DPI = g_MxDPI.GetData();\r
-\r
- g_TBM = g_MxTBM.GetData();\r
- g_TBD = g_MxTBD.GetData();\r
- g_TBI = g_MxTBI.GetData();\r
- }
-
-static void SAVE_DPM(unsigned i, unsigned j, float x)
- {
- g_DPM[i][j] = x;
-#if CMP_SIMPLE
- if (i > 0 && j > 0)
- asserta(feq(x, g_DPMSimple[i][j]));
-#endif
- }
-
-static void SAVE_DPD(unsigned i, unsigned j, float x)
- {
- g_DPD[i][j] = x;
-#if CMP_SIMPLE
- if (i > 0 && j > 0)
- asserta(feq(x, g_DPDSimple[i][j]));
-#endif
- }
-
-static void SAVE_DPI(unsigned i, unsigned j, float x)
- {
- g_DPI[i][j] = x;
-#if CMP_SIMPLE
- if (i > 0 && j > 0)
- asserta(feq(x, g_DPISimple[i][j]));
-#endif
- }
-
-static void SAVE_TBM(unsigned i, unsigned j, char x)
- {
- g_TBM[i][j] = x;
- }
-
-static void SAVE_TBD(unsigned i, unsigned j, char x)
- {
- g_TBD[i][j] = x;
- }
-
-static void SAVE_TBI(unsigned i, unsigned j, char x)
- {
- g_TBI[i][j] = x;
- }
-
-void GetFastMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I)
- {
- *M = &g_MxDPM;
- *D = &g_MxDPD;
- *I = &g_MxDPI;
- }
-
-#else // SAVE_FAST
-
-#define SAVE_DPM(i, j, x) /* empty */
-#define SAVE_DPD(i, j, x) /* empty */
-#define SAVE_DPI(i, j, x) /* empty */
-
-#define SAVE_TBM(i, j, x) /* empty */
-#define SAVE_TBD(i, j, x) /* empty */
-#define SAVE_TBI(i, j, x) /* empty */
-
-#define AllocSave(LA, LB) /* empty */
-
-#define cmpm(i, j, x) /* empty */
-#define cmpd(i, j, x) /* empty */
-#define cmpi(i, j, x) /* empty */
-
-#endif // SAVE_FAST
-
-float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB,
- const AlnParams &AP, PathData &PD)
- {
- if (LA*LB > 100*1000*1000)
- Die("ViterbiFast, too long LA=%u, LB=%u", LA, LB);
-
- AllocBit(LA, LB);
- AllocSave(LA, LB);
-
- StartTimer(ViterbiFast);
-
- const float * const *Mx = AP.SubstMx;
- float OpenA = AP.LOpenA;
- float ExtA = AP.LExtA;
-
- byte **TB = g_TBBit;
- float *Mrow = g_DPRow1;
- float *Drow = g_DPRow2;
-
-// Use Mrow[-1], so...
- Mrow[-1] = MINUS_INFINITY;
- for (unsigned j = 0; j <= LB; ++j)
- {
- Mrow[j] = MINUS_INFINITY;
- SAVE_DPM(0, j, MINUS_INFINITY);
- SAVE_TBM(0, j, '?');
-
- Drow[j] = MINUS_INFINITY;
- SAVE_DPD(0, j, MINUS_INFINITY);
- SAVE_TBD(0, j, '?');
- }
-
-// Main loop
- float M0 = float (0);
- SAVE_DPM(0, 0, 0);
- for (unsigned i = 0; i < LA; ++i)
- {
- byte a = A[i];
- const float *MxRow = Mx[a];
- float OpenB = AP.LOpenB;
- float ExtB = AP.LExtB;
- float I0 = MINUS_INFINITY;
-
- SAVE_TBM(i, 0, '?');
-
- SAVE_DPI(i, 0, MINUS_INFINITY);
- SAVE_DPI(i, 1, MINUS_INFINITY);
-
- SAVE_TBI(i, 0, '?');
- SAVE_TBI(i, 1, '?');
-
- byte *TBrow = TB[i];
- for (unsigned j = 0; j < LB; ++j)
- {
- byte b = B[j];
- byte TraceBits = 0;
- float SavedM0 = M0;
-
- // MATCH
- {
- // M0 = DPM[i][j]
- // I0 = DPI[i][j]
- // Drow[j] = DPD[i][j]
- cmpm(i, j, M0);
- cmpd(i, j, Drow[j]);
- cmpi(i, j, I0);
-
- float xM = M0;
- SAVE_TBM(i+1, j+1, 'M');
- if (Drow[j] > xM)
- {
- xM = Drow[j];
- TraceBits = TRACEBITS_DM;
- SAVE_TBM(i+1, j+1, 'D');
- }
- if (I0 > xM)
- {
- xM = I0;
- TraceBits = TRACEBITS_IM;
- SAVE_TBM(i+1, j+1, 'I');
- }
- M0 = Mrow[j];
- cmpm(i, j+1, M0);
-
- Mrow[j] = xM + MxRow[b];
- // Mrow[j] = DPM[i+1][j+1])
- SAVE_DPM(i+1, j+1, Mrow[j]);
- }
-
- // DELETE
- {
- // SavedM0 = DPM[i][j]
- // Drow[j] = DPD[i][j]
- cmpm(i, j, SavedM0);
- cmpd(i, j, Drow[j]);
-
- float md = SavedM0 + OpenB;
- Drow[j] += ExtB;
- SAVE_TBD(i+1, j, 'D');
- if (md >= Drow[j])
- {
- Drow[j] = md;
- TraceBits |= TRACEBITS_MD;
- SAVE_TBD(i+1, j, 'M');
- }
- // Drow[j] = DPD[i+1][j]
- SAVE_DPD(i+1, j, Drow[j]);
- }
-
- // INSERT
- {
- // SavedM0 = DPM[i][j]
- // I0 = DPI[i][j]
- cmpm(i, j, SavedM0);
- cmpi(i, j, I0);
-
- float mi = SavedM0 + OpenA;
- I0 += ExtA;
- SAVE_TBI(i, j+1, 'I');
- if (mi >= I0)
- {
- I0 = mi;
- TraceBits |= TRACEBITS_MI;
- SAVE_TBI(i, j+1, 'M');
- }
- // I0 = DPI[i][j+1]
- SAVE_DPI(i, j+1, I0);
- }
-
- OpenB = AP.OpenB;
- ExtB = AP.ExtB;
-
- TBrow[j] = TraceBits;
- }
-
- // Special case for end of Drow[]
- {
- // M0 = DPM[i][LB]
- // Drow[LB] = DPD[i][LB]
-
- TBrow[LB] = 0;
- float md = M0 + AP.ROpenB;
- Drow[LB] += AP.RExtB;
- SAVE_TBD(i+1, LB, 'D');
- if (md >= Drow[LB])
- {
- Drow[LB] = md;
- TBrow[LB] = TRACEBITS_MD;
- SAVE_TBD(i+1, LB, 'M');
- }
- // Drow[LB] = DPD[i+1][LB]
- SAVE_DPD(i+1, LB, Drow[LB]);
- }
-
- SAVE_DPM(i+1, 0, MINUS_INFINITY);
- M0 = MINUS_INFINITY;
-
- OpenA = AP.OpenA;
- ExtA = AP.ExtA;
- }
-
- SAVE_TBM(LA, 0, '?');
-
-// Special case for last row of DPI
- byte *TBrow = TB[LA];
- float I1 = MINUS_INFINITY;
-
- SAVE_DPI(LA, 0, MINUS_INFINITY);
- SAVE_TBI(LA, 0, '?');
-
- SAVE_DPI(LA, 1, MINUS_INFINITY);
- SAVE_TBI(LA, 1, '?');
-
- for (unsigned j = 1; j < LB; ++j)
- {
- // Mrow[j-1] = DPM[LA][j]
- // I1 = DPI[LA][j]
-
- TBrow[j] = 0;
- float mi = Mrow[int(j)-1] + AP.ROpenA;
- I1 += AP.RExtA;
- SAVE_TBI(LA, j+1, 'I');
- if (mi > I1)
- {
- I1 = mi;
- TBrow[j] = TRACEBITS_MI;
- SAVE_TBI(LA, j+1, 'M');
- }
- SAVE_DPI(LA, j+1, I1);
- }
-
- float FinalM = Mrow[LB-1];
- float FinalD = Drow[LB];
- float FinalI = I1;
-// FinalM = DPM[LA][LB]
-// FinalD = DPD[LA][LB]
-// FinalI = DPI[LA][LB]
-
- float Score = FinalM;
- byte State = 'M';
- if (FinalD > Score)
- {
- Score = FinalD;
- State = 'D';
- }
- if (FinalI > Score)
- {
- Score = FinalI;
- State = 'I';
- }
-
- EndTimer(ViterbiFast);
- TraceBackBit(LA, LB, State, PD);
-
-#if SAVE_FAST
- g_MxDPM.LogMe();
- g_MxDPD.LogMe();
- g_MxDPI.LogMe();
-
- g_MxTBM.LogMe();
- g_MxTBD.LogMe();
- g_MxTBI.LogMe();
-#endif
-
- return Score;
- }
+++ /dev/null
-#ifndef windex_h\r
-#define windex_h\r
-\r
-class SFasta;\r
-struct SeqDB;\r
-\r
-typedef uint32 word_t;\r
-typedef uint16 wordcount_t;\r
-typedef uint32 arrsize_t;\r
-typedef uint16 seqcountperword_t;\r
-typedef uint32 seqindex_t;\r
-typedef uint16 commonwordcount_t;\r
-\r
-const uint32 WindexFileHdr_Magic1 = 0x312DE41;\r
-const uint32 WindexFileHdr_Magic2 = 0x312DE42;\r
-const uint32 WindexFileHdr_Magic3 = 0x312DE43;\r
-const uint32 WindexFileHdr_Magic4 = 0x312DE44;\r
-\r
-struct WindexFileHdr\r
- {\r
- uint32 Magic1;\r
- uint32 IsNucleo;\r
- uint32 WordLength;\r
- uint32 Magic2;\r
- };\r
-\r
-class Windex\r
- {\r
-public:\r
- bool m_Nucleo;\r
- bool m_RedAlpha;\r
- unsigned m_WordLength;\r
- unsigned m_AlphaSize;\r
- unsigned m_WordCount;\r
- unsigned m_Hi;\r
- unsigned m_CapacityInc;\r
- arrsize_t *m_Capacities;\r
- arrsize_t *m_Sizes;\r
- float *m_WordScores;\r
- seqindex_t **m_SeedIndexes;\r
- byte *m_UniqueCounts;\r
- unsigned m_CharToLetter[256];\r
-\r
-public:\r
- Windex();\r
- void ToFile(const string &FileName) const;\r
- void FromFile(const string &FileName);\r
- void FromSFasta(SFasta &SF);\r
- void FromSeqDB(const SeqDB &DB);\r
- void Clear(bool ctor = false);\r
- void AddWords(unsigned SeqIndex, const word_t *Words, unsigned N);\r
- void Init(bool Nucleo, unsigned WordLength);\r
- void Init2(bool Nucleo, unsigned TableSize);\r
- void InitRed(unsigned WordLength);\r
- void InitWordScores(const float *const *SubstMx);\r
- void Reset();\r
- void LogMe() const;\r
- unsigned LogMemSize() const;\r
- void LogWordStats(unsigned TopWords = 10) const;\r
- const char *WordToStr(word_t Word) const;\r
- word_t SeqToWord(const byte *Seq) const;\r
- unsigned SeqToWords(const byte *Seq, unsigned L, word_t *Words) const;\r
- unsigned SeqToWordsStep(unsigned Step, const byte *Seq, unsigned L, word_t *Words) const;\r
- unsigned WordsToCounts(const word_t *Words, unsigned N,\r
- word_t *UniqueWords, seqcountperword_t *Counts) const;\r
- unsigned GetUniqueWords(const word_t *Words, unsigned N,\r
- word_t *UniqueWords) const;\r
- void LogSizeHisto() const;\r
- };\r
-\r
-#endif // windex_h\r
+++ /dev/null
-#include "myutils.h"\r
-#include "chime.h"\r
-\r
-void WriteChimeFileHdr(FILE *f)\r
- {\r
- if (f == 0)\r
- return;\r
-\r
- fprintf(f,\r
- "\tQuery" // 1\r
- "\tA" // 2\r
- "\tB" // 3\r
- "\tIdQM" // 4\r
- "\tIdQA" // 5\r
- "\tIdQB" // 6\r
- "\tIdAB" // 7\r
- "\tIdQT" // 8\r
- "\tLY" // 9\r
- "\tLN" // 10\r
- "\tLA" // 11\r
- "\tRY" // 12\r
- "\tRN" // 13\r
- "\tRA" // 14\r
- "\tDiv" // 15\r
- "\tY" // 16\r
- "\n"\r
- );\r
- }\r
-\r
-void WriteChimeHit(FILE *f, const ChimeHit2 &Hit)\r
- {\r
- if (f == 0)\r
- return;\r
-\r
- if (Hit.Div <= 0.0)\r
- {\r
- fprintf(f, "0.0000"); // 0\r
-\r
- fprintf(f,\r
- "\t%s", Hit.QLabel.c_str()); // 1\r
-\r
- fprintf(f,\r
- "\t*" // 2\r
- "\t*" // 3\r
- "\t*" // 4\r
- "\t*" // 5\r
- "\t*" // 6\r
- "\t*" // 7\r
- "\t*" // 8\r
- "\t*" // 9\r
- "\t*" // 10\r
- "\t*" // 11\r
- "\t*" // 12\r
- "\t*" // 13\r
- "\t*" // 14\r
- "\t*" // 15\r
- "\tN" // 16\r
- "\n"\r
- );\r
- return;\r
- }\r
-\r
- fprintf(f, "%.4f", Hit.Score); // 0\r
-\r
- fputc('\t', f);\r
- fputs(Hit.QLabel.c_str(), f); // 1\r
-\r
- fputc('\t', f);\r
- fputs(Hit.ALabel.c_str(), f); // 2\r
-\r
- fputc('\t', f);\r
- fputs(Hit.BLabel.c_str(), f); // 3\r
-\r
- fprintf(f, "\t%.1f", Hit.PctIdQM); // 4\r
- fprintf(f, "\t%.1f", Hit.PctIdQA); // 5\r
- fprintf(f, "\t%.1f", Hit.PctIdQB); // 6\r
- fprintf(f, "\t%.1f", Hit.PctIdAB); // 7\r
- fprintf(f, "\t%.1f", Hit.PctIdQT); // 8\r
-\r
- fprintf(f, "\t%u", Hit.CS_LY); // 9\r
- fprintf(f, "\t%u", Hit.CS_LN); // 10\r
- fprintf(f, "\t%u", Hit.CS_LA); // 11\r
-\r
- fprintf(f, "\t%u", Hit.CS_RY); // 12\r
- fprintf(f, "\t%u", Hit.CS_RN); // 13\r
- fprintf(f, "\t%u", Hit.CS_RA); // 14\r
-\r
- fprintf(f, "\t%.2f", Hit.Div); // 15\r
-\r
- fprintf(f, "\t%c", yon(Hit.Accept())); // 16\r
- fputc('\n', f);\r
- }\r
-\r
-unsigned GetUngappedLength(const byte *Seq, unsigned L)\r
- {\r
- unsigned UL = 0;\r
- for (unsigned i = 0; i < L; ++i)\r
- if (!isgap(Seq[i]))\r
- ++UL;\r
- return UL;\r
- }\r
-\r
-void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit)\r
- {\r
- if (f == 0)\r
- return;\r
-\r
- if (Hit.Div <= 0.0)\r
- return;\r
-\r
- const string &Q3 = Hit.Q3;\r
- const string &A3 = Hit.A3;\r
- const string &B3 = Hit.B3;\r
-\r
- const byte *Q3Seq = (const byte *) Q3.c_str();\r
- const byte *A3Seq = (const byte *) A3.c_str();\r
- const byte *B3Seq = (const byte *) B3.c_str();\r
-\r
-// Aligned\r
- unsigned ColCount = SIZE(Q3);\r
- asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
-\r
- unsigned LQ = GetUngappedLength(Q3Seq, ColCount);\r
- unsigned LA = GetUngappedLength(A3Seq, ColCount);\r
- unsigned LB = GetUngappedLength(B3Seq, ColCount);\r
-\r
- fprintf(f, "\n");\r
- fprintf(f, "------------------------------------------------------------------------\n");\r
- fprintf(f, "Query (%5u nt) %s\n", LQ, Hit.QLabel.c_str());\r
- fprintf(f, "ParentA (%5u nt) %s\n", LA, Hit.ALabel.c_str());\r
- fprintf(f, "ParentB (%5u nt) %s\n", LB, Hit.BLabel.c_str());\r
-\r
-// Strip terminal gaps in query\r
- unsigned FromCol = UINT_MAX;\r
- unsigned ToCol = UINT_MAX;\r
- for (unsigned Col = 0; Col < ColCount; ++Col)\r
- {\r
- if (!isgap(Q3Seq[Col]))\r
- {\r
- if (FromCol == UINT_MAX)\r
- FromCol = Col;\r
- ToCol = Col;\r
- }\r
- }\r
-\r
- unsigned QPos = 0;\r
- unsigned APos = 0;\r
- unsigned BPos = 0;\r
- for (unsigned Col = 0; Col < FromCol; ++Col)\r
- {\r
- if (!isgap(A3Seq[Col]))\r
- ++APos;\r
- if (!isgap(B3Seq[Col]))\r
- ++BPos;\r
- }\r
-\r
- unsigned Range = ToCol - FromCol + 1;\r
- unsigned RowCount = (Range + 79)/80;\r
- unsigned RowFromCol = FromCol;\r
- for (unsigned RowIndex = 0; RowIndex < RowCount; ++RowIndex)\r
- {\r
- fprintf(f, "\n");\r
- unsigned RowToCol = RowFromCol + 79;\r
- if (RowToCol > ToCol)\r
- RowToCol = ToCol;\r
-\r
- // A row\r
- fprintf(f, "A %5u ", APos + 1);\r
- for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- if (a != q)\r
- a = tolower(a);\r
- fprintf(f, "%c", a);\r
- if (!isgap(a))\r
- ++APos;\r
- }\r
- fprintf(f, " %u\n", APos);\r
-\r
- // Q row\r
- fprintf(f, "Q %5u ", QPos + 1);\r
- for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- fprintf(f, "%c", q);\r
- if (!isgap(q))\r
- ++QPos;\r
- }\r
- fprintf(f, " %u\n", QPos);\r
-\r
- // B row\r
- fprintf(f, "B %5u ", BPos + 1);\r
- for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char b = B3Seq[Col];\r
- if (b != q)\r
- b = tolower(b);\r
- fprintf(f, "%c", b);\r
- if (!isgap(b))\r
- ++BPos;\r
- }\r
- fprintf(f, " %u\n", BPos);\r
-\r
- // Diffs\r
- fprintf(f, "Diffs ");\r
- for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- char c = ' ';\r
- if (isgap(q) || isgap(a) || isgap(b))\r
- c = ' ';\r
- else if (Col < Hit.ColXLo)\r
- {\r
- if (q == a && q == b)\r
- c = ' ';\r
- else if (q == a && q != b)\r
- c = 'A';\r
- else if (q == b && q != a)\r
- c = 'b';\r
- else if (a == b && q != a)\r
- c = 'N';\r
- else\r
- c = '?';\r
- }\r
- else if (Col > Hit.ColXHi)\r
- {\r
- if (q == a && q == b)\r
- c = ' ';\r
- else if (q == b && q != a)\r
- c = 'B';\r
- else if (q == a && q != b)\r
- c = 'a';\r
- else if (a == b && q != a)\r
- c = 'N';\r
- else\r
- c = '?';\r
- }\r
-\r
- fprintf(f, "%c", c);\r
- }\r
- fprintf(f, "\n");\r
-\r
- // SNPs\r
- fprintf(f, "Votes ");\r
- for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
- {\r
- char q = Q3Seq[Col];\r
- char a = A3Seq[Col];\r
- char b = B3Seq[Col];\r
-\r
- bool PrevGap = Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1]));\r
- bool NextGap = Col+1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1]));\r
-\r
- char c = ' ';\r
- if (isgap(q) || isgap(a) || isgap(b) || PrevGap || NextGap)\r
- c = ' ';\r
- else if (Col < Hit.ColXLo)\r
- {\r
- if (q == a && q == b)\r
- c = ' ';\r
- else if (q == a && q != b)\r
- c = '+';\r
- else if (q == b && q != a)\r
- c = '!';\r
- else\r
- c = '0';\r
- }\r
- else if (Col > Hit.ColXHi)\r
- {\r
- if (q == a && q == b)\r
- c = ' ';\r
- else if (q == b && q != a)\r
- c = '+';\r
- else if (q == a && q != b)\r
- c = '!';\r
- else\r
- c = '0';\r
- }\r
-\r
- fprintf(f, "%c", c);\r
- }\r
- fprintf(f, "\n");\r
-\r
- // LR row\r
- fprintf(f, "Model ");\r
- for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
- {\r
- if (Col < Hit.ColXLo)\r
- fprintf(f, "A");\r
- else if (Col >= Hit.ColXLo && Col <= Hit.ColXHi)\r
- fprintf(f, "x");\r
- else\r
- fprintf(f, "B");\r
- }\r
-\r
- fprintf(f, "\n");\r
-\r
- RowFromCol += 80;\r
- }\r
- fprintf(f, "\n");\r
-\r
- double PctIdBestP = max(Hit.PctIdQA, Hit.PctIdQB);\r
- double Div = (Hit.PctIdQM - PctIdBestP)*100.0/PctIdBestP;\r
-\r
- unsigned LTot = Hit.CS_LY + Hit.CS_LN + Hit.CS_LA;\r
- unsigned RTot = Hit.CS_RY + Hit.CS_RN + Hit.CS_RA;\r
-\r
- double PctL = Pct(Hit.CS_LY, LTot);\r
- double PctR = Pct(Hit.CS_RY, RTot);\r
-\r
- fprintf(f,\r
- "Ids. QA %.1f%%, QB %.1f%%, AB %.1f%%, QModel %.1f%%, Div. %+.1f%%\n",\r
- Hit.PctIdQA,\r
- Hit.PctIdQB,\r
- Hit.PctIdAB,\r
- Hit.PctIdQM,\r
- Div);\r
-\r
- fprintf(f,\r
- "Diffs Left %u: N %u, A %u, Y %u (%.1f%%); Right %u: N %u, A %u, Y %u (%.1f%%), Score %.4f\n",\r
- LTot, Hit.CS_LN, Hit.CS_LA, Hit.CS_LY, PctL,\r
- RTot, Hit.CS_RN, Hit.CS_RA, Hit.CS_RY, PctR,\r
- Hit.Score);\r
- }\r