From: Sarah Westcott Date: Wed, 2 May 2012 15:19:21 +0000 (-0400) Subject: removing chime source files from mother project. X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=a6cf29fa4dac0909c7582cb1094151d34093ee76 removing chime source files from mother project. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index ff18d58..6e156df 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -20,28 +20,6 @@ A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */; }; A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; }; A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; }; - A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3655137DAB8300332B0C /* addtargets2.cpp */; }; - A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3656137DAB8300332B0C /* alignchime.cpp */; }; - A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3657137DAB8300332B0C /* alignchimel.cpp */; }; - A74D368A137DAB8400332B0C /* alnparams.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365A137DAB8300332B0C /* alnparams.cpp */; }; - A74D368B137DAB8400332B0C /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365C137DAB8300332B0C /* alpha.cpp */; }; - A74D368C137DAB8400332B0C /* alpha2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365E137DAB8300332B0C /* alpha2.cpp */; }; - A74D368D137DAB8400332B0C /* fractid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3664137DAB8300332B0C /* fractid.cpp */; }; - A74D368E137DAB8400332B0C /* getparents.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3665137DAB8300332B0C /* getparents.cpp */; }; - A74D368F137DAB8400332B0C /* globalalign2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3666137DAB8300332B0C /* globalalign2.cpp */; }; - A74D3690137DAB8400332B0C /* make3way.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366A137DAB8300332B0C /* make3way.cpp */; }; - A74D3691137DAB8400332B0C /* mx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366B137DAB8300332B0C /* mx.cpp */; }; - A74D3692137DAB8400332B0C /* myutils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366E137DAB8300332B0C /* myutils.cpp */; }; - A74D3693137DAB8400332B0C /* path.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3672137DAB8300332B0C /* path.cpp */; }; - A74D3694137DAB8400332B0C /* searchchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3674137DAB8300332B0C /* searchchime.cpp */; }; - A74D3695137DAB8400332B0C /* seqdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3676137DAB8300332B0C /* seqdb.cpp */; }; - A74D3696137DAB8400332B0C /* setnucmx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3678137DAB8300332B0C /* setnucmx.cpp */; }; - A74D3697137DAB8400332B0C /* sfasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3679137DAB8300332B0C /* sfasta.cpp */; }; - A74D3698137DAB8400332B0C /* tracebackbit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D367F137DAB8300332B0C /* tracebackbit.cpp */; }; - A74D3699137DAB8400332B0C /* uchime_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3681137DAB8300332B0C /* uchime_main.cpp */; }; - A74D369A137DAB8400332B0C /* usort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3683137DAB8300332B0C /* usort.cpp */; }; - A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3684137DAB8300332B0C /* viterbifast.cpp */; }; - A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3686137DAB8300332B0C /* writechhit.cpp */; }; A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; }; A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; }; A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; }; @@ -405,56 +383,6 @@ A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurmetastats.cpp; sourceTree = ""; }; A74A9A9D148E881E00AB5E3E /* spline.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = spline.h; sourceTree = ""; }; A74A9A9E148E881E00AB5E3E /* spline.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spline.cpp; sourceTree = ""; }; - A74D3655137DAB8300332B0C /* addtargets2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = addtargets2.cpp; sourceTree = ""; }; - A74D3656137DAB8300332B0C /* alignchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchime.cpp; sourceTree = ""; }; - A74D3657137DAB8300332B0C /* alignchimel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchimel.cpp; sourceTree = ""; }; - A74D3658137DAB8300332B0C /* allocs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = allocs.h; sourceTree = ""; }; - A74D3659137DAB8300332B0C /* alnheuristics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alnheuristics.h; sourceTree = ""; }; - A74D365A137DAB8300332B0C /* alnparams.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alnparams.cpp; sourceTree = ""; }; - A74D365B137DAB8300332B0C /* alnparams.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alnparams.h; sourceTree = ""; }; - A74D365C137DAB8300332B0C /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = ""; }; - A74D365D137DAB8300332B0C /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = ""; }; - A74D365E137DAB8300332B0C /* alpha2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha2.cpp; sourceTree = ""; }; - A74D365F137DAB8300332B0C /* chainer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chainer.h; sourceTree = ""; }; - A74D3660137DAB8300332B0C /* chime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chime.h; sourceTree = ""; }; - A74D3661137DAB8300332B0C /* diagbox.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = diagbox.h; sourceTree = ""; }; - A74D3662137DAB8300332B0C /* dp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dp.h; sourceTree = ""; }; - A74D3663137DAB8300332B0C /* evalue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = evalue.h; sourceTree = ""; }; - A74D3664137DAB8300332B0C /* fractid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fractid.cpp; sourceTree = ""; }; - A74D3665137DAB8300332B0C /* getparents.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getparents.cpp; sourceTree = ""; }; - A74D3666137DAB8300332B0C /* globalalign2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = globalalign2.cpp; sourceTree = ""; }; - A74D3667137DAB8300332B0C /* help.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = help.h; sourceTree = ""; }; - A74D3668137DAB8300332B0C /* hsp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hsp.h; sourceTree = ""; }; - A74D3669137DAB8300332B0C /* hspfinder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hspfinder.h; sourceTree = ""; }; - A74D366A137DAB8300332B0C /* make3way.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = make3way.cpp; sourceTree = ""; }; - A74D366B137DAB8300332B0C /* mx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mx.cpp; sourceTree = ""; }; - A74D366C137DAB8300332B0C /* mx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mx.h; sourceTree = ""; }; - A74D366D137DAB8300332B0C /* myopts.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myopts.h; sourceTree = ""; }; - A74D366E137DAB8300332B0C /* myutils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myutils.cpp; sourceTree = ""; }; - A74D366F137DAB8300332B0C /* myutils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myutils.h; sourceTree = ""; }; - A74D3670137DAB8300332B0C /* orf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = orf.h; sourceTree = ""; }; - A74D3671137DAB8300332B0C /* out.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = out.h; sourceTree = ""; }; - A74D3672137DAB8300332B0C /* path.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = path.cpp; sourceTree = ""; }; - A74D3673137DAB8300332B0C /* path.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = path.h; sourceTree = ""; }; - A74D3674137DAB8300332B0C /* searchchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = searchchime.cpp; sourceTree = ""; }; - A74D3675137DAB8300332B0C /* seq.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seq.h; sourceTree = ""; }; - A74D3676137DAB8300332B0C /* seqdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqdb.cpp; sourceTree = ""; }; - A74D3677137DAB8300332B0C /* seqdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqdb.h; sourceTree = ""; }; - A74D3678137DAB8300332B0C /* setnucmx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = setnucmx.cpp; sourceTree = ""; }; - A74D3679137DAB8300332B0C /* sfasta.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sfasta.cpp; sourceTree = ""; }; - A74D367A137DAB8300332B0C /* sfasta.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sfasta.h; sourceTree = ""; }; - A74D367B137DAB8300332B0C /* svnmods.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = svnmods.h; sourceTree = ""; }; - A74D367C137DAB8300332B0C /* svnversion.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = svnversion.h; sourceTree = ""; }; - A74D367D137DAB8300332B0C /* timers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = timers.h; sourceTree = ""; }; - A74D367E137DAB8300332B0C /* timing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = timing.h; sourceTree = ""; }; - A74D367F137DAB8300332B0C /* tracebackbit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tracebackbit.cpp; sourceTree = ""; }; - A74D3680137DAB8300332B0C /* uc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uc.h; sourceTree = ""; }; - A74D3681137DAB8300332B0C /* uchime_main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = uchime_main.cpp; sourceTree = ""; }; - A74D3682137DAB8300332B0C /* ultra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ultra.h; sourceTree = ""; }; - A74D3683137DAB8300332B0C /* usort.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = usort.cpp; sourceTree = ""; }; - A74D3684137DAB8300332B0C /* viterbifast.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = viterbifast.cpp; sourceTree = ""; }; - A74D3685137DAB8300332B0C /* windex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = windex.h; sourceTree = ""; }; - A74D3686137DAB8300332B0C /* writechhit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = writechhit.cpp; sourceTree = ""; }; A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = ""; }; A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = ""; }; A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = ""; }; @@ -1221,63 +1149,6 @@ name = Products; sourceTree = ""; }; - A74D3644137DA7CE00332B0C /* uchime */ = { - isa = PBXGroup; - children = ( - A74D3655137DAB8300332B0C /* addtargets2.cpp */, - A74D3656137DAB8300332B0C /* alignchime.cpp */, - A74D3657137DAB8300332B0C /* alignchimel.cpp */, - A74D3658137DAB8300332B0C /* allocs.h */, - A74D3659137DAB8300332B0C /* alnheuristics.h */, - A74D365A137DAB8300332B0C /* alnparams.cpp */, - A74D365B137DAB8300332B0C /* alnparams.h */, - A74D365C137DAB8300332B0C /* alpha.cpp */, - A74D365D137DAB8300332B0C /* alpha.h */, - A74D365E137DAB8300332B0C /* alpha2.cpp */, - A74D365F137DAB8300332B0C /* chainer.h */, - A74D3660137DAB8300332B0C /* chime.h */, - A74D3661137DAB8300332B0C /* diagbox.h */, - A74D3662137DAB8300332B0C /* dp.h */, - A74D3663137DAB8300332B0C /* evalue.h */, - A74D3664137DAB8300332B0C /* fractid.cpp */, - A74D3665137DAB8300332B0C /* getparents.cpp */, - A74D3666137DAB8300332B0C /* globalalign2.cpp */, - A74D3667137DAB8300332B0C /* help.h */, - A74D3668137DAB8300332B0C /* hsp.h */, - A74D3669137DAB8300332B0C /* hspfinder.h */, - A74D366A137DAB8300332B0C /* make3way.cpp */, - A74D366B137DAB8300332B0C /* mx.cpp */, - A74D366C137DAB8300332B0C /* mx.h */, - A74D366D137DAB8300332B0C /* myopts.h */, - A74D366E137DAB8300332B0C /* myutils.cpp */, - A74D366F137DAB8300332B0C /* myutils.h */, - A74D3670137DAB8300332B0C /* orf.h */, - A74D3671137DAB8300332B0C /* out.h */, - A74D3672137DAB8300332B0C /* path.cpp */, - A74D3673137DAB8300332B0C /* path.h */, - A74D3674137DAB8300332B0C /* searchchime.cpp */, - A74D3675137DAB8300332B0C /* seq.h */, - A74D3676137DAB8300332B0C /* seqdb.cpp */, - A74D3677137DAB8300332B0C /* seqdb.h */, - A74D3678137DAB8300332B0C /* setnucmx.cpp */, - A74D3679137DAB8300332B0C /* sfasta.cpp */, - A74D367A137DAB8300332B0C /* sfasta.h */, - A74D367B137DAB8300332B0C /* svnmods.h */, - A74D367C137DAB8300332B0C /* svnversion.h */, - A74D367D137DAB8300332B0C /* timers.h */, - A74D367E137DAB8300332B0C /* timing.h */, - A74D367F137DAB8300332B0C /* tracebackbit.cpp */, - A74D3680137DAB8300332B0C /* uc.h */, - A74D3681137DAB8300332B0C /* uchime_main.cpp */, - A74D3682137DAB8300332B0C /* ultra.h */, - A74D3683137DAB8300332B0C /* usort.cpp */, - A74D3684137DAB8300332B0C /* viterbifast.cpp */, - A74D3685137DAB8300332B0C /* windex.h */, - A74D3686137DAB8300332B0C /* writechhit.cpp */, - ); - name = uchime; - sourceTree = ""; - }; A7D161E7149F7F50000523E8 /* fortran */ = { isa = PBXGroup; children = ( @@ -1795,7 +1666,6 @@ A7E9BA4512D3965600DA6239 /* chimera */ = { isa = PBXGroup; children = ( - A74D3644137DA7CE00332B0C /* uchime */, A7E9B65C12D37EC300DA6239 /* bellerophon.cpp */, A7E9B65D12D37EC300DA6239 /* bellerophon.h */, A7E9B67412D37EC400DA6239 /* ccode.cpp */, @@ -2245,28 +2115,6 @@ A7FE7C401330EA1000F7B327 /* getcurrentcommand.cpp in Sources */, A7FE7E6D13311EA400F7B327 /* setcurrentcommand.cpp in Sources */, A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */, - A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */, - A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */, - A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */, - A74D368A137DAB8400332B0C /* alnparams.cpp in Sources */, - A74D368B137DAB8400332B0C /* alpha.cpp in Sources */, - A74D368C137DAB8400332B0C /* alpha2.cpp in Sources */, - A74D368D137DAB8400332B0C /* fractid.cpp in Sources */, - A74D368E137DAB8400332B0C /* getparents.cpp in Sources */, - A74D368F137DAB8400332B0C /* globalalign2.cpp in Sources */, - A74D3690137DAB8400332B0C /* make3way.cpp in Sources */, - A74D3691137DAB8400332B0C /* mx.cpp in Sources */, - A74D3692137DAB8400332B0C /* myutils.cpp in Sources */, - A74D3693137DAB8400332B0C /* path.cpp in Sources */, - A74D3694137DAB8400332B0C /* searchchime.cpp in Sources */, - A74D3695137DAB8400332B0C /* seqdb.cpp in Sources */, - A74D3696137DAB8400332B0C /* setnucmx.cpp in Sources */, - A74D3697137DAB8400332B0C /* sfasta.cpp in Sources */, - A74D3698137DAB8400332B0C /* tracebackbit.cpp in Sources */, - A74D3699137DAB8400332B0C /* uchime_main.cpp in Sources */, - A74D369A137DAB8400332B0C /* usort.cpp in Sources */, - A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */, - A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */, A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */, A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */, A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */, diff --git a/addtargets2.cpp b/addtargets2.cpp deleted file mode 100644 index 4e0dbd1..0000000 --- a/addtargets2.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//#if UCHIMES - -#include "myutils.h" -#include "chime.h" -#include "ultra.h" -#include - -const float MAX_WORD_COUNT_DROP = 1; - -void SortDescending(const vector &Values, vector &Order); -bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path); -double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path); -void USort(const SeqData &Query, const SeqDB &DB, vector &WordCounts, - vector &Order); - -void AddTargets(SeqDB &DB, const SeqData &Query, set &TargetIndexes) - { - const unsigned SeqCount = DB.GetSeqCount(); - if (SeqCount == 0) - return; - - vector WordCounts; - vector Order; - USort(Query, DB, WordCounts, Order); - asserta(SIZE(Order) == SeqCount); - unsigned TopSeqIndex = Order[0]; - float TopWordCount = WordCounts[TopSeqIndex]; - for (unsigned i = 0; i < SeqCount; ++i) - { - unsigned SeqIndex = Order[i]; - float WordCount = WordCounts[SeqIndex]; - if (TopWordCount - WordCount > MAX_WORD_COUNT_DROP) - return; - TargetIndexes.insert(SeqIndex); - } - } - -//#endif diff --git a/alignchime.cpp b/alignchime.cpp deleted file mode 100644 index d7b05a8..0000000 --- a/alignchime.cpp +++ /dev/null @@ -1,649 +0,0 @@ -#include "myutils.h" -#include "seq.h" -#include "chime.h" -#include "dp.h" - -#define TRACE 0 -#define TRACE_BS 0 - -void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB, - const string &PathQA, const string &PathQB, - string &Q3, string &A3, string &B3); - -void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3, - const string &QLabel, const string &ALabel, const string &BLabel, - ChimeHit2 &Hit); - -double GetScore2(double Y, double N, double A) - { - return Y/(opt_xn*(N + opt_dn) + opt_xa*A); - } - -void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3, - const string &QLabel, const string &ALabel, const string &BLabel, - ChimeHit2 &Hit) - { - Hit.Clear(); - Hit.QLabel = QLabel; - - const byte *Q3Seq = (const byte *) Q3.c_str(); - const byte *A3Seq = (const byte *) A3.c_str(); - const byte *B3Seq = (const byte *) B3.c_str(); - - const unsigned ColCount = SIZE(Q3); - asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount); - -#if TRACE - Log("Q %5u %*.*s\n", ColCount, ColCount, ColCount, Q3Seq); - Log("A %5u %*.*s\n", ColCount, ColCount, ColCount, A3Seq); - Log("B %5u %*.*s\n", ColCount, ColCount, ColCount, B3Seq); -#endif - -// Discard terminal gaps - unsigned ColLo = UINT_MAX; - unsigned ColHi = UINT_MAX; - for (unsigned Col = 2; Col + 2 < ColCount; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - if (isacgt(q) && isacgt(a) && isacgt(b)) - { - if (ColLo == UINT_MAX) - ColLo = Col; - ColHi = Col; - } - } - - if (ColLo == UINT_MAX) - return; - - unsigned QPos = 0; - unsigned APos = 0; - unsigned BPos = 0; - unsigned DiffCount = 0; - - vector ColToQPos(ColLo, UINT_MAX); - vector AccumCount(ColLo, UINT_MAX); - vector AccumSameA(ColLo, UINT_MAX); - vector AccumSameB(ColLo, UINT_MAX); - vector AccumForA(ColLo, UINT_MAX); - vector AccumForB(ColLo, UINT_MAX); - vector AccumAbstain(ColLo, UINT_MAX); - vector AccumAgainst(ColLo, UINT_MAX); - - unsigned SumSameA = 0; - unsigned SumSameB = 0; - unsigned SumSameAB = 0; - unsigned Sum = 0; - unsigned SumForA = 0; - unsigned SumForB = 0; - unsigned SumAbstain = 0; - unsigned SumAgainst = 0; - for (unsigned Col = ColLo; Col <= ColHi; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - if (isacgt(q) && isacgt(a) && isacgt(b)) - { - if (q == a) - ++SumSameA; - if (q == b) - ++SumSameB; - if (a == b) - ++SumSameAB; - if (q == a && q != b) - ++SumForA; - if (q == b && q != a) - ++SumForB; - if (a == b && q != a) - ++SumAgainst; - if (q != a && q != b) - ++SumAbstain; - ++Sum; - } - - ColToQPos.push_back(QPos); - AccumSameA.push_back(SumSameA); - AccumSameB.push_back(SumSameB); - AccumCount.push_back(Sum); - AccumForA.push_back(SumForA); - AccumForB.push_back(SumForB); - AccumAbstain.push_back(SumAbstain); - AccumAgainst.push_back(SumAgainst); - - if (q != '-') - ++QPos; - if (a != '-') - ++APos; - if (b != '-') - ++BPos; - } - - asserta(SIZE(ColToQPos) == ColHi+1); - asserta(SIZE(AccumSameA) == ColHi+1); - asserta(SIZE(AccumSameB) == ColHi+1); - asserta(SIZE(AccumAbstain) == ColHi+1); - asserta(SIZE(AccumAgainst) == ColHi+1); - - double IdQA = double(SumSameA)/Sum; - double IdQB = double(SumSameB)/Sum; - double IdAB = double(SumSameAB)/Sum; - double MaxId = max(IdQA, IdQB); - -#if TRACE - Log("IdQA=%.1f%% IdQB=%.1f%% IdAB=%.1f\n", IdQA*100.0, IdQB*100.0, IdAB*100.0); - Log("\n"); - Log(" x AQB IdAL IdBL IdAR IdBR DivAB DivBA YAL YBL YAR YBR AbL AbR ScoreAB ScoreAB XLo Xhi\n"); - Log("----- --- ----- ----- ----- ----- ------ ------ ----- ----- ----- ----- ----- ----- ------- ------- ----- -----\n"); -#endif - unsigned BestXLo = UINT_MAX; - unsigned BestXHi = UINT_MAX; - double BestDiv = 0.0; - double BestIdQM = 0.0; - double BestScore = 0.0; - -// Find range of cols BestXLo..BestXHi that maximizes score - bool FirstA = false; - -// NOTE: Must be < ColHi not <= because use Col+1 below - for (unsigned Col = ColLo; Col < ColHi; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - unsigned SameAL = AccumSameA[Col]; - unsigned SameBL = AccumSameB[Col]; - unsigned SameAR = SumSameA - AccumSameA[Col]; - unsigned SameBR = SumSameB - AccumSameB[Col]; - - double IdAB = double(SameAL + SameBR)/Sum; - double IdBA = double(SameBL + SameAR)/Sum; - - unsigned ForAL = AccumForA[Col]; - unsigned ForBL = AccumForB[Col]; - unsigned ForAR = SumForA - AccumForA[Col+1]; - unsigned ForBR = SumForB - AccumForB[Col+1]; - unsigned AbL = AccumAbstain[Col]; - unsigned AbR = SumAbstain - AccumAbstain[Col+1]; - - double ScoreAB = GetScore2(ForAL, ForBL, AbL)*GetScore2(ForBR, ForAR, AbR); - double ScoreBA = GetScore2(ForBL, ForAL, AbL)*GetScore2(ForAR, ForBR, AbR); - - double DivAB = IdAB/MaxId; - double DivBA = IdBA/MaxId; - double MaxDiv = max(DivAB, DivBA); - - //if (MaxDiv > BestDiv) - // { - // BestDiv = MaxDiv; - // BestXLo = Col; - // BestXHi = Col; - // FirstA = (DivAB > DivBA); - // if (FirstA) - // BestIdQM = IdAB; - // else - // BestIdQM = IdBA; - // } - //else if (MaxDiv == BestDiv) - // BestXHi = Col; - - double MaxScore = max(ScoreAB, ScoreBA); - if (MaxScore > BestScore) - { - BestScore = MaxScore; - BestXLo = Col; - BestXHi = Col; - FirstA = (ScoreAB > ScoreBA); - if (FirstA) - BestIdQM = IdAB; - else - BestIdQM = IdBA; - if (MaxDiv > BestDiv) - BestDiv = MaxDiv; - } - else if (MaxScore == BestScore) - { - BestXHi = Col; - if (MaxDiv > BestDiv) - BestDiv = MaxDiv; - } - -#if TRACE - { - Log("%5u", Col); - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - Log(" %c%c%c", a, q, b); - Log(" %5u", SameAL); - Log(" %5u", SameBL); - Log(" %5u", SameAR); - Log(" %5u", SameBR); - Log(" %5.4f", DivAB); - Log(" %5.4f", DivBA); - Log(" %5u", ForAL); - Log(" %5u", ForBL); - Log(" %5u", ForAR); - Log(" %5u", ForBR); - Log(" %5u", AbL); - Log(" %5u", AbR); - Log(" %7.4f", ScoreAB); - Log(" %7.4f", ScoreBA); - if (BestXLo != UINT_MAX) - Log(" %5u", BestXLo); - if (BestXHi != UINT_MAX) - Log(" %5u", BestXHi); - Log("\n"); - } -#endif - } - - if (BestXLo == UINT_MAX) - { -#if TRACE - Log("\n"); - Log("No crossover found.\n"); -#endif - return; - } -#if TRACE - Log("BestX col %u - %u\n", BestXLo, BestXHi); -#endif - -// Find maximum region of identity within BestXLo..BestXHi - unsigned ColXLo = (BestXLo + BestXHi)/2; - unsigned ColXHi = ColXLo; - unsigned SegLo = UINT_MAX; - unsigned SegHi = UINT_MAX; - for (unsigned Col = BestXLo; Col <= BestXHi; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - if (q == a && q == b) - { - if (SegLo == UINT_MAX) - SegLo = Col; - SegHi = Col; - } - else - { - unsigned SegLength = SegHi - SegLo + 1; - unsigned BestSegLength = ColXHi - ColXLo + 1; - if (SegLength > BestSegLength) - { - ColXLo = SegLo; - ColXHi = SegHi; - } - SegLo = UINT_MAX; - SegHi = UINT_MAX; - } - } - unsigned SegLength = SegHi - SegLo + 1; - unsigned BestSegLength = ColXHi - ColXLo + 1; - if (SegLength > BestSegLength) - { - ColXLo = SegLo; - ColXHi = SegHi; - } - - QPos = 0; - for (unsigned x = 0; x < ColCount; ++x) - { - if (x == ColXLo) - Hit.QXLo = QPos; - else if (x == ColXHi) - { - Hit.QXHi = QPos; - break; - } - char q = Q3Seq[x]; - if (q != '-') - ++QPos; - } - - Hit.ColXLo = ColXLo; - Hit.ColXHi = ColXHi; - - //if (FirstA) - // { - // Hit.LY = AccumForA[ColXLo]; - // Hit.LN = AccumForB[ColXLo]; - - // Hit.RY = SumForB - AccumForB[ColXHi]; - // Hit.RN = SumForA - AccumForA[ColXHi]; - // } - //else - // { - // Hit.LY = AccumForB[ColXLo]; - // Hit.LN = AccumForA[ColXLo]; - // Hit.RY = SumForA - AccumForA[ColXHi]; - // Hit.RN = SumForB - AccumForB[ColXHi]; - // } - - //Hit.LA = AccumAgainst[ColXLo]; - //Hit.LD = AccumAbstain[ColXLo]; - - //Hit.RA = SumAgainst - AccumAgainst[ColXHi]; - //Hit.RD = SumAbstain - AccumAbstain[ColXHi]; - - Hit.PctIdAB = IdAB*100.0; - Hit.PctIdQM = BestIdQM*100.0; - - Hit.Div = (BestDiv - 1.0)*100.0; - - //Hit.QSD = QSD; - Hit.Q3 = Q3; - Hit.QLabel = QLabel; - if (FirstA) - { - //Hit.ASD = ASD; - //Hit.BSD = BSD; - //Hit.PathQA = PathQA; - //Hit.PathQB = PathQB; - Hit.A3 = A3; - Hit.B3 = B3; - Hit.ALabel = ALabel; - Hit.BLabel = BLabel; - Hit.PctIdQA = IdQA*100.0; - Hit.PctIdQB = IdQB*100.0; - } - else - { - Hit.A3 = B3; - Hit.B3 = A3; - Hit.ALabel = BLabel; - Hit.BLabel = ALabel; - Hit.PctIdQA = IdQB*100.0; - Hit.PctIdQB = IdQA*100.0; - } - -// CS SNPs - Hit.CS_LY = 0; - Hit.CS_LN = 0; - Hit.CS_RY = 0; - Hit.CS_RN = 0; - Hit.CS_LA = 0; - Hit.CS_RA = 0; - - //vector Cons; - //for (unsigned Col = 0; Col < ColCount; ++Col) - // { - // char q = Q3Seq[Col]; - // char a = A3Seq[Col]; - // char b = B3Seq[Col]; - // if (q == a && q == b && a == b) - // { - // Cons.push_back(1.0f); - // continue; - // } - - // bool gapq = isgap(q); - // bool gapa = isgap(a); - // bool gapb = isgap(b); - - // if (!gapq && !gapa && !gapb) - // { - // if (q == a || q == b || a == b) - // Cons.push_back(0.75); - // else - // Cons.push_back(0.5); - // } - // else - // { - // if (!gapa && (a == b || a == q)) - // Cons.push_back(0.5f); - // else if (!gapb && b == q) - // Cons.push_back(0.5f); - // else - // Cons.push_back(0.0f); - // } - // } - - //float fLY = 0.0f; - //float fLN = 0.0f; - //float fLA = 0.0f; - //float fRY = 0.0f; - //float fRN = 0.0f; - //float fRA = 0.0f; - for (unsigned Col = ColLo; Col <= ColHi; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - if (q == a && q == b && a == b) - continue; - - unsigned ngaps = 0; - if (isgap(q)) - ++ngaps; - if (isgap(a)) - ++ngaps; - if (isgap(b)) - ++ngaps; - - if (opt_skipgaps) - { - if (ngaps == 3) - continue; - } - else - { - if (ngaps == 2) - continue; - } - - if (!FirstA) - swap(a, b); - - //float AvgCons = (Cons[Col-2] + Cons[Col-1] + Cons[Col+1] + Cons[Col+2])/4; - //if (Col < ColXLo) - // { - // if (q == a && q != b) - // fLY += AvgCons; - // else if (q == b && q != a) - // fLN += AvgCons; - // else - // fLA += AvgCons; - // } - //else if (Col > ColXHi) - // { - // if (q == b && q != a) - // fRY += AvgCons; - // else if (q == a && q != b) - // fRN += AvgCons; - // else - // fRA += AvgCons; - // } - - if (opt_skipgaps2) - { - if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1]))) - continue; - if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1]))) - continue; - } - - //if (Col > 0 && isgap(Q3Seq[Col-1])) - //continue; - //if (Col + 1 < ColCount && isgap(Q3Seq[Col+1])) - // continue; - - if (Col < ColXLo) - { - if (q == a && q != b) - ++Hit.CS_LY; - else if (q == b && q != a) - ++Hit.CS_LN; - else - ++Hit.CS_LA; - } - else if (Col > ColXHi) - { - if (q == b && q != a) - ++Hit.CS_RY; - else if (q == a && q != b) - ++Hit.CS_RN; - else - ++Hit.CS_RA; - } - } - - double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA); - double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA); - Hit.Score = ScoreL*ScoreR; - - extern bool g_UchimeDeNovo; - - //if (0)//g_UchimeDeNovo) - // { - // double AbQ = GetAbFromLabel(QLabel.c_str()); - // double AbA = GetAbFromLabel(ALabel.c_str()); - // double AbB = GetAbFromLabel(BLabel.c_str()); - // if (AbQ > 0.0 && AbA > 0.0 && AbB > 0.0) - // { - // double MinAb = min(AbA, AbB); - // double Ratio = MinAb/AbQ; - // double t = Ratio - opt_abx; - // // double Factor = 2.0/(1.0 + exp(-t)); - // double Factor = min(Ratio, opt_abx)/opt_abx; - // if (opt_verbose) - // Log("Score %.4f Ab factor %.4f >%s\n", Hit.Score, Factor, QLabel.c_str()); - // Hit.Score *= Factor; - // } - // } - - extern FILE *g_fUChimeAlns; - if (g_fUChimeAlns != 0 && Hit.Div > 0.0) - { - void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit); - WriteChimeHitX(g_fUChimeAlns, Hit); - } - } - -void AlignChime3(const string &Q3, const string &A3, const string &B3, - const string &QLabel, const string &ALabel, const string &BLabel, - ChimeHit2 &Hit) - { - if (opt_ucl) - AlignChimeLocal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit); - else - AlignChimeGlobal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit); - } - -static void StripGaps(const byte *Seq, unsigned L, string &s) - { - s.clear(); - for (unsigned i = 0; i < L; ++i) - { - char c = Seq[i]; - if (!isgap(c)) - s.push_back(c); - } - } - -static void StripGapsAlloc(const SeqData &SDIn, SeqData &SDOut) - { - SDOut = SDIn; - byte *s = myalloc(byte, SDIn.L); - unsigned k = 0; - for (unsigned i = 0; i < SDIn.L; ++i) - { - char c = SDIn.Seq[i]; - if (!isgap(c)) - s[k++] = toupper(c); - } - SDOut.Seq = s; - SDOut.L = k; - } - -void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD, - const string &PathQA, const string &PathQB, ChimeHit2 &Hit) - { - //if (opt_ucl) - // { - // AlignChimeLocal(QSD, ASD, BSD, PathQA, PathQB, Hit); - // return; - // } - - string Q3; - string A3; - string B3; - Make3Way(QSD, ASD, BSD, PathQA, PathQB, Q3, A3, B3); - - AlignChime3(Q3, A3, B3, QSD.Label, ASD.Label, BSD.Label, Hit); - } - -void AlignChime3SDRealign(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3, - ChimeHit2 &Hit) - { - SeqData QSD; - SeqData ASD; - SeqData BSD; - StripGapsAlloc(QSD3, QSD); - StripGapsAlloc(ASD3, ASD); - StripGapsAlloc(BSD3, BSD); - - string PathQA; - string PathQB; - bool FoundQA = GlobalAlign(QSD, ASD, PathQA); - bool FoundQB = GlobalAlign(QSD, BSD, PathQB); - if (!FoundQA || !FoundQB) - { - Hit.Clear(); - Hit.QLabel = QSD3.Label; - return; - } - - AlignChime(QSD, ASD, BSD, PathQA, PathQB, Hit); - - myfree((void *) QSD.Seq); - myfree((void *) ASD.Seq); - myfree((void *) BSD.Seq); - } - -void AlignChime3SD(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3, - ChimeHit2 &Hit) - { - if (opt_realign) - { - AlignChime3SDRealign(QSD3, ASD3, BSD3, Hit); - return; - } - - string Q3; - string A3; - string B3; - - const unsigned ColCount = QSD3.L; - asserta(ASD3.L == ColCount && BSD3.L == ColCount); - - Q3.reserve(ColCount); - A3.reserve(ColCount); - B3.reserve(ColCount); - - const byte *QS = QSD3.Seq; - const byte *AS = ASD3.Seq; - const byte *BS = BSD3.Seq; - for (unsigned Col = 0; Col < ColCount; ++Col) - { - byte q = toupper(QS[Col]); - byte a = toupper(AS[Col]); - byte b = toupper(BS[Col]); - - if (isgap(q) && isgap(a) && isgap(b)) - continue; - - Q3.push_back(q); - A3.push_back(a); - B3.push_back(b); - } - - AlignChime3(Q3, A3, B3, QSD3.Label, ASD3.Label, BSD3.Label, Hit); - } diff --git a/alignchimel.cpp b/alignchimel.cpp deleted file mode 100644 index ae152af..0000000 --- a/alignchimel.cpp +++ /dev/null @@ -1,417 +0,0 @@ -#include "myutils.h" -#include "seq.h" -#include "chime.h" - -#define TRACE 0 - -/*** -Let: - S[i] = Score of col i: 0=no SNP, +1 = Y, -3 = N or A. - - V[k] = Best segment score from j, j+1 .. k for all possible j - max(j) Sum i=j..k S[i] - -Recursion relation: - V[k] = S[k] + max (V[k-1], 0) -***/ - -void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3, - const string &QLabel, const string &ALabel, const string &BLabel, - ChimeHit2 &Hit); - -void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB, - const string &PathQA, const string &PathQB, - string &Q3, string &A3, string &B3); - -double GetScore2(double Y, double N, double A); - -void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3, - const string &QLabel, const string &ALabel, const string &BLabel, - ChimeHit2 &Hit) - { - Hit.Clear(); - - const byte *Q3Seq = (const byte *) Q3.c_str(); - const byte *A3Seq = (const byte *) A3.c_str(); - const byte *B3Seq = (const byte *) B3.c_str(); - - const unsigned ColCount = SIZE(Q3); - asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount); - - vector ColScoresA(ColCount, 0.0f); - vector ColScoresB(ColCount, 0.0f); - - float ScoreN = -(float) opt_xn; - unsigned QL = 0; - for (unsigned Col = 0; Col < ColCount; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - if (!isgap(q)) - ++QL; - - if (q == a && q == b && a == b) - continue; - - if (isgap(q) || isgap(a) || isgap(b)) - continue; - - if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1]))) - continue; - - if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1]))) - continue; - - if (q == a && q != b) - ColScoresA[Col] = 1; - else - ColScoresA[Col] = ScoreN; - - if (q == b && q != a) - ColScoresB[Col] = 1; - else - ColScoresB[Col] = ScoreN; - } - - vector LVA(ColCount, 0.0f); - vector LVB(ColCount, 0.0f); - - LVA[0] = ColScoresA[0]; - LVB[0] = ColScoresB[0]; - for (unsigned Col = 1; Col < ColCount; ++Col) - { - LVA[Col] = max(LVA[Col-1], 0.0f) + ColScoresA[Col]; - LVB[Col] = max(LVB[Col-1], 0.0f) + ColScoresB[Col]; - } - - vector RVA(ColCount, 0.0f); - vector RVB(ColCount, 0.0f); - - RVA[ColCount-1] = ColScoresA[ColCount-1]; - RVB[ColCount-1] = ColScoresB[ColCount-1]; - for (int Col = ColCount-2; Col >= 0; --Col) - { - RVA[Col] = max(RVA[Col+1], 0.0f) + ColScoresA[Col]; - RVB[Col] = max(RVB[Col+1], 0.0f) + ColScoresB[Col]; - } - - bool FirstA = true; - float MaxSum = 0.0; - unsigned ColX = UINT_MAX; - for (unsigned Col = 1; Col < ColCount-1; ++Col) - { - float Sum = LVA[Col] + RVB[Col+1]; - if (Sum > MaxSum) - { - FirstA = true; - MaxSum = Sum; - ColX = Col; - } - } - - for (unsigned Col = 1; Col < ColCount-1; ++Col) - { - float Sum = LVB[Col] + RVA[Col+1]; - if (Sum > MaxSum) - { - FirstA = false; - MaxSum = Sum; - ColX = Col; - } - } - if (ColX == UINT_MAX) - return; - - unsigned ColLo = UINT_MAX; - unsigned ColHi = UINT_MAX; - if (FirstA) - { - float Sum = 0.0f; - for (int Col = ColX; Col >= 0; --Col) - { - Sum += ColScoresA[Col]; - if (Sum >= LVA[ColX]) - { - ColLo = Col; - break; - } - } - asserta(Sum >= LVA[ColX]); - Sum = 0.0f; - for (unsigned Col = ColX+1; Col < ColCount; ++Col) - { - Sum += ColScoresB[Col]; - if (Sum >= RVB[ColX]) - { - ColHi = Col; - break; - } - } - asserta(Sum >= RVB[ColX]); - } - else - { - float Sum = 0.0f; - for (int Col = ColX; Col >= 0; --Col) - { - Sum += ColScoresB[Col]; - if (Sum >= LVB[ColX]) - { - ColLo = Col; - break; - } - } - asserta(Sum >= LVB[ColX]); - Sum = 0.0f; - for (unsigned Col = ColX+1; Col < ColCount; ++Col) - { - Sum += ColScoresA[Col]; - if (Sum >= RVA[ColX]) - { - ColHi = Col; - break; - } - } - asserta(Sum >= RVA[ColX]); - } - - unsigned ColXHi = ColX; - for (unsigned Col = ColX + 1; Col < ColCount; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - if (q == a && q == b && !isgap(q)) - ColXHi = Col; - else - break; - } - - unsigned ColXLo = ColX; - for (int Col = (int) ColX - 1; Col >= 0; --Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - if (q == a && q == b && !isgap(q)) - ColXLo = Col; - else - break; - } - - unsigned IdQA = 0; - unsigned IdQB = 0; - unsigned IdAB = 0; - unsigned NQA = 0; - unsigned NQB = 0; - unsigned NAB = 0; - for (unsigned Col = 0; Col < ColCount; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - if (!isgap(q) && !isgap(a)) - { - ++NQA; - if (q == a) - ++IdQA; - } - - if (!isgap(q) && !isgap(b)) - { - ++NQB; - if (q == b) - ++IdQB; - } - - if (!isgap(a) && !isgap(b)) - { - ++NAB; - if (a == b) - ++IdAB; - } - } - - Hit.PctIdQA = Pct(IdQA, NQA); - Hit.PctIdQB = Pct(IdQB, NQB); - Hit.PctIdAB = Pct(IdAB, NAB); - - unsigned LIdQA = 0; - unsigned LIdQB = 0; - for (unsigned Col = ColLo; Col < ColXLo; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - if (!isgap(q) && !isgap(a)) - { - if (q == a) - ++LIdQA; - } - - if (!isgap(q) && !isgap(b)) - { - if (q == b) - ++LIdQB; - } - } - - unsigned RIdQA = 0; - unsigned RIdQB = 0; - for (unsigned Col = ColXHi+1; Col <= ColHi; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - if (!isgap(q) && !isgap(a)) - { - if (q == a) - ++RIdQA; - } - - if (!isgap(q) && !isgap(b)) - { - if (q == b) - ++RIdQB; - } - } - - unsigned IdDiffL = max(LIdQA, LIdQB) - min(LIdQA, LIdQB); - unsigned IdDiffR = max(RIdQA, RIdQB) - min(RIdQA, RIdQB); - unsigned MinIdDiff = min(IdDiffL, IdDiffR); - unsigned ColRange = ColHi - ColLo + 1; - if (opt_queryfract > 0.0f && float(ColRange)/float(QL) < opt_queryfract) - return; - -// double Div = Pct(MinIdDiff, QSD.L); - -#if TRACE - { - Log(" Col A Q B ScoreA ScoreB LVA LVB RVA RVB\n"); - Log("----- - - - ------- ------- ------- ------- ------- -------\n"); - for (unsigned Col = 0; Col < ColCount; ++Col) - { - if (ColScoresA[Col] == 0.0 && ColScoresB[Col] == 0.0) - continue; - - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - Log("%5u %c %c %c", Col, a, q, b); - - if (ColScoresA[Col] == 0.0) - Log(" %7.7s", ""); - else - Log(" %7.1f", ColScoresA[Col]); - - if (ColScoresB[Col] == 0.0) - Log(" %7.7s", ""); - else - Log(" %7.1f", ColScoresB[Col]); - - Log(" %7.1f %7.1f %7.1f %7.1f", LVA[Col], LVB[Col], RVA[Col], RVB[Col]); - - Log("\n"); - } - Log("\n"); - Log("MaxSum %.1f, ColLo %u, ColXLo %u, ColX %u, ColXHi %u, ColHi %u, AF %c\n", - MaxSum, ColLo, ColXLo, ColX, ColXHi, ColHi, tof(FirstA)); - Log(" LIdQA %u, LIdQB %u, RIdQA %u, RIdQB %u\n", LIdQA, LIdQB, RIdQA, RIdQB); - } -#endif - - string Q3L; - string A3L; - string B3L; - for (unsigned Col = ColLo; Col <= ColHi; ++Col) - { - char q = Q3[Col]; - char a = A3[Col]; - char b = B3[Col]; - - Q3L += q; - A3L += a; - B3L += b; - } - - AlignChimeGlobal3(Q3L, A3L, B3L, QLabel, ALabel, BLabel, Hit); - -#if 0 -// CS SNPs - Hit.CS_LY = 0; - Hit.CS_LN = 0; - Hit.CS_RY = 0; - Hit.CS_RN = 0; - Hit.CS_LA = 0; - Hit.CS_RA = 0; - for (unsigned Col = ColLo; Col <= ColHi; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - if (q == a && q == b && a == b) - continue; - if (isgap(q) || isgap(a) || isgap(b)) - continue; - if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1]))) - continue; - if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1]))) - continue; - - if (!FirstA) - swap(a, b); - - if (Col < ColXLo) - { - if (q == a && q != b) - ++Hit.CS_LY; - else if (q == b && q != a) - ++Hit.CS_LN; - else - ++Hit.CS_LA; - } - else if (Col > ColXHi) - { - if (q == b && q != a) - ++Hit.CS_RY; - else if (q == a && q != b) - ++Hit.CS_RN; - else - ++Hit.CS_RA; - } - } - - double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA); - double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA); - Hit.Score = ScoreL*ScoreR; - - //Hit.QSD = QSD; - //if (FirstA) - // { - // Hit.ASD = ASD; - // Hit.BSD = BSD; - // Hit.PathQA = PathQA; - // Hit.PathQB = PathQB; - // } - //else - // { - // Hit.ASD = BSD; - // Hit.BSD = ASD; - // } - - //Hit.ColLo = ColLo; - //Hit.ColXLo = ColXLo; - //Hit.ColXHi = ColXHi; - //Hit.ColHi = ColHi; - //Hit.Div = Div; - -// Hit.LogMe(); -#endif - } diff --git a/allocs.h b/allocs.h deleted file mode 100644 index 157d03e..0000000 --- a/allocs.h +++ /dev/null @@ -1,24 +0,0 @@ -A(Alpha) -A(Mx) -A(ChainBrute) -A(Chainer) -A(Test) -A(CompressPath) -A(HSPFinder) -A(Main) -A(Clumps) -A(Path) -A(SeqDB) -A(SFasta) -A(SWUngapped) -A(AllocBit) -A(Ultra) -A(UPGMA) -A(Windex) -A(XDropBwd) -A(Xlat) -A(MPath) -A(ScoreCache) -A(TargetHits) -A(Out) -A(Hashdex) diff --git a/alnheuristics.h b/alnheuristics.h deleted file mode 100644 index 9a8d283..0000000 --- a/alnheuristics.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef alnheuristics_h -#define alnheuristics_h - -struct AlnParams; - -struct AlnHeuristics - { - unsigned BandRadius; - unsigned HSPFinderWordLength; - float SeedT; - - float XDropG; // GappedBlast default - float XDropU; // UngappedBlast default - float XDropUG; // UngappedBlast called by GappedBlast - - unsigned MinGlobalHSPLength; - - AlnHeuristics(); - void InitFromCmdLine(const AlnParams &AP); - void InitGlobalFull(); - - bool IsGlobalFull() const - { - return MinGlobalHSPLength == 0 && BandRadius == 0; - } - - }; - -#endif // alnheuristics_h diff --git a/alnparams.cpp b/alnparams.cpp deleted file mode 100644 index d1b9036..0000000 --- a/alnparams.cpp +++ /dev/null @@ -1,414 +0,0 @@ -#include "myutils.h" -#include // for FLT_MAX -#include "mx.h" -#include "alnparams.h" -#include "hsp.h" - -#define TEST 0 - -void SetBLOSUM62(); -void SetNucSubstMx(double Match, double Mismatch); -void ReadSubstMx(const string &FileName, Mx &Mxf); - -extern Mx g_SubstMxf; -extern float **g_SubstMx; - -void AlnParams::Clear() - { - SubstMxName = 0; - LocalOpen = OBVIOUSLY_WRONG_PENALTY; - LocalExt = OBVIOUSLY_WRONG_PENALTY; - OpenA = OBVIOUSLY_WRONG_PENALTY; - OpenB = OBVIOUSLY_WRONG_PENALTY; - ExtA = OBVIOUSLY_WRONG_PENALTY; - ExtB = OBVIOUSLY_WRONG_PENALTY; - LOpenA = OBVIOUSLY_WRONG_PENALTY; - LOpenB = OBVIOUSLY_WRONG_PENALTY; - ROpenA = OBVIOUSLY_WRONG_PENALTY; - ROpenB = OBVIOUSLY_WRONG_PENALTY; - LExtA = OBVIOUSLY_WRONG_PENALTY; - LExtB = OBVIOUSLY_WRONG_PENALTY; - RExtA = OBVIOUSLY_WRONG_PENALTY; - RExtB = OBVIOUSLY_WRONG_PENALTY; - Nucleo = false; - NucleoSet = false; - } - -bool AlnParams::Is2() const - { - float g = OpenA; - float e = ExtA; - if (OpenB != g || LOpenA != g || LOpenB != g || ROpenA != g || ROpenB != g) - return false; - if (ExtB != e || LExtA != e || LExtB != e || RExtA != e || RExtB != e) - return false; - return true; - } - -bool AlnParams::Is4() const - { - float g = OpenA; - float tg = LOpenA; - float e = ExtA; - float te = LExtA; - if (OpenB != g || LOpenA != tg || LOpenB != tg || ROpenA != tg || ROpenB != tg) - return false; - if (ExtB != e || LExtA != te || LExtB != te || RExtA != te || RExtB != te) - return false; - return true; - } - -const char *AlnParams::GetType() const - { - if (Is2()) - return "2"; - else if (Is4()) - return "4"; - return "12"; - } - -void AlnParams::Init2(const float * const *Mx, float Open, float Ext) - { - SubstMx = Mx; - OpenA = OpenB = LOpenA = LOpenB = ROpenA = ROpenB = Open; - ExtA = ExtB = LExtA = LExtB = RExtA = RExtB = Ext; - } - -void AlnParams::SetLocal(float Open, float Ext) - { - LocalOpen = Open; - LocalExt = Ext; - } - -void AlnParams::Init4(const float * const *Mx, float Open, float Ext, - float TermOpen, float TermExt) - { - SubstMx = Mx; - OpenA = OpenB = Open; - LOpenA = LOpenB = ROpenA = ROpenB = TermOpen; - ExtA = ExtB = Ext; - LExtA = LExtB = RExtA = RExtB = TermExt; - } - -void AlnParams::Init(const AlnParams &AP, const HSPData &HSP, - unsigned LA, unsigned LB) - { - SubstMx = AP.SubstMx; - OpenA = AP.OpenA; - OpenB = AP.OpenB; - ExtA = AP.ExtA; - ExtB = AP.ExtB; - - if (HSP.LeftA()) - { - LOpenA = AP.LOpenA; - LExtA = AP.LExtA; - } - else - { - LOpenA = AP.OpenA; - LExtA = AP.ExtA; - } - - if (HSP.LeftB()) - { - LOpenB = AP.LOpenB; - LExtB = AP.LExtB; - } - else - { - LOpenB = AP.OpenB; - LExtB = AP.ExtB; - } - - if (HSP.RightA(LA)) - { - ROpenA = AP.ROpenA; - RExtA = AP.RExtA; - } - else - { - ROpenA = AP.OpenA; - RExtA = AP.ExtA; - } - - if (HSP.RightB(LB)) - { - ROpenB = AP.ROpenB; - RExtB = AP.RExtB; - } - else - { - ROpenB = AP.OpenB; - RExtB = AP.ExtB; - } - } - -void AlnParams::LogMe() const - { - Log("AlnParams(%s)", GetType()); - if (Is2()) - Log(" g=%.1f e=%.1f", -OpenA, -ExtA); - else if (Is4()) - Log(" g=%.1f tg=%.1f e=%.1f te=%.1f", -OpenA, -ExtA, -LOpenA, -LExtA); - else - Log( -" gA=%.1f gB=%.1f gAL=%.1f gBL=%.1f gAR=%.1f gBR=%.1f eA=%.1f eB=%.1f eAL=%.1f eBL=%.1f eAR=%.1f eBR=%.1f", - OpenA, OpenB, LOpenA, LOpenB, ROpenA, ROpenB, ExtA, ExtB, LExtA, LExtB, RExtA, RExtB); - Log("\n"); - } - -/*** -Open/Ext format string is one or more: - [...] - -Value is (positive) penalty or * (disabled). -Flag is: - Q Query. - T Target sequence. - I Internal gaps (defafault internal and terminal). - E End gaps (default internal and terminal). - L Left end. - R Right end. -***/ - -static void ParseGapStr(const string &s, - float &QI, float &QL, float &QR, - float &TI, float &TL, float &TR) - { - if (s.empty()) - return; - - bool Q = false; - bool T = false; - bool I = false; - bool E = false; - bool L = false; - bool R = false; - - const unsigned K = SIZE(s); - unsigned Dec = 0; - float Value = FLT_MAX; - for (unsigned i = 0; i <= K; ++i) - { - char c = s.c_str()[i]; - if (c == 0 || c == '/') - { - if (Value == FLT_MAX) - Die("Invalid gap penalty string, missing penalty '%s'", s.c_str()); - if (!Q && !T && !I && !E && !L && !R) - { - Q = true; - T = true; - L = true; - R = true; - I = true; - } - - if (!E && !I && !L && !R) - { - E = false; - I = true; - L = true; - R = true; - } - - if (E) - { - if (L || R) - Die("Invalid gap penalty string (E and L or R) '%s'", s.c_str()); - L = true; - R = true; - } - - if (!Q && !T) - { - Q = true; - T = true; - } - - if (Q && L) - QL = -Value; - if (Q && R) - QR = -Value; - if (Q && I) - QI = -Value; - if (T && L) - TL = -Value; - if (T && R) - TR = -Value; - if (T && I) - TI = -Value; - - Value = FLT_MAX; - Dec = 0; - Q = false; - T = false; - I = false; - E = false; - L = false; - R = false; - } - else if (c == '*') - { - if (Value != FLT_MAX) - Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str()); - Value = -MINUS_INFINITY; - } - else if (isdigit(c)) - { - if (Value == -MINUS_INFINITY) - Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str()); - if (Value == FLT_MAX) - Value = 0.0; - if (Dec > 0) - { - Dec *= 10; - Value += float(c - '0')/Dec; - } - else - Value = Value*10 + (c - '0'); - } - else if (c == '.') - { - if (Dec > 0) - Die("Invalid gap penalty (two decimal points) '%s'", s.c_str()); - Dec = 1; - } - else - { - switch (c) - { - case 'Q': - Q = true; - break; - case 'T': - T = true; - break; - case 'I': - I = true; - break; - case 'L': - L = true; - break; - case 'R': - R = true; - break; - case 'E': - E = true; - break; - default: - Die("Invalid char '%c' in gap penalty string '%s'", c, s.c_str()); - } - } - } - } - -void AlnParams::SetPenalties(const string &OpenStr, const string &ExtStr) - { - ParseGapStr(OpenStr, OpenA, LOpenA, ROpenA, OpenB, LOpenB, ROpenB); - ParseGapStr(ExtStr, ExtA, LExtA, RExtA, ExtB, LExtB, RExtB); - } - -void AlnParams::SetMxFromCmdLine(bool IsNucleo) - { - if (IsNucleo) - SetNucSubstMx(opt_match, opt_mismatch); - else - { - if (opt_matrix == "") - { - SubstMxName = "BLOSUM62"; - SetBLOSUM62(); - } - else - { - ReadSubstMx(opt_matrix, g_SubstMxf); - g_SubstMx = g_SubstMxf.GetData(); - g_SubstMxf.LogMe(); - SubstMxName = opt_matrix.c_str(); - } - } - SubstMx = g_SubstMx; - asserta(SubstMx != 0); - } - -void AlnParams::InitFromCmdLine(bool IsNucleo) - { - Clear(); - Nucleo = IsNucleo; - NucleoSet = true; - - SetMxFromCmdLine(IsNucleo); - -// Local - if (optset_lopen || optset_lext) - { - if (!optset_lopen || !optset_lext) - Die("Must set both --lopen and --lext"); - if (opt_lopen < 0.0 || opt_lext < 0.0) - Die("Invalid --lopen/--lext, gap penalties must be >= 0"); - SetLocal(float(-opt_lopen), float(-opt_lext)); - } - else - { - // Same penalties, if-statement to note could differ. - if (IsNucleo) - SetLocal(-10.0f, -1.0f); - else - SetLocal(-10.0f, -1.0f); - } - -// Global - if (IsNucleo) - Init4(g_SubstMx, -10.0, -1.0, -0.5, -0.5); - else - Init4(g_SubstMx, -17.0, -1.0, -0.5, -0.5); - SetPenalties(opt_gapopen, opt_gapext); - } - -float AlnParams::GetLocalOpen() const - { - return LocalOpen; - } - -float AlnParams::GetLocalExt() const - { - return LocalExt; - } - -bool AlnParams::GetIsNucleo() const - { - asserta(NucleoSet); - return Nucleo; - } - -unsigned GetWindexWordLength(bool Nucleo) - { - if (optset_w) - return opt_w; - - if (Nucleo) - return 8; - else - return 5; - } - -#if TEST -static void Test1(const string &os, const string &es) - { - AlnParams AP; - Log("\n"); - Log("OpenStr %s\n", os.c_str()); - Log(" ExtStr %s\n", es.c_str()); - AP.SetPenalties(os, es); - AP.LogMe(); - } - -void TestGapStr() - { - Test1("17I/0.5E", "1I/0.5E"); - Test1("17I/0.5L/0.4R", "1Q/2T"); - Test1("1QL/2QR/3QI/4TL/5TR/6TI", ".1QL/.2QR/.3QI/.4TL/.5TR/.6TI"); - } -#endif // TEST diff --git a/alnparams.h b/alnparams.h deleted file mode 100644 index 4037912..0000000 --- a/alnparams.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef alnparams_h -#define alnparams_h - -struct HSPData; - -// Gap penalty scores are negative -// (i.e., are scores, not penalties). -struct AlnParams - { - const char *SubstMxName; - const float * const *SubstMx; - - bool Nucleo; - bool NucleoSet; - -// Local gaps - float LocalOpen; - float LocalExt; - -// Global internal gaps - float OpenA; - float OpenB; - - float ExtA; - float ExtB; - -// Global terminal gaps - float LOpenA; - float LOpenB; - float ROpenA; - float ROpenB; - - float LExtA; - float LExtB; - float RExtA; - float RExtB; - - void Clear(); - void SetLocal(float Open, float Ext); - void Init2(const float * const *Mx, float Open, float Ext); - void Init4(const float * const *Mx, float Open, float Ext, float TermOpen, float TermExt); - void Init(const AlnParams &AP, const HSPData &HSP, unsigned LA, unsigned LB); - void InitFromCmdLine(bool Nucleo); - void SetMxFromCmdLine(bool Nucleo); - void SetPenalties(const string &OpenStr, const string &ExtStr); - float GetLocalOpen() const; - float GetLocalExt() const; - bool GetIsNucleo() const; - - bool Is2() const; - bool Is4() const; - const char *GetType() const; - - void LogMe() const; - }; - -const float OBVIOUSLY_WRONG_PENALTY = 1000.0; - -#endif // alnparams_h diff --git a/alpha.cpp b/alpha.cpp deleted file mode 100644 index 0efca3b..0000000 --- a/alpha.cpp +++ /dev/null @@ -1,2761 +0,0 @@ -// Generated by /p/py/alphac.py -#include "alpha.h" - -unsigned g_CharToLetterAminoStop[256] = - { - INVALID_LETTER, // [ 0] 0x00 - INVALID_LETTER, // [ 1] 0x01 - INVALID_LETTER, // [ 2] 0x02 - INVALID_LETTER, // [ 3] 0x03 - INVALID_LETTER, // [ 4] 0x04 - INVALID_LETTER, // [ 5] 0x05 - INVALID_LETTER, // [ 6] 0x06 - INVALID_LETTER, // [ 7] 0x07 - INVALID_LETTER, // [ 8] 0x08 - INVALID_LETTER, // [ 9] 0x09 - INVALID_LETTER, // [ 10] 0x0a - INVALID_LETTER, // [ 11] 0x0b - INVALID_LETTER, // [ 12] 0x0c - INVALID_LETTER, // [ 13] 0x0d - INVALID_LETTER, // [ 14] 0x0e - INVALID_LETTER, // [ 15] 0x0f - INVALID_LETTER, // [ 16] 0x10 - INVALID_LETTER, // [ 17] 0x11 - INVALID_LETTER, // [ 18] 0x12 - INVALID_LETTER, // [ 19] 0x13 - INVALID_LETTER, // [ 20] 0x14 - INVALID_LETTER, // [ 21] 0x15 - INVALID_LETTER, // [ 22] 0x16 - INVALID_LETTER, // [ 23] 0x17 - INVALID_LETTER, // [ 24] 0x18 - INVALID_LETTER, // [ 25] 0x19 - INVALID_LETTER, // [ 26] 0x1a - INVALID_LETTER, // [ 27] 0x1b - INVALID_LETTER, // [ 28] 0x1c - INVALID_LETTER, // [ 29] 0x1d - INVALID_LETTER, // [ 30] 0x1e - INVALID_LETTER, // [ 31] 0x1f - INVALID_LETTER, // [ 32] ' ' - INVALID_LETTER, // [ 33] '!' - INVALID_LETTER, // [ 34] '"' - INVALID_LETTER, // [ 35] '#' - INVALID_LETTER, // [ 36] '$' - INVALID_LETTER, // [ 37] '%' - INVALID_LETTER, // [ 38] '&' - INVALID_LETTER, // [ 39] ''' - INVALID_LETTER, // [ 40] '(' - INVALID_LETTER, // [ 41] ')' - 20 , // [ 42] '*' = STP - INVALID_LETTER, // [ 43] '+' - INVALID_LETTER, // [ 44] ',' - INVALID_LETTER, // [ 45] '-' - INVALID_LETTER, // [ 46] '.' - INVALID_LETTER, // [ 47] '/' - INVALID_LETTER, // [ 48] '0' - INVALID_LETTER, // [ 49] '1' - INVALID_LETTER, // [ 50] '2' - INVALID_LETTER, // [ 51] '3' - INVALID_LETTER, // [ 52] '4' - INVALID_LETTER, // [ 53] '5' - INVALID_LETTER, // [ 54] '6' - INVALID_LETTER, // [ 55] '7' - INVALID_LETTER, // [ 56] '8' - INVALID_LETTER, // [ 57] '9' - INVALID_LETTER, // [ 58] ':' - INVALID_LETTER, // [ 59] ';' - INVALID_LETTER, // [ 60] '<' - INVALID_LETTER, // [ 61] '=' - INVALID_LETTER, // [ 62] '>' - INVALID_LETTER, // [ 63] '?' - INVALID_LETTER, // [ 64] '@' - 0 , // [ 65] 'A' = Ala - INVALID_LETTER, // [ 66] 'B' - 1 , // [ 67] 'C' = Cys - 2 , // [ 68] 'D' = Asp - 3 , // [ 69] 'E' = Glu - 4 , // [ 70] 'F' = Phe - 5 , // [ 71] 'G' = Gly - 6 , // [ 72] 'H' = His - 7 , // [ 73] 'I' = Ile - INVALID_LETTER, // [ 74] 'J' - 8 , // [ 75] 'K' = Lys - 9 , // [ 76] 'L' = Leu - 10 , // [ 77] 'M' = Met - 11 , // [ 78] 'N' = Asn - INVALID_LETTER, // [ 79] 'O' - 12 , // [ 80] 'P' = Pro - 13 , // [ 81] 'Q' = Gln - 14 , // [ 82] 'R' = Arg - 15 , // [ 83] 'S' = Ser - 16 , // [ 84] 'T' = Thr - INVALID_LETTER, // [ 85] 'U' - 17 , // [ 86] 'V' = Val - 18 , // [ 87] 'W' = Trp - INVALID_LETTER, // [ 88] 'X' - 19 , // [ 89] 'Y' = Tyr - INVALID_LETTER, // [ 90] 'Z' - INVALID_LETTER, // [ 91] '[' - INVALID_LETTER, // [ 92] '\' - INVALID_LETTER, // [ 93] ']' - INVALID_LETTER, // [ 94] '^' - INVALID_LETTER, // [ 95] '_' - INVALID_LETTER, // [ 96] '`' - 0 , // [ 97] 'a' = Ala - INVALID_LETTER, // [ 98] 'b' - 1 , // [ 99] 'c' = Cys - 2 , // [100] 'd' = Asp - 3 , // [101] 'e' = Glu - 4 , // [102] 'f' = Phe - 5 , // [103] 'g' = Gly - 6 , // [104] 'h' = His - 7 , // [105] 'i' = Ile - INVALID_LETTER, // [106] 'j' - 8 , // [107] 'k' = Lys - 9 , // [108] 'l' = Leu - 10 , // [109] 'm' = Met - 11 , // [110] 'n' = Asn - INVALID_LETTER, // [111] 'o' - 12 , // [112] 'p' = Pro - 13 , // [113] 'q' = Gln - 14 , // [114] 'r' = Arg - 15 , // [115] 's' = Ser - 16 , // [116] 't' = Thr - INVALID_LETTER, // [117] 'u' - 17 , // [118] 'v' = Val - 18 , // [119] 'w' = Trp - INVALID_LETTER, // [120] 'x' - 19 , // [121] 'y' = Tyr - INVALID_LETTER, // [122] 'z' - INVALID_LETTER, // [123] '{' - INVALID_LETTER, // [124] '|' - INVALID_LETTER, // [125] '}' - INVALID_LETTER, // [126] '~' - INVALID_LETTER, // [127] 0x7f - INVALID_LETTER, // [128] 0x80 - INVALID_LETTER, // [129] 0x81 - INVALID_LETTER, // [130] 0x82 - INVALID_LETTER, // [131] 0x83 - INVALID_LETTER, // [132] 0x84 - INVALID_LETTER, // [133] 0x85 - INVALID_LETTER, // [134] 0x86 - INVALID_LETTER, // [135] 0x87 - INVALID_LETTER, // [136] 0x88 - INVALID_LETTER, // [137] 0x89 - INVALID_LETTER, // [138] 0x8a - INVALID_LETTER, // [139] 0x8b - INVALID_LETTER, // [140] 0x8c - INVALID_LETTER, // [141] 0x8d - INVALID_LETTER, // [142] 0x8e - INVALID_LETTER, // [143] 0x8f - INVALID_LETTER, // [144] 0x90 - INVALID_LETTER, // [145] 0x91 - INVALID_LETTER, // [146] 0x92 - INVALID_LETTER, // [147] 0x93 - INVALID_LETTER, // [148] 0x94 - INVALID_LETTER, // [149] 0x95 - INVALID_LETTER, // [150] 0x96 - INVALID_LETTER, // [151] 0x97 - INVALID_LETTER, // [152] 0x98 - INVALID_LETTER, // [153] 0x99 - INVALID_LETTER, // [154] 0x9a - INVALID_LETTER, // [155] 0x9b - INVALID_LETTER, // [156] 0x9c - INVALID_LETTER, // [157] 0x9d - INVALID_LETTER, // [158] 0x9e - INVALID_LETTER, // [159] 0x9f - INVALID_LETTER, // [160] 0xa0 - INVALID_LETTER, // [161] 0xa1 - INVALID_LETTER, // [162] 0xa2 - INVALID_LETTER, // [163] 0xa3 - INVALID_LETTER, // [164] 0xa4 - INVALID_LETTER, // [165] 0xa5 - INVALID_LETTER, // [166] 0xa6 - INVALID_LETTER, // [167] 0xa7 - INVALID_LETTER, // [168] 0xa8 - INVALID_LETTER, // [169] 0xa9 - INVALID_LETTER, // [170] 0xaa - INVALID_LETTER, // [171] 0xab - INVALID_LETTER, // [172] 0xac - INVALID_LETTER, // [173] 0xad - INVALID_LETTER, // [174] 0xae - INVALID_LETTER, // [175] 0xaf - INVALID_LETTER, // [176] 0xb0 - INVALID_LETTER, // [177] 0xb1 - INVALID_LETTER, // [178] 0xb2 - INVALID_LETTER, // [179] 0xb3 - INVALID_LETTER, // [180] 0xb4 - INVALID_LETTER, // [181] 0xb5 - INVALID_LETTER, // [182] 0xb6 - INVALID_LETTER, // [183] 0xb7 - INVALID_LETTER, // [184] 0xb8 - INVALID_LETTER, // [185] 0xb9 - INVALID_LETTER, // [186] 0xba - INVALID_LETTER, // [187] 0xbb - INVALID_LETTER, // [188] 0xbc - INVALID_LETTER, // [189] 0xbd - INVALID_LETTER, // [190] 0xbe - INVALID_LETTER, // [191] 0xbf - INVALID_LETTER, // [192] 0xc0 - INVALID_LETTER, // [193] 0xc1 - INVALID_LETTER, // [194] 0xc2 - INVALID_LETTER, // [195] 0xc3 - INVALID_LETTER, // [196] 0xc4 - INVALID_LETTER, // [197] 0xc5 - INVALID_LETTER, // [198] 0xc6 - INVALID_LETTER, // [199] 0xc7 - INVALID_LETTER, // [200] 0xc8 - INVALID_LETTER, // [201] 0xc9 - INVALID_LETTER, // [202] 0xca - INVALID_LETTER, // [203] 0xcb - INVALID_LETTER, // [204] 0xcc - INVALID_LETTER, // [205] 0xcd - INVALID_LETTER, // [206] 0xce - INVALID_LETTER, // [207] 0xcf - INVALID_LETTER, // [208] 0xd0 - INVALID_LETTER, // [209] 0xd1 - INVALID_LETTER, // [210] 0xd2 - INVALID_LETTER, // [211] 0xd3 - INVALID_LETTER, // [212] 0xd4 - INVALID_LETTER, // [213] 0xd5 - INVALID_LETTER, // [214] 0xd6 - INVALID_LETTER, // [215] 0xd7 - INVALID_LETTER, // [216] 0xd8 - INVALID_LETTER, // [217] 0xd9 - INVALID_LETTER, // [218] 0xda - INVALID_LETTER, // [219] 0xdb - INVALID_LETTER, // [220] 0xdc - INVALID_LETTER, // [221] 0xdd - INVALID_LETTER, // [222] 0xde - INVALID_LETTER, // [223] 0xdf - INVALID_LETTER, // [224] 0xe0 - INVALID_LETTER, // [225] 0xe1 - INVALID_LETTER, // [226] 0xe2 - INVALID_LETTER, // [227] 0xe3 - INVALID_LETTER, // [228] 0xe4 - INVALID_LETTER, // [229] 0xe5 - INVALID_LETTER, // [230] 0xe6 - INVALID_LETTER, // [231] 0xe7 - INVALID_LETTER, // [232] 0xe8 - INVALID_LETTER, // [233] 0xe9 - INVALID_LETTER, // [234] 0xea - INVALID_LETTER, // [235] 0xeb - INVALID_LETTER, // [236] 0xec - INVALID_LETTER, // [237] 0xed - INVALID_LETTER, // [238] 0xee - INVALID_LETTER, // [239] 0xef - INVALID_LETTER, // [240] 0xf0 - INVALID_LETTER, // [241] 0xf1 - INVALID_LETTER, // [242] 0xf2 - INVALID_LETTER, // [243] 0xf3 - INVALID_LETTER, // [244] 0xf4 - INVALID_LETTER, // [245] 0xf5 - INVALID_LETTER, // [246] 0xf6 - INVALID_LETTER, // [247] 0xf7 - INVALID_LETTER, // [248] 0xf8 - INVALID_LETTER, // [249] 0xf9 - INVALID_LETTER, // [250] 0xfa - INVALID_LETTER, // [251] 0xfb - INVALID_LETTER, // [252] 0xfc - INVALID_LETTER, // [253] 0xfd - INVALID_LETTER, // [254] 0xfe - INVALID_LETTER, // [255] 0xff - }; -unsigned g_CharToLetterAmino[256] = - { - INVALID_LETTER, // [ 0] 0x00 - INVALID_LETTER, // [ 1] 0x01 - INVALID_LETTER, // [ 2] 0x02 - INVALID_LETTER, // [ 3] 0x03 - INVALID_LETTER, // [ 4] 0x04 - INVALID_LETTER, // [ 5] 0x05 - INVALID_LETTER, // [ 6] 0x06 - INVALID_LETTER, // [ 7] 0x07 - INVALID_LETTER, // [ 8] 0x08 - INVALID_LETTER, // [ 9] 0x09 - INVALID_LETTER, // [ 10] 0x0a - INVALID_LETTER, // [ 11] 0x0b - INVALID_LETTER, // [ 12] 0x0c - INVALID_LETTER, // [ 13] 0x0d - INVALID_LETTER, // [ 14] 0x0e - INVALID_LETTER, // [ 15] 0x0f - INVALID_LETTER, // [ 16] 0x10 - INVALID_LETTER, // [ 17] 0x11 - INVALID_LETTER, // [ 18] 0x12 - INVALID_LETTER, // [ 19] 0x13 - INVALID_LETTER, // [ 20] 0x14 - INVALID_LETTER, // [ 21] 0x15 - INVALID_LETTER, // [ 22] 0x16 - INVALID_LETTER, // [ 23] 0x17 - INVALID_LETTER, // [ 24] 0x18 - INVALID_LETTER, // [ 25] 0x19 - INVALID_LETTER, // [ 26] 0x1a - INVALID_LETTER, // [ 27] 0x1b - INVALID_LETTER, // [ 28] 0x1c - INVALID_LETTER, // [ 29] 0x1d - INVALID_LETTER, // [ 30] 0x1e - INVALID_LETTER, // [ 31] 0x1f - INVALID_LETTER, // [ 32] ' ' - INVALID_LETTER, // [ 33] '!' - INVALID_LETTER, // [ 34] '"' - INVALID_LETTER, // [ 35] '#' - INVALID_LETTER, // [ 36] '$' - INVALID_LETTER, // [ 37] '%' - INVALID_LETTER, // [ 38] '&' - INVALID_LETTER, // [ 39] ''' - INVALID_LETTER, // [ 40] '(' - INVALID_LETTER, // [ 41] ')' - INVALID_LETTER, // [ 42] '*' - INVALID_LETTER, // [ 43] '+' - INVALID_LETTER, // [ 44] ',' - INVALID_LETTER, // [ 45] '-' - INVALID_LETTER, // [ 46] '.' - INVALID_LETTER, // [ 47] '/' - INVALID_LETTER, // [ 48] '0' - INVALID_LETTER, // [ 49] '1' - INVALID_LETTER, // [ 50] '2' - INVALID_LETTER, // [ 51] '3' - INVALID_LETTER, // [ 52] '4' - INVALID_LETTER, // [ 53] '5' - INVALID_LETTER, // [ 54] '6' - INVALID_LETTER, // [ 55] '7' - INVALID_LETTER, // [ 56] '8' - INVALID_LETTER, // [ 57] '9' - INVALID_LETTER, // [ 58] ':' - INVALID_LETTER, // [ 59] ';' - INVALID_LETTER, // [ 60] '<' - INVALID_LETTER, // [ 61] '=' - INVALID_LETTER, // [ 62] '>' - INVALID_LETTER, // [ 63] '?' - INVALID_LETTER, // [ 64] '@' - 0 , // [ 65] 'A' = Ala - INVALID_LETTER, // [ 66] 'B' - 1 , // [ 67] 'C' = Cys - 2 , // [ 68] 'D' = Asp - 3 , // [ 69] 'E' = Glu - 4 , // [ 70] 'F' = Phe - 5 , // [ 71] 'G' = Gly - 6 , // [ 72] 'H' = His - 7 , // [ 73] 'I' = Ile - INVALID_LETTER, // [ 74] 'J' - 8 , // [ 75] 'K' = Lys - 9 , // [ 76] 'L' = Leu - 10 , // [ 77] 'M' = Met - 11 , // [ 78] 'N' = Asn - INVALID_LETTER, // [ 79] 'O' - 12 , // [ 80] 'P' = Pro - 13 , // [ 81] 'Q' = Gln - 14 , // [ 82] 'R' = Arg - 15 , // [ 83] 'S' = Ser - 16 , // [ 84] 'T' = Thr - INVALID_LETTER, // [ 85] 'U' - 17 , // [ 86] 'V' = Val - 18 , // [ 87] 'W' = Trp - INVALID_LETTER, // [ 88] 'X' - 19 , // [ 89] 'Y' = Tyr - INVALID_LETTER, // [ 90] 'Z' - INVALID_LETTER, // [ 91] '[' - INVALID_LETTER, // [ 92] '\' - INVALID_LETTER, // [ 93] ']' - INVALID_LETTER, // [ 94] '^' - INVALID_LETTER, // [ 95] '_' - INVALID_LETTER, // [ 96] '`' - 0 , // [ 97] 'a' = Ala - INVALID_LETTER, // [ 98] 'b' - 1 , // [ 99] 'c' = Cys - 2 , // [100] 'd' = Asp - 3 , // [101] 'e' = Glu - 4 , // [102] 'f' = Phe - 5 , // [103] 'g' = Gly - 6 , // [104] 'h' = His - 7 , // [105] 'i' = Ile - INVALID_LETTER, // [106] 'j' - 8 , // [107] 'k' = Lys - 9 , // [108] 'l' = Leu - 10 , // [109] 'm' = Met - 11 , // [110] 'n' = Asn - INVALID_LETTER, // [111] 'o' - 12 , // [112] 'p' = Pro - 13 , // [113] 'q' = Gln - 14 , // [114] 'r' = Arg - 15 , // [115] 's' = Ser - 16 , // [116] 't' = Thr - INVALID_LETTER, // [117] 'u' - 17 , // [118] 'v' = Val - 18 , // [119] 'w' = Trp - INVALID_LETTER, // [120] 'x' - 19 , // [121] 'y' = Tyr - INVALID_LETTER, // [122] 'z' - INVALID_LETTER, // [123] '{' - INVALID_LETTER, // [124] '|' - INVALID_LETTER, // [125] '}' - INVALID_LETTER, // [126] '~' - INVALID_LETTER, // [127] 0x7f - INVALID_LETTER, // [128] 0x80 - INVALID_LETTER, // [129] 0x81 - INVALID_LETTER, // [130] 0x82 - INVALID_LETTER, // [131] 0x83 - INVALID_LETTER, // [132] 0x84 - INVALID_LETTER, // [133] 0x85 - INVALID_LETTER, // [134] 0x86 - INVALID_LETTER, // [135] 0x87 - INVALID_LETTER, // [136] 0x88 - INVALID_LETTER, // [137] 0x89 - INVALID_LETTER, // [138] 0x8a - INVALID_LETTER, // [139] 0x8b - INVALID_LETTER, // [140] 0x8c - INVALID_LETTER, // [141] 0x8d - INVALID_LETTER, // [142] 0x8e - INVALID_LETTER, // [143] 0x8f - INVALID_LETTER, // [144] 0x90 - INVALID_LETTER, // [145] 0x91 - INVALID_LETTER, // [146] 0x92 - INVALID_LETTER, // [147] 0x93 - INVALID_LETTER, // [148] 0x94 - INVALID_LETTER, // [149] 0x95 - INVALID_LETTER, // [150] 0x96 - INVALID_LETTER, // [151] 0x97 - INVALID_LETTER, // [152] 0x98 - INVALID_LETTER, // [153] 0x99 - INVALID_LETTER, // [154] 0x9a - INVALID_LETTER, // [155] 0x9b - INVALID_LETTER, // [156] 0x9c - INVALID_LETTER, // [157] 0x9d - INVALID_LETTER, // [158] 0x9e - INVALID_LETTER, // [159] 0x9f - INVALID_LETTER, // [160] 0xa0 - INVALID_LETTER, // [161] 0xa1 - INVALID_LETTER, // [162] 0xa2 - INVALID_LETTER, // [163] 0xa3 - INVALID_LETTER, // [164] 0xa4 - INVALID_LETTER, // [165] 0xa5 - INVALID_LETTER, // [166] 0xa6 - INVALID_LETTER, // [167] 0xa7 - INVALID_LETTER, // [168] 0xa8 - INVALID_LETTER, // [169] 0xa9 - INVALID_LETTER, // [170] 0xaa - INVALID_LETTER, // [171] 0xab - INVALID_LETTER, // [172] 0xac - INVALID_LETTER, // [173] 0xad - INVALID_LETTER, // [174] 0xae - INVALID_LETTER, // [175] 0xaf - INVALID_LETTER, // [176] 0xb0 - INVALID_LETTER, // [177] 0xb1 - INVALID_LETTER, // [178] 0xb2 - INVALID_LETTER, // [179] 0xb3 - INVALID_LETTER, // [180] 0xb4 - INVALID_LETTER, // [181] 0xb5 - INVALID_LETTER, // [182] 0xb6 - INVALID_LETTER, // [183] 0xb7 - INVALID_LETTER, // [184] 0xb8 - INVALID_LETTER, // [185] 0xb9 - INVALID_LETTER, // [186] 0xba - INVALID_LETTER, // [187] 0xbb - INVALID_LETTER, // [188] 0xbc - INVALID_LETTER, // [189] 0xbd - INVALID_LETTER, // [190] 0xbe - INVALID_LETTER, // [191] 0xbf - INVALID_LETTER, // [192] 0xc0 - INVALID_LETTER, // [193] 0xc1 - INVALID_LETTER, // [194] 0xc2 - INVALID_LETTER, // [195] 0xc3 - INVALID_LETTER, // [196] 0xc4 - INVALID_LETTER, // [197] 0xc5 - INVALID_LETTER, // [198] 0xc6 - INVALID_LETTER, // [199] 0xc7 - INVALID_LETTER, // [200] 0xc8 - INVALID_LETTER, // [201] 0xc9 - INVALID_LETTER, // [202] 0xca - INVALID_LETTER, // [203] 0xcb - INVALID_LETTER, // [204] 0xcc - INVALID_LETTER, // [205] 0xcd - INVALID_LETTER, // [206] 0xce - INVALID_LETTER, // [207] 0xcf - INVALID_LETTER, // [208] 0xd0 - INVALID_LETTER, // [209] 0xd1 - INVALID_LETTER, // [210] 0xd2 - INVALID_LETTER, // [211] 0xd3 - INVALID_LETTER, // [212] 0xd4 - INVALID_LETTER, // [213] 0xd5 - INVALID_LETTER, // [214] 0xd6 - INVALID_LETTER, // [215] 0xd7 - INVALID_LETTER, // [216] 0xd8 - INVALID_LETTER, // [217] 0xd9 - INVALID_LETTER, // [218] 0xda - INVALID_LETTER, // [219] 0xdb - INVALID_LETTER, // [220] 0xdc - INVALID_LETTER, // [221] 0xdd - INVALID_LETTER, // [222] 0xde - INVALID_LETTER, // [223] 0xdf - INVALID_LETTER, // [224] 0xe0 - INVALID_LETTER, // [225] 0xe1 - INVALID_LETTER, // [226] 0xe2 - INVALID_LETTER, // [227] 0xe3 - INVALID_LETTER, // [228] 0xe4 - INVALID_LETTER, // [229] 0xe5 - INVALID_LETTER, // [230] 0xe6 - INVALID_LETTER, // [231] 0xe7 - INVALID_LETTER, // [232] 0xe8 - INVALID_LETTER, // [233] 0xe9 - INVALID_LETTER, // [234] 0xea - INVALID_LETTER, // [235] 0xeb - INVALID_LETTER, // [236] 0xec - INVALID_LETTER, // [237] 0xed - INVALID_LETTER, // [238] 0xee - INVALID_LETTER, // [239] 0xef - INVALID_LETTER, // [240] 0xf0 - INVALID_LETTER, // [241] 0xf1 - INVALID_LETTER, // [242] 0xf2 - INVALID_LETTER, // [243] 0xf3 - INVALID_LETTER, // [244] 0xf4 - INVALID_LETTER, // [245] 0xf5 - INVALID_LETTER, // [246] 0xf6 - INVALID_LETTER, // [247] 0xf7 - INVALID_LETTER, // [248] 0xf8 - INVALID_LETTER, // [249] 0xf9 - INVALID_LETTER, // [250] 0xfa - INVALID_LETTER, // [251] 0xfb - INVALID_LETTER, // [252] 0xfc - INVALID_LETTER, // [253] 0xfd - INVALID_LETTER, // [254] 0xfe - INVALID_LETTER, // [255] 0xff - }; - -unsigned char g_LetterToCharAmino[256] = - { - 'A', // [0] - 'C', // [1] - 'D', // [2] - 'E', // [3] - 'F', // [4] - 'G', // [5] - 'H', // [6] - 'I', // [7] - 'K', // [8] - 'L', // [9] - 'M', // [10] - 'N', // [11] - 'P', // [12] - 'Q', // [13] - 'R', // [14] - 'S', // [15] - 'T', // [16] - 'V', // [17] - 'W', // [18] - 'Y', // [19] - '*', // [20] - INVALID_CHAR, // [21] - INVALID_CHAR, // [22] - INVALID_CHAR, // [23] - INVALID_CHAR, // [24] - INVALID_CHAR, // [25] - INVALID_CHAR, // [26] - INVALID_CHAR, // [27] - INVALID_CHAR, // [28] - INVALID_CHAR, // [29] - INVALID_CHAR, // [30] - INVALID_CHAR, // [31] - INVALID_CHAR, // [32] - INVALID_CHAR, // [33] - INVALID_CHAR, // [34] - INVALID_CHAR, // [35] - INVALID_CHAR, // [36] - INVALID_CHAR, // [37] - INVALID_CHAR, // [38] - INVALID_CHAR, // [39] - INVALID_CHAR, // [40] - INVALID_CHAR, // [41] - INVALID_CHAR, // [42] - INVALID_CHAR, // [43] - INVALID_CHAR, // [44] - INVALID_CHAR, // [45] - INVALID_CHAR, // [46] - INVALID_CHAR, // [47] - INVALID_CHAR, // [48] - INVALID_CHAR, // [49] - INVALID_CHAR, // [50] - INVALID_CHAR, // [51] - INVALID_CHAR, // [52] - INVALID_CHAR, // [53] - INVALID_CHAR, // [54] - INVALID_CHAR, // [55] - INVALID_CHAR, // [56] - INVALID_CHAR, // [57] - INVALID_CHAR, // [58] - INVALID_CHAR, // [59] - INVALID_CHAR, // [60] - INVALID_CHAR, // [61] - INVALID_CHAR, // [62] - INVALID_CHAR, // [63] - INVALID_CHAR, // [64] - INVALID_CHAR, // [65] - INVALID_CHAR, // [66] - INVALID_CHAR, // [67] - INVALID_CHAR, // [68] - INVALID_CHAR, // [69] - INVALID_CHAR, // [70] - INVALID_CHAR, // [71] - INVALID_CHAR, // [72] - INVALID_CHAR, // [73] - INVALID_CHAR, // [74] - INVALID_CHAR, // [75] - INVALID_CHAR, // [76] - INVALID_CHAR, // [77] - INVALID_CHAR, // [78] - INVALID_CHAR, // [79] - INVALID_CHAR, // [80] - INVALID_CHAR, // [81] - INVALID_CHAR, // [82] - INVALID_CHAR, // [83] - INVALID_CHAR, // [84] - INVALID_CHAR, // [85] - INVALID_CHAR, // [86] - INVALID_CHAR, // [87] - INVALID_CHAR, // [88] - INVALID_CHAR, // [89] - INVALID_CHAR, // [90] - INVALID_CHAR, // [91] - INVALID_CHAR, // [92] - INVALID_CHAR, // [93] - INVALID_CHAR, // [94] - INVALID_CHAR, // [95] - INVALID_CHAR, // [96] - INVALID_CHAR, // [97] - INVALID_CHAR, // [98] - INVALID_CHAR, // [99] - INVALID_CHAR, // [100] - INVALID_CHAR, // [101] - INVALID_CHAR, // [102] - INVALID_CHAR, // [103] - INVALID_CHAR, // [104] - INVALID_CHAR, // [105] - INVALID_CHAR, // [106] - INVALID_CHAR, // [107] - INVALID_CHAR, // [108] - INVALID_CHAR, // [109] - INVALID_CHAR, // [110] - INVALID_CHAR, // [111] - INVALID_CHAR, // [112] - INVALID_CHAR, // [113] - INVALID_CHAR, // [114] - INVALID_CHAR, // [115] - INVALID_CHAR, // [116] - INVALID_CHAR, // [117] - INVALID_CHAR, // [118] - INVALID_CHAR, // [119] - INVALID_CHAR, // [120] - INVALID_CHAR, // [121] - INVALID_CHAR, // [122] - INVALID_CHAR, // [123] - INVALID_CHAR, // [124] - INVALID_CHAR, // [125] - INVALID_CHAR, // [126] - INVALID_CHAR, // [127] - INVALID_CHAR, // [128] - INVALID_CHAR, // [129] - INVALID_CHAR, // [130] - INVALID_CHAR, // [131] - INVALID_CHAR, // [132] - INVALID_CHAR, // [133] - INVALID_CHAR, // [134] - INVALID_CHAR, // [135] - INVALID_CHAR, // [136] - INVALID_CHAR, // [137] - INVALID_CHAR, // [138] - INVALID_CHAR, // [139] - INVALID_CHAR, // [140] - INVALID_CHAR, // [141] - INVALID_CHAR, // [142] - INVALID_CHAR, // [143] - INVALID_CHAR, // [144] - INVALID_CHAR, // [145] - INVALID_CHAR, // [146] - INVALID_CHAR, // [147] - INVALID_CHAR, // [148] - INVALID_CHAR, // [149] - INVALID_CHAR, // [150] - INVALID_CHAR, // [151] - INVALID_CHAR, // [152] - INVALID_CHAR, // [153] - INVALID_CHAR, // [154] - INVALID_CHAR, // [155] - INVALID_CHAR, // [156] - INVALID_CHAR, // [157] - INVALID_CHAR, // [158] - INVALID_CHAR, // [159] - INVALID_CHAR, // [160] - INVALID_CHAR, // [161] - INVALID_CHAR, // [162] - INVALID_CHAR, // [163] - INVALID_CHAR, // [164] - INVALID_CHAR, // [165] - INVALID_CHAR, // [166] - INVALID_CHAR, // [167] - INVALID_CHAR, // [168] - INVALID_CHAR, // [169] - INVALID_CHAR, // [170] - INVALID_CHAR, // [171] - INVALID_CHAR, // [172] - INVALID_CHAR, // [173] - INVALID_CHAR, // [174] - INVALID_CHAR, // [175] - INVALID_CHAR, // [176] - INVALID_CHAR, // [177] - INVALID_CHAR, // [178] - INVALID_CHAR, // [179] - INVALID_CHAR, // [180] - INVALID_CHAR, // [181] - INVALID_CHAR, // [182] - INVALID_CHAR, // [183] - INVALID_CHAR, // [184] - INVALID_CHAR, // [185] - INVALID_CHAR, // [186] - INVALID_CHAR, // [187] - INVALID_CHAR, // [188] - INVALID_CHAR, // [189] - INVALID_CHAR, // [190] - INVALID_CHAR, // [191] - INVALID_CHAR, // [192] - INVALID_CHAR, // [193] - INVALID_CHAR, // [194] - INVALID_CHAR, // [195] - INVALID_CHAR, // [196] - INVALID_CHAR, // [197] - INVALID_CHAR, // [198] - INVALID_CHAR, // [199] - INVALID_CHAR, // [200] - INVALID_CHAR, // [201] - INVALID_CHAR, // [202] - INVALID_CHAR, // [203] - INVALID_CHAR, // [204] - INVALID_CHAR, // [205] - INVALID_CHAR, // [206] - INVALID_CHAR, // [207] - INVALID_CHAR, // [208] - INVALID_CHAR, // [209] - INVALID_CHAR, // [210] - INVALID_CHAR, // [211] - INVALID_CHAR, // [212] - INVALID_CHAR, // [213] - INVALID_CHAR, // [214] - INVALID_CHAR, // [215] - INVALID_CHAR, // [216] - INVALID_CHAR, // [217] - INVALID_CHAR, // [218] - INVALID_CHAR, // [219] - INVALID_CHAR, // [220] - INVALID_CHAR, // [221] - INVALID_CHAR, // [222] - INVALID_CHAR, // [223] - INVALID_CHAR, // [224] - INVALID_CHAR, // [225] - INVALID_CHAR, // [226] - INVALID_CHAR, // [227] - INVALID_CHAR, // [228] - INVALID_CHAR, // [229] - INVALID_CHAR, // [230] - INVALID_CHAR, // [231] - INVALID_CHAR, // [232] - INVALID_CHAR, // [233] - INVALID_CHAR, // [234] - INVALID_CHAR, // [235] - INVALID_CHAR, // [236] - INVALID_CHAR, // [237] - INVALID_CHAR, // [238] - INVALID_CHAR, // [239] - INVALID_CHAR, // [240] - INVALID_CHAR, // [241] - INVALID_CHAR, // [242] - INVALID_CHAR, // [243] - INVALID_CHAR, // [244] - INVALID_CHAR, // [245] - INVALID_CHAR, // [246] - INVALID_CHAR, // [247] - INVALID_CHAR, // [248] - INVALID_CHAR, // [249] - INVALID_CHAR, // [250] - INVALID_CHAR, // [251] - INVALID_CHAR, // [252] - INVALID_CHAR, // [253] - INVALID_CHAR, // [254] - INVALID_CHAR, // [255] - }; - -unsigned g_CharToLetterNucleo[256] = - { - INVALID_LETTER, // [ 0] = 0x00 - INVALID_LETTER, // [ 1] = 0x01 - INVALID_LETTER, // [ 2] = 0x02 - INVALID_LETTER, // [ 3] = 0x03 - INVALID_LETTER, // [ 4] = 0x04 - INVALID_LETTER, // [ 5] = 0x05 - INVALID_LETTER, // [ 6] = 0x06 - INVALID_LETTER, // [ 7] = 0x07 - INVALID_LETTER, // [ 8] = 0x08 - INVALID_LETTER, // [ 9] = 0x09 - INVALID_LETTER, // [ 10] = 0x0a - INVALID_LETTER, // [ 11] = 0x0b - INVALID_LETTER, // [ 12] = 0x0c - INVALID_LETTER, // [ 13] = 0x0d - INVALID_LETTER, // [ 14] = 0x0e - INVALID_LETTER, // [ 15] = 0x0f - INVALID_LETTER, // [ 16] = 0x10 - INVALID_LETTER, // [ 17] = 0x11 - INVALID_LETTER, // [ 18] = 0x12 - INVALID_LETTER, // [ 19] = 0x13 - INVALID_LETTER, // [ 20] = 0x14 - INVALID_LETTER, // [ 21] = 0x15 - INVALID_LETTER, // [ 22] = 0x16 - INVALID_LETTER, // [ 23] = 0x17 - INVALID_LETTER, // [ 24] = 0x18 - INVALID_LETTER, // [ 25] = 0x19 - INVALID_LETTER, // [ 26] = 0x1a - INVALID_LETTER, // [ 27] = 0x1b - INVALID_LETTER, // [ 28] = 0x1c - INVALID_LETTER, // [ 29] = 0x1d - INVALID_LETTER, // [ 30] = 0x1e - INVALID_LETTER, // [ 31] = 0x1f - INVALID_LETTER, // [ 32] = 32 - INVALID_LETTER, // [ 33] = 33 - INVALID_LETTER, // [ 34] = 34 - INVALID_LETTER, // [ 35] = 35 - INVALID_LETTER, // [ 36] = 36 - INVALID_LETTER, // [ 37] = 37 - INVALID_LETTER, // [ 38] = 38 - INVALID_LETTER, // [ 39] = 39 - INVALID_LETTER, // [ 40] = 40 - INVALID_LETTER, // [ 41] = 41 - INVALID_LETTER, // [ 42] = 42 - INVALID_LETTER, // [ 43] = 43 - INVALID_LETTER, // [ 44] = 44 - INVALID_LETTER, // [ 45] = 45 - INVALID_LETTER, // [ 46] = 46 - INVALID_LETTER, // [ 47] = 47 - INVALID_LETTER, // [ 48] = 48 - INVALID_LETTER, // [ 49] = 49 - INVALID_LETTER, // [ 50] = 50 - INVALID_LETTER, // [ 51] = 51 - INVALID_LETTER, // [ 52] = 52 - INVALID_LETTER, // [ 53] = 53 - INVALID_LETTER, // [ 54] = 54 - INVALID_LETTER, // [ 55] = 55 - INVALID_LETTER, // [ 56] = 56 - INVALID_LETTER, // [ 57] = 57 - INVALID_LETTER, // [ 58] = 58 - INVALID_LETTER, // [ 59] = 59 - INVALID_LETTER, // [ 60] = 60 - INVALID_LETTER, // [ 61] = 61 - INVALID_LETTER, // [ 62] = 62 - INVALID_LETTER, // [ 63] = 63 - INVALID_LETTER, // [ 64] = 64 - 0 , // [ 65] = A (Nucleotide) - INVALID_LETTER, // [ 66] = 66 - 1 , // [ 67] = C (Nucleotide) - INVALID_LETTER, // [ 68] = 68 - INVALID_LETTER, // [ 69] = 69 - INVALID_LETTER, // [ 70] = 70 - 2 , // [ 71] = G (Nucleotide) - INVALID_LETTER, // [ 72] = 72 - INVALID_LETTER, // [ 73] = 73 - INVALID_LETTER, // [ 74] = 74 - INVALID_LETTER, // [ 75] = 75 - INVALID_LETTER, // [ 76] = 76 - INVALID_LETTER, // [ 77] = 77 - INVALID_LETTER, // [ 78] = 78 - INVALID_LETTER, // [ 79] = 79 - INVALID_LETTER, // [ 80] = 80 - INVALID_LETTER, // [ 81] = 81 - INVALID_LETTER, // [ 82] = 82 - INVALID_LETTER, // [ 83] = 83 - 3 , // [ 84] = T (Nucleotide) - 3 , // [ 85] = U (Nucleotide) - INVALID_LETTER, // [ 86] = 86 - INVALID_LETTER, // [ 87] = 87 - INVALID_LETTER, // [ 88] = 88 - INVALID_LETTER, // [ 89] = 89 - INVALID_LETTER, // [ 90] = 90 - INVALID_LETTER, // [ 91] = 91 - INVALID_LETTER, // [ 92] = 92 - INVALID_LETTER, // [ 93] = 93 - INVALID_LETTER, // [ 94] = 94 - INVALID_LETTER, // [ 95] = 95 - INVALID_LETTER, // [ 96] = 96 - 0 , // [ 97] = a (Nucleotide) - INVALID_LETTER, // [ 98] = 98 - 1 , // [ 99] = c (Nucleotide) - INVALID_LETTER, // [100] = 100 - INVALID_LETTER, // [101] = 101 - INVALID_LETTER, // [102] = 102 - 2 , // [103] = g (Nucleotide) - INVALID_LETTER, // [104] = 104 - INVALID_LETTER, // [105] = 105 - INVALID_LETTER, // [106] = 106 - INVALID_LETTER, // [107] = 107 - INVALID_LETTER, // [108] = 108 - INVALID_LETTER, // [109] = 109 - INVALID_LETTER, // [110] = 110 - INVALID_LETTER, // [111] = 111 - INVALID_LETTER, // [112] = 112 - INVALID_LETTER, // [113] = 113 - INVALID_LETTER, // [114] = 114 - INVALID_LETTER, // [115] = 115 - 3 , // [116] = t (Nucleotide) - 3 , // [117] = u (Nucleotide) - INVALID_LETTER, // [118] = 118 - INVALID_LETTER, // [119] = 119 - INVALID_LETTER, // [120] = 120 - INVALID_LETTER, // [121] = 121 - INVALID_LETTER, // [122] = 122 - INVALID_LETTER, // [123] = 123 - INVALID_LETTER, // [124] = 124 - INVALID_LETTER, // [125] = 125 - INVALID_LETTER, // [126] = 126 - INVALID_LETTER, // [127] = 0x7f - INVALID_LETTER, // [128] = 0x80 - INVALID_LETTER, // [129] = 0x81 - INVALID_LETTER, // [130] = 0x82 - INVALID_LETTER, // [131] = 0x83 - INVALID_LETTER, // [132] = 0x84 - INVALID_LETTER, // [133] = 0x85 - INVALID_LETTER, // [134] = 0x86 - INVALID_LETTER, // [135] = 0x87 - INVALID_LETTER, // [136] = 0x88 - INVALID_LETTER, // [137] = 0x89 - INVALID_LETTER, // [138] = 0x8a - INVALID_LETTER, // [139] = 0x8b - INVALID_LETTER, // [140] = 0x8c - INVALID_LETTER, // [141] = 0x8d - INVALID_LETTER, // [142] = 0x8e - INVALID_LETTER, // [143] = 0x8f - INVALID_LETTER, // [144] = 0x90 - INVALID_LETTER, // [145] = 0x91 - INVALID_LETTER, // [146] = 0x92 - INVALID_LETTER, // [147] = 0x93 - INVALID_LETTER, // [148] = 0x94 - INVALID_LETTER, // [149] = 0x95 - INVALID_LETTER, // [150] = 0x96 - INVALID_LETTER, // [151] = 0x97 - INVALID_LETTER, // [152] = 0x98 - INVALID_LETTER, // [153] = 0x99 - INVALID_LETTER, // [154] = 0x9a - INVALID_LETTER, // [155] = 0x9b - INVALID_LETTER, // [156] = 0x9c - INVALID_LETTER, // [157] = 0x9d - INVALID_LETTER, // [158] = 0x9e - INVALID_LETTER, // [159] = 0x9f - INVALID_LETTER, // [160] = 0xa0 - INVALID_LETTER, // [161] = 0xa1 - INVALID_LETTER, // [162] = 0xa2 - INVALID_LETTER, // [163] = 0xa3 - INVALID_LETTER, // [164] = 0xa4 - INVALID_LETTER, // [165] = 0xa5 - INVALID_LETTER, // [166] = 0xa6 - INVALID_LETTER, // [167] = 0xa7 - INVALID_LETTER, // [168] = 0xa8 - INVALID_LETTER, // [169] = 0xa9 - INVALID_LETTER, // [170] = 0xaa - INVALID_LETTER, // [171] = 0xab - INVALID_LETTER, // [172] = 0xac - INVALID_LETTER, // [173] = 0xad - INVALID_LETTER, // [174] = 0xae - INVALID_LETTER, // [175] = 0xaf - INVALID_LETTER, // [176] = 0xb0 - INVALID_LETTER, // [177] = 0xb1 - INVALID_LETTER, // [178] = 0xb2 - INVALID_LETTER, // [179] = 0xb3 - INVALID_LETTER, // [180] = 0xb4 - INVALID_LETTER, // [181] = 0xb5 - INVALID_LETTER, // [182] = 0xb6 - INVALID_LETTER, // [183] = 0xb7 - INVALID_LETTER, // [184] = 0xb8 - INVALID_LETTER, // [185] = 0xb9 - INVALID_LETTER, // [186] = 0xba - INVALID_LETTER, // [187] = 0xbb - INVALID_LETTER, // [188] = 0xbc - INVALID_LETTER, // [189] = 0xbd - INVALID_LETTER, // [190] = 0xbe - INVALID_LETTER, // [191] = 0xbf - INVALID_LETTER, // [192] = 0xc0 - INVALID_LETTER, // [193] = 0xc1 - INVALID_LETTER, // [194] = 0xc2 - INVALID_LETTER, // [195] = 0xc3 - INVALID_LETTER, // [196] = 0xc4 - INVALID_LETTER, // [197] = 0xc5 - INVALID_LETTER, // [198] = 0xc6 - INVALID_LETTER, // [199] = 0xc7 - INVALID_LETTER, // [200] = 0xc8 - INVALID_LETTER, // [201] = 0xc9 - INVALID_LETTER, // [202] = 0xca - INVALID_LETTER, // [203] = 0xcb - INVALID_LETTER, // [204] = 0xcc - INVALID_LETTER, // [205] = 0xcd - INVALID_LETTER, // [206] = 0xce - INVALID_LETTER, // [207] = 0xcf - INVALID_LETTER, // [208] = 0xd0 - INVALID_LETTER, // [209] = 0xd1 - INVALID_LETTER, // [210] = 0xd2 - INVALID_LETTER, // [211] = 0xd3 - INVALID_LETTER, // [212] = 0xd4 - INVALID_LETTER, // [213] = 0xd5 - INVALID_LETTER, // [214] = 0xd6 - INVALID_LETTER, // [215] = 0xd7 - INVALID_LETTER, // [216] = 0xd8 - INVALID_LETTER, // [217] = 0xd9 - INVALID_LETTER, // [218] = 0xda - INVALID_LETTER, // [219] = 0xdb - INVALID_LETTER, // [220] = 0xdc - INVALID_LETTER, // [221] = 0xdd - INVALID_LETTER, // [222] = 0xde - INVALID_LETTER, // [223] = 0xdf - INVALID_LETTER, // [224] = 0xe0 - INVALID_LETTER, // [225] = 0xe1 - INVALID_LETTER, // [226] = 0xe2 - INVALID_LETTER, // [227] = 0xe3 - INVALID_LETTER, // [228] = 0xe4 - INVALID_LETTER, // [229] = 0xe5 - INVALID_LETTER, // [230] = 0xe6 - INVALID_LETTER, // [231] = 0xe7 - INVALID_LETTER, // [232] = 0xe8 - INVALID_LETTER, // [233] = 0xe9 - INVALID_LETTER, // [234] = 0xea - INVALID_LETTER, // [235] = 0xeb - INVALID_LETTER, // [236] = 0xec - INVALID_LETTER, // [237] = 0xed - INVALID_LETTER, // [238] = 0xee - INVALID_LETTER, // [239] = 0xef - INVALID_LETTER, // [240] = 0xf0 - INVALID_LETTER, // [241] = 0xf1 - INVALID_LETTER, // [242] = 0xf2 - INVALID_LETTER, // [243] = 0xf3 - INVALID_LETTER, // [244] = 0xf4 - INVALID_LETTER, // [245] = 0xf5 - INVALID_LETTER, // [246] = 0xf6 - INVALID_LETTER, // [247] = 0xf7 - INVALID_LETTER, // [248] = 0xf8 - INVALID_LETTER, // [249] = 0xf9 - INVALID_LETTER, // [250] = 0xfa - INVALID_LETTER, // [251] = 0xfb - INVALID_LETTER, // [252] = 0xfc - INVALID_LETTER, // [253] = 0xfd - INVALID_LETTER, // [254] = 0xfe - INVALID_LETTER, // [255] = 0xff - }; - -unsigned char g_LetterToCharNucleo[256] = - { - 'A', // [0] - 'C', // [1] - 'G', // [2] - 'T', // [3] - INVALID_CHAR, // [4] - INVALID_CHAR, // [5] - INVALID_CHAR, // [6] - INVALID_CHAR, // [7] - INVALID_CHAR, // [8] - INVALID_CHAR, // [9] - INVALID_CHAR, // [10] - INVALID_CHAR, // [11] - INVALID_CHAR, // [12] - INVALID_CHAR, // [13] - INVALID_CHAR, // [14] - INVALID_CHAR, // [15] - INVALID_CHAR, // [16] - INVALID_CHAR, // [17] - INVALID_CHAR, // [18] - INVALID_CHAR, // [19] - INVALID_CHAR, // [20] - INVALID_CHAR, // [21] - INVALID_CHAR, // [22] - INVALID_CHAR, // [23] - INVALID_CHAR, // [24] - INVALID_CHAR, // [25] - INVALID_CHAR, // [26] - INVALID_CHAR, // [27] - INVALID_CHAR, // [28] - INVALID_CHAR, // [29] - INVALID_CHAR, // [30] - INVALID_CHAR, // [31] - INVALID_CHAR, // [32] - INVALID_CHAR, // [33] - INVALID_CHAR, // [34] - INVALID_CHAR, // [35] - INVALID_CHAR, // [36] - INVALID_CHAR, // [37] - INVALID_CHAR, // [38] - INVALID_CHAR, // [39] - INVALID_CHAR, // [40] - INVALID_CHAR, // [41] - INVALID_CHAR, // [42] - INVALID_CHAR, // [43] - INVALID_CHAR, // [44] - INVALID_CHAR, // [45] - INVALID_CHAR, // [46] - INVALID_CHAR, // [47] - INVALID_CHAR, // [48] - INVALID_CHAR, // [49] - INVALID_CHAR, // [50] - INVALID_CHAR, // [51] - INVALID_CHAR, // [52] - INVALID_CHAR, // [53] - INVALID_CHAR, // [54] - INVALID_CHAR, // [55] - INVALID_CHAR, // [56] - INVALID_CHAR, // [57] - INVALID_CHAR, // [58] - INVALID_CHAR, // [59] - INVALID_CHAR, // [60] - INVALID_CHAR, // [61] - INVALID_CHAR, // [62] - INVALID_CHAR, // [63] - INVALID_CHAR, // [64] - INVALID_CHAR, // [65] - INVALID_CHAR, // [66] - INVALID_CHAR, // [67] - INVALID_CHAR, // [68] - INVALID_CHAR, // [69] - INVALID_CHAR, // [70] - INVALID_CHAR, // [71] - INVALID_CHAR, // [72] - INVALID_CHAR, // [73] - INVALID_CHAR, // [74] - INVALID_CHAR, // [75] - INVALID_CHAR, // [76] - INVALID_CHAR, // [77] - INVALID_CHAR, // [78] - INVALID_CHAR, // [79] - INVALID_CHAR, // [80] - INVALID_CHAR, // [81] - INVALID_CHAR, // [82] - INVALID_CHAR, // [83] - INVALID_CHAR, // [84] - INVALID_CHAR, // [85] - INVALID_CHAR, // [86] - INVALID_CHAR, // [87] - INVALID_CHAR, // [88] - INVALID_CHAR, // [89] - INVALID_CHAR, // [90] - INVALID_CHAR, // [91] - INVALID_CHAR, // [92] - INVALID_CHAR, // [93] - INVALID_CHAR, // [94] - INVALID_CHAR, // [95] - INVALID_CHAR, // [96] - INVALID_CHAR, // [97] - INVALID_CHAR, // [98] - INVALID_CHAR, // [99] - INVALID_CHAR, // [100] - INVALID_CHAR, // [101] - INVALID_CHAR, // [102] - INVALID_CHAR, // [103] - INVALID_CHAR, // [104] - INVALID_CHAR, // [105] - INVALID_CHAR, // [106] - INVALID_CHAR, // [107] - INVALID_CHAR, // [108] - INVALID_CHAR, // [109] - INVALID_CHAR, // [110] - INVALID_CHAR, // [111] - INVALID_CHAR, // [112] - INVALID_CHAR, // [113] - INVALID_CHAR, // [114] - INVALID_CHAR, // [115] - INVALID_CHAR, // [116] - INVALID_CHAR, // [117] - INVALID_CHAR, // [118] - INVALID_CHAR, // [119] - INVALID_CHAR, // [120] - INVALID_CHAR, // [121] - INVALID_CHAR, // [122] - INVALID_CHAR, // [123] - INVALID_CHAR, // [124] - INVALID_CHAR, // [125] - INVALID_CHAR, // [126] - INVALID_CHAR, // [127] - INVALID_CHAR, // [128] - INVALID_CHAR, // [129] - INVALID_CHAR, // [130] - INVALID_CHAR, // [131] - INVALID_CHAR, // [132] - INVALID_CHAR, // [133] - INVALID_CHAR, // [134] - INVALID_CHAR, // [135] - INVALID_CHAR, // [136] - INVALID_CHAR, // [137] - INVALID_CHAR, // [138] - INVALID_CHAR, // [139] - INVALID_CHAR, // [140] - INVALID_CHAR, // [141] - INVALID_CHAR, // [142] - INVALID_CHAR, // [143] - INVALID_CHAR, // [144] - INVALID_CHAR, // [145] - INVALID_CHAR, // [146] - INVALID_CHAR, // [147] - INVALID_CHAR, // [148] - INVALID_CHAR, // [149] - INVALID_CHAR, // [150] - INVALID_CHAR, // [151] - INVALID_CHAR, // [152] - INVALID_CHAR, // [153] - INVALID_CHAR, // [154] - INVALID_CHAR, // [155] - INVALID_CHAR, // [156] - INVALID_CHAR, // [157] - INVALID_CHAR, // [158] - INVALID_CHAR, // [159] - INVALID_CHAR, // [160] - INVALID_CHAR, // [161] - INVALID_CHAR, // [162] - INVALID_CHAR, // [163] - INVALID_CHAR, // [164] - INVALID_CHAR, // [165] - INVALID_CHAR, // [166] - INVALID_CHAR, // [167] - INVALID_CHAR, // [168] - INVALID_CHAR, // [169] - INVALID_CHAR, // [170] - INVALID_CHAR, // [171] - INVALID_CHAR, // [172] - INVALID_CHAR, // [173] - INVALID_CHAR, // [174] - INVALID_CHAR, // [175] - INVALID_CHAR, // [176] - INVALID_CHAR, // [177] - INVALID_CHAR, // [178] - INVALID_CHAR, // [179] - INVALID_CHAR, // [180] - INVALID_CHAR, // [181] - INVALID_CHAR, // [182] - INVALID_CHAR, // [183] - INVALID_CHAR, // [184] - INVALID_CHAR, // [185] - INVALID_CHAR, // [186] - INVALID_CHAR, // [187] - INVALID_CHAR, // [188] - INVALID_CHAR, // [189] - INVALID_CHAR, // [190] - INVALID_CHAR, // [191] - INVALID_CHAR, // [192] - INVALID_CHAR, // [193] - INVALID_CHAR, // [194] - INVALID_CHAR, // [195] - INVALID_CHAR, // [196] - INVALID_CHAR, // [197] - INVALID_CHAR, // [198] - INVALID_CHAR, // [199] - INVALID_CHAR, // [200] - INVALID_CHAR, // [201] - INVALID_CHAR, // [202] - INVALID_CHAR, // [203] - INVALID_CHAR, // [204] - INVALID_CHAR, // [205] - INVALID_CHAR, // [206] - INVALID_CHAR, // [207] - INVALID_CHAR, // [208] - INVALID_CHAR, // [209] - INVALID_CHAR, // [210] - INVALID_CHAR, // [211] - INVALID_CHAR, // [212] - INVALID_CHAR, // [213] - INVALID_CHAR, // [214] - INVALID_CHAR, // [215] - INVALID_CHAR, // [216] - INVALID_CHAR, // [217] - INVALID_CHAR, // [218] - INVALID_CHAR, // [219] - INVALID_CHAR, // [220] - INVALID_CHAR, // [221] - INVALID_CHAR, // [222] - INVALID_CHAR, // [223] - INVALID_CHAR, // [224] - INVALID_CHAR, // [225] - INVALID_CHAR, // [226] - INVALID_CHAR, // [227] - INVALID_CHAR, // [228] - INVALID_CHAR, // [229] - INVALID_CHAR, // [230] - INVALID_CHAR, // [231] - INVALID_CHAR, // [232] - INVALID_CHAR, // [233] - INVALID_CHAR, // [234] - INVALID_CHAR, // [235] - INVALID_CHAR, // [236] - INVALID_CHAR, // [237] - INVALID_CHAR, // [238] - INVALID_CHAR, // [239] - INVALID_CHAR, // [240] - INVALID_CHAR, // [241] - INVALID_CHAR, // [242] - INVALID_CHAR, // [243] - INVALID_CHAR, // [244] - INVALID_CHAR, // [245] - INVALID_CHAR, // [246] - INVALID_CHAR, // [247] - INVALID_CHAR, // [248] - INVALID_CHAR, // [249] - INVALID_CHAR, // [250] - INVALID_CHAR, // [251] - INVALID_CHAR, // [252] - INVALID_CHAR, // [253] - INVALID_CHAR, // [254] - INVALID_CHAR, // [255] - }; - -unsigned g_CodonWordToAminoLetter[4*4*4] = - { - 8 , // [ 0] = AAA K (Lys) - 11, // [ 1] = AAC N (Asn) - 8 , // [ 2] = AAG K (Lys) - 11, // [ 3] = AAT N (Asn) - 16, // [ 4] = ACA T (Thr) - 16, // [ 5] = ACC T (Thr) - 16, // [ 6] = ACG T (Thr) - 16, // [ 7] = ACT T (Thr) - 14, // [ 8] = AGA R (Arg) - 15, // [ 9] = AGC S (Ser) - 14, // [10] = AGG R (Arg) - 15, // [11] = AGT S (Ser) - 7 , // [12] = ATA I (Ile) - 7 , // [13] = ATC I (Ile) - 10, // [14] = ATG M (Met) - 7 , // [15] = ATT I (Ile) - 13, // [16] = CAA Q (Gln) - 6 , // [17] = CAC H (His) - 13, // [18] = CAG Q (Gln) - 6 , // [19] = CAT H (His) - 12, // [20] = CCA P (Pro) - 12, // [21] = CCC P (Pro) - 12, // [22] = CCG P (Pro) - 12, // [23] = CCT P (Pro) - 14, // [24] = CGA R (Arg) - 14, // [25] = CGC R (Arg) - 14, // [26] = CGG R (Arg) - 14, // [27] = CGT R (Arg) - 9 , // [28] = CTA L (Leu) - 9 , // [29] = CTC L (Leu) - 9 , // [30] = CTG L (Leu) - 9 , // [31] = CTT L (Leu) - 3 , // [32] = GAA E (Glu) - 2 , // [33] = GAC D (Asp) - 3 , // [34] = GAG E (Glu) - 2 , // [35] = GAT D (Asp) - 0 , // [36] = GCA A (Ala) - 0 , // [37] = GCC A (Ala) - 0 , // [38] = GCG A (Ala) - 0 , // [39] = GCT A (Ala) - 5 , // [40] = GGA G (Gly) - 5 , // [41] = GGC G (Gly) - 5 , // [42] = GGG G (Gly) - 5 , // [43] = GGT G (Gly) - 17, // [44] = GTA V (Val) - 17, // [45] = GTC V (Val) - 17, // [46] = GTG V (Val) - 17, // [47] = GTT V (Val) - 20, // [48] = TAA * (STP) - 19, // [49] = TAC Y (Tyr) - 20, // [50] = TAG * (STP) - 19, // [51] = TAT Y (Tyr) - 15, // [52] = TCA S (Ser) - 15, // [53] = TCC S (Ser) - 15, // [54] = TCG S (Ser) - 15, // [55] = TCT S (Ser) - 20, // [56] = TGA * (STP) - 1 , // [57] = TGC C (Cys) - 18, // [58] = TGG W (Trp) - 1 , // [59] = TGT C (Cys) - 9 , // [60] = TTA L (Leu) - 4 , // [61] = TTC F (Phe) - 9 , // [62] = TTG L (Leu) - 4 , // [63] = TTT F (Phe) - }; - -char g_CodonWordToAminoChar[4*4*4] = - { - 'K', // [ 0] = AAA (Lys) - 'N', // [ 1] = AAC (Asn) - 'K', // [ 2] = AAG (Lys) - 'N', // [ 3] = AAT (Asn) - 'T', // [ 4] = ACA (Thr) - 'T', // [ 5] = ACC (Thr) - 'T', // [ 6] = ACG (Thr) - 'T', // [ 7] = ACT (Thr) - 'R', // [ 8] = AGA (Arg) - 'S', // [ 9] = AGC (Ser) - 'R', // [10] = AGG (Arg) - 'S', // [11] = AGT (Ser) - 'I', // [12] = ATA (Ile) - 'I', // [13] = ATC (Ile) - 'M', // [14] = ATG (Met) - 'I', // [15] = ATT (Ile) - 'Q', // [16] = CAA (Gln) - 'H', // [17] = CAC (His) - 'Q', // [18] = CAG (Gln) - 'H', // [19] = CAT (His) - 'P', // [20] = CCA (Pro) - 'P', // [21] = CCC (Pro) - 'P', // [22] = CCG (Pro) - 'P', // [23] = CCT (Pro) - 'R', // [24] = CGA (Arg) - 'R', // [25] = CGC (Arg) - 'R', // [26] = CGG (Arg) - 'R', // [27] = CGT (Arg) - 'L', // [28] = CTA (Leu) - 'L', // [29] = CTC (Leu) - 'L', // [30] = CTG (Leu) - 'L', // [31] = CTT (Leu) - 'E', // [32] = GAA (Glu) - 'D', // [33] = GAC (Asp) - 'E', // [34] = GAG (Glu) - 'D', // [35] = GAT (Asp) - 'A', // [36] = GCA (Ala) - 'A', // [37] = GCC (Ala) - 'A', // [38] = GCG (Ala) - 'A', // [39] = GCT (Ala) - 'G', // [40] = GGA (Gly) - 'G', // [41] = GGC (Gly) - 'G', // [42] = GGG (Gly) - 'G', // [43] = GGT (Gly) - 'V', // [44] = GTA (Val) - 'V', // [45] = GTC (Val) - 'V', // [46] = GTG (Val) - 'V', // [47] = GTT (Val) - '*', // [48] = TAA (STP) - 'Y', // [49] = TAC (Tyr) - '*', // [50] = TAG (STP) - 'Y', // [51] = TAT (Tyr) - 'S', // [52] = TCA (Ser) - 'S', // [53] = TCC (Ser) - 'S', // [54] = TCG (Ser) - 'S', // [55] = TCT (Ser) - '*', // [56] = TGA (STP) - 'C', // [57] = TGC (Cys) - 'W', // [58] = TGG (Trp) - 'C', // [59] = TGT (Cys) - 'L', // [60] = TTA (Leu) - 'F', // [61] = TTC (Phe) - 'L', // [62] = TTG (Leu) - 'F', // [63] = TTT (Phe) - }; - -unsigned char g_CharToCompChar[256] = - { - INVALID_CHAR, // [ 0] - INVALID_CHAR, // [ 1] - INVALID_CHAR, // [ 2] - INVALID_CHAR, // [ 3] - INVALID_CHAR, // [ 4] - INVALID_CHAR, // [ 5] - INVALID_CHAR, // [ 6] - INVALID_CHAR, // [ 7] - INVALID_CHAR, // [ 8] - INVALID_CHAR, // [ 9] - INVALID_CHAR, // [ 10] - INVALID_CHAR, // [ 11] - INVALID_CHAR, // [ 12] - INVALID_CHAR, // [ 13] - INVALID_CHAR, // [ 14] - INVALID_CHAR, // [ 15] - INVALID_CHAR, // [ 16] - INVALID_CHAR, // [ 17] - INVALID_CHAR, // [ 18] - INVALID_CHAR, // [ 19] - INVALID_CHAR, // [ 20] - INVALID_CHAR, // [ 21] - INVALID_CHAR, // [ 22] - INVALID_CHAR, // [ 23] - INVALID_CHAR, // [ 24] - INVALID_CHAR, // [ 25] - INVALID_CHAR, // [ 26] - INVALID_CHAR, // [ 27] - INVALID_CHAR, // [ 28] - INVALID_CHAR, // [ 29] - INVALID_CHAR, // [ 30] - INVALID_CHAR, // [ 31] - INVALID_CHAR, // [ 32] - INVALID_CHAR, // [ 33] - INVALID_CHAR, // [ 34] - INVALID_CHAR, // [ 35] - INVALID_CHAR, // [ 36] - INVALID_CHAR, // [ 37] - INVALID_CHAR, // [ 38] - INVALID_CHAR, // [ 39] - INVALID_CHAR, // [ 40] - INVALID_CHAR, // [ 41] - INVALID_CHAR, // [ 42] - INVALID_CHAR, // [ 43] - INVALID_CHAR, // [ 44] - INVALID_CHAR, // [ 45] - INVALID_CHAR, // [ 46] - INVALID_CHAR, // [ 47] - INVALID_CHAR, // [ 48] - INVALID_CHAR, // [ 49] - INVALID_CHAR, // [ 50] - INVALID_CHAR, // [ 51] - INVALID_CHAR, // [ 52] - INVALID_CHAR, // [ 53] - INVALID_CHAR, // [ 54] - INVALID_CHAR, // [ 55] - INVALID_CHAR, // [ 56] - INVALID_CHAR, // [ 57] - INVALID_CHAR, // [ 58] - INVALID_CHAR, // [ 59] - INVALID_CHAR, // [ 60] - INVALID_CHAR, // [ 61] - INVALID_CHAR, // [ 62] - INVALID_CHAR, // [ 63] - INVALID_CHAR, // [ 64] - 'T', // [ 65] A -> T - INVALID_CHAR, // [ 66] - 'G', // [ 67] C -> G - INVALID_CHAR, // [ 68] - INVALID_CHAR, // [ 69] - INVALID_CHAR, // [ 70] - 'C', // [ 71] G -> C - INVALID_CHAR, // [ 72] - INVALID_CHAR, // [ 73] - INVALID_CHAR, // [ 74] - INVALID_CHAR, // [ 75] - INVALID_CHAR, // [ 76] - INVALID_CHAR, // [ 77] - INVALID_CHAR, // [ 78] - INVALID_CHAR, // [ 79] - INVALID_CHAR, // [ 80] - INVALID_CHAR, // [ 81] - INVALID_CHAR, // [ 82] - INVALID_CHAR, // [ 83] - 'A', // [ 84] T -> A - 'A', // [ 85] U -> A - INVALID_CHAR, // [ 86] - INVALID_CHAR, // [ 87] - INVALID_CHAR, // [ 88] - INVALID_CHAR, // [ 89] - INVALID_CHAR, // [ 90] - INVALID_CHAR, // [ 91] - INVALID_CHAR, // [ 92] - INVALID_CHAR, // [ 93] - INVALID_CHAR, // [ 94] - INVALID_CHAR, // [ 95] - INVALID_CHAR, // [ 96] - 'T', // [ 97] a -> T - INVALID_CHAR, // [ 98] - 'G', // [ 99] c -> G - INVALID_CHAR, // [100] - INVALID_CHAR, // [101] - INVALID_CHAR, // [102] - 'C', // [103] g -> C - INVALID_CHAR, // [104] - INVALID_CHAR, // [105] - INVALID_CHAR, // [106] - INVALID_CHAR, // [107] - INVALID_CHAR, // [108] - INVALID_CHAR, // [109] - INVALID_CHAR, // [110] - INVALID_CHAR, // [111] - INVALID_CHAR, // [112] - INVALID_CHAR, // [113] - INVALID_CHAR, // [114] - INVALID_CHAR, // [115] - 'A', // [116] t -> A - 'A', // [117] u -> A - INVALID_CHAR, // [118] - INVALID_CHAR, // [119] - INVALID_CHAR, // [120] - INVALID_CHAR, // [121] - INVALID_CHAR, // [122] - INVALID_CHAR, // [123] - INVALID_CHAR, // [124] - INVALID_CHAR, // [125] - INVALID_CHAR, // [126] - INVALID_CHAR, // [127] - INVALID_CHAR, // [128] - INVALID_CHAR, // [129] - INVALID_CHAR, // [130] - INVALID_CHAR, // [131] - INVALID_CHAR, // [132] - INVALID_CHAR, // [133] - INVALID_CHAR, // [134] - INVALID_CHAR, // [135] - INVALID_CHAR, // [136] - INVALID_CHAR, // [137] - INVALID_CHAR, // [138] - INVALID_CHAR, // [139] - INVALID_CHAR, // [140] - INVALID_CHAR, // [141] - INVALID_CHAR, // [142] - INVALID_CHAR, // [143] - INVALID_CHAR, // [144] - INVALID_CHAR, // [145] - INVALID_CHAR, // [146] - INVALID_CHAR, // [147] - INVALID_CHAR, // [148] - INVALID_CHAR, // [149] - INVALID_CHAR, // [150] - INVALID_CHAR, // [151] - INVALID_CHAR, // [152] - INVALID_CHAR, // [153] - INVALID_CHAR, // [154] - INVALID_CHAR, // [155] - INVALID_CHAR, // [156] - INVALID_CHAR, // [157] - INVALID_CHAR, // [158] - INVALID_CHAR, // [159] - INVALID_CHAR, // [160] - INVALID_CHAR, // [161] - INVALID_CHAR, // [162] - INVALID_CHAR, // [163] - INVALID_CHAR, // [164] - INVALID_CHAR, // [165] - INVALID_CHAR, // [166] - INVALID_CHAR, // [167] - INVALID_CHAR, // [168] - INVALID_CHAR, // [169] - INVALID_CHAR, // [170] - INVALID_CHAR, // [171] - INVALID_CHAR, // [172] - INVALID_CHAR, // [173] - INVALID_CHAR, // [174] - INVALID_CHAR, // [175] - INVALID_CHAR, // [176] - INVALID_CHAR, // [177] - INVALID_CHAR, // [178] - INVALID_CHAR, // [179] - INVALID_CHAR, // [180] - INVALID_CHAR, // [181] - INVALID_CHAR, // [182] - INVALID_CHAR, // [183] - INVALID_CHAR, // [184] - INVALID_CHAR, // [185] - INVALID_CHAR, // [186] - INVALID_CHAR, // [187] - INVALID_CHAR, // [188] - INVALID_CHAR, // [189] - INVALID_CHAR, // [190] - INVALID_CHAR, // [191] - INVALID_CHAR, // [192] - INVALID_CHAR, // [193] - INVALID_CHAR, // [194] - INVALID_CHAR, // [195] - INVALID_CHAR, // [196] - INVALID_CHAR, // [197] - INVALID_CHAR, // [198] - INVALID_CHAR, // [199] - INVALID_CHAR, // [200] - INVALID_CHAR, // [201] - INVALID_CHAR, // [202] - INVALID_CHAR, // [203] - INVALID_CHAR, // [204] - INVALID_CHAR, // [205] - INVALID_CHAR, // [206] - INVALID_CHAR, // [207] - INVALID_CHAR, // [208] - INVALID_CHAR, // [209] - INVALID_CHAR, // [210] - INVALID_CHAR, // [211] - INVALID_CHAR, // [212] - INVALID_CHAR, // [213] - INVALID_CHAR, // [214] - INVALID_CHAR, // [215] - INVALID_CHAR, // [216] - INVALID_CHAR, // [217] - INVALID_CHAR, // [218] - INVALID_CHAR, // [219] - INVALID_CHAR, // [220] - INVALID_CHAR, // [221] - INVALID_CHAR, // [222] - INVALID_CHAR, // [223] - INVALID_CHAR, // [224] - INVALID_CHAR, // [225] - INVALID_CHAR, // [226] - INVALID_CHAR, // [227] - INVALID_CHAR, // [228] - INVALID_CHAR, // [229] - INVALID_CHAR, // [230] - INVALID_CHAR, // [231] - INVALID_CHAR, // [232] - INVALID_CHAR, // [233] - INVALID_CHAR, // [234] - INVALID_CHAR, // [235] - INVALID_CHAR, // [236] - INVALID_CHAR, // [237] - INVALID_CHAR, // [238] - INVALID_CHAR, // [239] - INVALID_CHAR, // [240] - INVALID_CHAR, // [241] - INVALID_CHAR, // [242] - INVALID_CHAR, // [243] - INVALID_CHAR, // [244] - INVALID_CHAR, // [245] - INVALID_CHAR, // [246] - INVALID_CHAR, // [247] - INVALID_CHAR, // [248] - INVALID_CHAR, // [249] - INVALID_CHAR, // [250] - INVALID_CHAR, // [251] - INVALID_CHAR, // [252] - INVALID_CHAR, // [253] - INVALID_CHAR, // [254] - INVALID_CHAR, // [255] -}; - -unsigned g_CharToCompLetter[256] = - { - INVALID_LETTER, // [ 0] - INVALID_LETTER, // [ 1] - INVALID_LETTER, // [ 2] - INVALID_LETTER, // [ 3] - INVALID_LETTER, // [ 4] - INVALID_LETTER, // [ 5] - INVALID_LETTER, // [ 6] - INVALID_LETTER, // [ 7] - INVALID_LETTER, // [ 8] - INVALID_LETTER, // [ 9] - INVALID_LETTER, // [ 10] - INVALID_LETTER, // [ 11] - INVALID_LETTER, // [ 12] - INVALID_LETTER, // [ 13] - INVALID_LETTER, // [ 14] - INVALID_LETTER, // [ 15] - INVALID_LETTER, // [ 16] - INVALID_LETTER, // [ 17] - INVALID_LETTER, // [ 18] - INVALID_LETTER, // [ 19] - INVALID_LETTER, // [ 20] - INVALID_LETTER, // [ 21] - INVALID_LETTER, // [ 22] - INVALID_LETTER, // [ 23] - INVALID_LETTER, // [ 24] - INVALID_LETTER, // [ 25] - INVALID_LETTER, // [ 26] - INVALID_LETTER, // [ 27] - INVALID_LETTER, // [ 28] - INVALID_LETTER, // [ 29] - INVALID_LETTER, // [ 30] - INVALID_LETTER, // [ 31] - INVALID_LETTER, // [ 32] - INVALID_LETTER, // [ 33] - INVALID_LETTER, // [ 34] - INVALID_LETTER, // [ 35] - INVALID_LETTER, // [ 36] - INVALID_LETTER, // [ 37] - INVALID_LETTER, // [ 38] - INVALID_LETTER, // [ 39] - INVALID_LETTER, // [ 40] - INVALID_LETTER, // [ 41] - INVALID_LETTER, // [ 42] - INVALID_LETTER, // [ 43] - INVALID_LETTER, // [ 44] - INVALID_LETTER, // [ 45] - INVALID_LETTER, // [ 46] - INVALID_LETTER, // [ 47] - INVALID_LETTER, // [ 48] - INVALID_LETTER, // [ 49] - INVALID_LETTER, // [ 50] - INVALID_LETTER, // [ 51] - INVALID_LETTER, // [ 52] - INVALID_LETTER, // [ 53] - INVALID_LETTER, // [ 54] - INVALID_LETTER, // [ 55] - INVALID_LETTER, // [ 56] - INVALID_LETTER, // [ 57] - INVALID_LETTER, // [ 58] - INVALID_LETTER, // [ 59] - INVALID_LETTER, // [ 60] - INVALID_LETTER, // [ 61] - INVALID_LETTER, // [ 62] - INVALID_LETTER, // [ 63] - INVALID_LETTER, // [ 64] - 3, // [ 65] A -> T - INVALID_LETTER, // [ 66] - 2, // [ 67] C -> G - INVALID_LETTER, // [ 68] - INVALID_LETTER, // [ 69] - INVALID_LETTER, // [ 70] - 1, // [ 71] G -> C - INVALID_LETTER, // [ 72] - INVALID_LETTER, // [ 73] - INVALID_LETTER, // [ 74] - INVALID_LETTER, // [ 75] - INVALID_LETTER, // [ 76] - INVALID_LETTER, // [ 77] - INVALID_LETTER, // [ 78] - INVALID_LETTER, // [ 79] - INVALID_LETTER, // [ 80] - INVALID_LETTER, // [ 81] - INVALID_LETTER, // [ 82] - INVALID_LETTER, // [ 83] - 0, // [ 84] T -> A - 0, // [ 85] U -> A - INVALID_LETTER, // [ 86] - INVALID_LETTER, // [ 87] - INVALID_LETTER, // [ 88] - INVALID_LETTER, // [ 89] - INVALID_LETTER, // [ 90] - INVALID_LETTER, // [ 91] - INVALID_LETTER, // [ 92] - INVALID_LETTER, // [ 93] - INVALID_LETTER, // [ 94] - INVALID_LETTER, // [ 95] - INVALID_LETTER, // [ 96] - 3, // [ 97] a -> T - INVALID_LETTER, // [ 98] - 2, // [ 99] c -> G - INVALID_LETTER, // [100] - INVALID_LETTER, // [101] - INVALID_LETTER, // [102] - 1, // [103] g -> C - INVALID_LETTER, // [104] - INVALID_LETTER, // [105] - INVALID_LETTER, // [106] - INVALID_LETTER, // [107] - INVALID_LETTER, // [108] - INVALID_LETTER, // [109] - INVALID_LETTER, // [110] - INVALID_LETTER, // [111] - INVALID_LETTER, // [112] - INVALID_LETTER, // [113] - INVALID_LETTER, // [114] - INVALID_LETTER, // [115] - 0, // [116] t -> A - 0, // [117] u -> A - INVALID_LETTER, // [118] - INVALID_LETTER, // [119] - INVALID_LETTER, // [120] - INVALID_LETTER, // [121] - INVALID_LETTER, // [122] - INVALID_LETTER, // [123] - INVALID_LETTER, // [124] - INVALID_LETTER, // [125] - INVALID_LETTER, // [126] - INVALID_LETTER, // [127] - INVALID_LETTER, // [128] - INVALID_LETTER, // [129] - INVALID_LETTER, // [130] - INVALID_LETTER, // [131] - INVALID_LETTER, // [132] - INVALID_LETTER, // [133] - INVALID_LETTER, // [134] - INVALID_LETTER, // [135] - INVALID_LETTER, // [136] - INVALID_LETTER, // [137] - INVALID_LETTER, // [138] - INVALID_LETTER, // [139] - INVALID_LETTER, // [140] - INVALID_LETTER, // [141] - INVALID_LETTER, // [142] - INVALID_LETTER, // [143] - INVALID_LETTER, // [144] - INVALID_LETTER, // [145] - INVALID_LETTER, // [146] - INVALID_LETTER, // [147] - INVALID_LETTER, // [148] - INVALID_LETTER, // [149] - INVALID_LETTER, // [150] - INVALID_LETTER, // [151] - INVALID_LETTER, // [152] - INVALID_LETTER, // [153] - INVALID_LETTER, // [154] - INVALID_LETTER, // [155] - INVALID_LETTER, // [156] - INVALID_LETTER, // [157] - INVALID_LETTER, // [158] - INVALID_LETTER, // [159] - INVALID_LETTER, // [160] - INVALID_LETTER, // [161] - INVALID_LETTER, // [162] - INVALID_LETTER, // [163] - INVALID_LETTER, // [164] - INVALID_LETTER, // [165] - INVALID_LETTER, // [166] - INVALID_LETTER, // [167] - INVALID_LETTER, // [168] - INVALID_LETTER, // [169] - INVALID_LETTER, // [170] - INVALID_LETTER, // [171] - INVALID_LETTER, // [172] - INVALID_LETTER, // [173] - INVALID_LETTER, // [174] - INVALID_LETTER, // [175] - INVALID_LETTER, // [176] - INVALID_LETTER, // [177] - INVALID_LETTER, // [178] - INVALID_LETTER, // [179] - INVALID_LETTER, // [180] - INVALID_LETTER, // [181] - INVALID_LETTER, // [182] - INVALID_LETTER, // [183] - INVALID_LETTER, // [184] - INVALID_LETTER, // [185] - INVALID_LETTER, // [186] - INVALID_LETTER, // [187] - INVALID_LETTER, // [188] - INVALID_LETTER, // [189] - INVALID_LETTER, // [190] - INVALID_LETTER, // [191] - INVALID_LETTER, // [192] - INVALID_LETTER, // [193] - INVALID_LETTER, // [194] - INVALID_LETTER, // [195] - INVALID_LETTER, // [196] - INVALID_LETTER, // [197] - INVALID_LETTER, // [198] - INVALID_LETTER, // [199] - INVALID_LETTER, // [200] - INVALID_LETTER, // [201] - INVALID_LETTER, // [202] - INVALID_LETTER, // [203] - INVALID_LETTER, // [204] - INVALID_LETTER, // [205] - INVALID_LETTER, // [206] - INVALID_LETTER, // [207] - INVALID_LETTER, // [208] - INVALID_LETTER, // [209] - INVALID_LETTER, // [210] - INVALID_LETTER, // [211] - INVALID_LETTER, // [212] - INVALID_LETTER, // [213] - INVALID_LETTER, // [214] - INVALID_LETTER, // [215] - INVALID_LETTER, // [216] - INVALID_LETTER, // [217] - INVALID_LETTER, // [218] - INVALID_LETTER, // [219] - INVALID_LETTER, // [220] - INVALID_LETTER, // [221] - INVALID_LETTER, // [222] - INVALID_LETTER, // [223] - INVALID_LETTER, // [224] - INVALID_LETTER, // [225] - INVALID_LETTER, // [226] - INVALID_LETTER, // [227] - INVALID_LETTER, // [228] - INVALID_LETTER, // [229] - INVALID_LETTER, // [230] - INVALID_LETTER, // [231] - INVALID_LETTER, // [232] - INVALID_LETTER, // [233] - INVALID_LETTER, // [234] - INVALID_LETTER, // [235] - INVALID_LETTER, // [236] - INVALID_LETTER, // [237] - INVALID_LETTER, // [238] - INVALID_LETTER, // [239] - INVALID_LETTER, // [240] - INVALID_LETTER, // [241] - INVALID_LETTER, // [242] - INVALID_LETTER, // [243] - INVALID_LETTER, // [244] - INVALID_LETTER, // [245] - INVALID_LETTER, // [246] - INVALID_LETTER, // [247] - INVALID_LETTER, // [248] - INVALID_LETTER, // [249] - INVALID_LETTER, // [250] - INVALID_LETTER, // [251] - INVALID_LETTER, // [252] - INVALID_LETTER, // [253] - INVALID_LETTER, // [254] - INVALID_LETTER, // [255] -}; - -bool g_IsAminoChar[256] = - { - false, // [ 0] 0x00 - false, // [ 1] 0x01 - false, // [ 2] 0x02 - false, // [ 3] 0x03 - false, // [ 4] 0x04 - false, // [ 5] 0x05 - false, // [ 6] 0x06 - false, // [ 7] 0x07 - false, // [ 8] 0x08 - false, // [ 9] 0x09 - false, // [ 10] 0x0a - false, // [ 11] 0x0b - false, // [ 12] 0x0c - false, // [ 13] 0x0d - false, // [ 14] 0x0e - false, // [ 15] 0x0f - false, // [ 16] 0x10 - false, // [ 17] 0x11 - false, // [ 18] 0x12 - false, // [ 19] 0x13 - false, // [ 20] 0x14 - false, // [ 21] 0x15 - false, // [ 22] 0x16 - false, // [ 23] 0x17 - false, // [ 24] 0x18 - false, // [ 25] 0x19 - false, // [ 26] 0x1a - false, // [ 27] 0x1b - false, // [ 28] 0x1c - false, // [ 29] 0x1d - false, // [ 30] 0x1e - false, // [ 31] 0x1f - false, // [ 32] ' ' - false, // [ 33] '!' - false, // [ 34] '"' - false, // [ 35] '#' - false, // [ 36] '$' - false, // [ 37] '%' - false, // [ 38] '&' - false, // [ 39] ''' - false, // [ 40] '(' - false, // [ 41] ')' - true, // [ 42] '*' = STP - false, // [ 43] '+' - false, // [ 44] ',' - false, // [ 45] '-' - false, // [ 46] '.' - false, // [ 47] '/' - false, // [ 48] '0' - false, // [ 49] '1' - false, // [ 50] '2' - false, // [ 51] '3' - false, // [ 52] '4' - false, // [ 53] '5' - false, // [ 54] '6' - false, // [ 55] '7' - false, // [ 56] '8' - false, // [ 57] '9' - false, // [ 58] ':' - false, // [ 59] ';' - false, // [ 60] '<' - false, // [ 61] '=' - false, // [ 62] '>' - false, // [ 63] '?' - false, // [ 64] '@' - true, // [ 65] 'A' = Ala - false, // [ 66] 'B' - true, // [ 67] 'C' = Cys - true, // [ 68] 'D' = Asp - true, // [ 69] 'E' = Glu - true, // [ 70] 'F' = Phe - true, // [ 71] 'G' = Gly - true, // [ 72] 'H' = His - true, // [ 73] 'I' = Ile - false, // [ 74] 'J' - true, // [ 75] 'K' = Lys - true, // [ 76] 'L' = Leu - true, // [ 77] 'M' = Met - true, // [ 78] 'N' = Asn - false, // [ 79] 'O' - true, // [ 80] 'P' = Pro - true, // [ 81] 'Q' = Gln - true, // [ 82] 'R' = Arg - true, // [ 83] 'S' = Ser - true, // [ 84] 'T' = Thr - false, // [ 85] 'U' - true, // [ 86] 'V' = Val - true, // [ 87] 'W' = Trp - false, // [ 88] 'X' - true, // [ 89] 'Y' = Tyr - false, // [ 90] 'Z' - false, // [ 91] '[' - false, // [ 92] '\' - false, // [ 93] ']' - false, // [ 94] '^' - false, // [ 95] '_' - false, // [ 96] '`' - true, // [ 97] 'A' = Ala - false, // [ 98] 'B' - true, // [ 99] 'C' = Cys - true, // [100] 'D' = Asp - true, // [101] 'E' = Glu - true, // [102] 'F' = Phe - true, // [103] 'G' = Gly - true, // [104] 'H' = His - true, // [105] 'I' = Ile - false, // [106] 'J' - true, // [107] 'K' = Lys - true, // [108] 'L' = Leu - true, // [109] 'M' = Met - true, // [110] 'N' = Asn - false, // [111] 'O' - true, // [112] 'P' = Pro - true, // [113] 'Q' = Gln - true, // [114] 'R' = Arg - true, // [115] 'S' = Ser - true, // [116] 'T' = Thr - false, // [117] 'U' - true, // [118] 'V' = Val - true, // [119] 'W' = Trp - false, // [120] 'X' - true, // [121] 'Y' = Tyr - false, // [122] 'Z' - false, // [123] '{' - false, // [124] '|' - false, // [125] '}' - false, // [126] '~' - false, // [127] 0x7f - false, // [128] 0x80 - false, // [129] 0x81 - false, // [130] 0x82 - false, // [131] 0x83 - false, // [132] 0x84 - false, // [133] 0x85 - false, // [134] 0x86 - false, // [135] 0x87 - false, // [136] 0x88 - false, // [137] 0x89 - false, // [138] 0x8a - false, // [139] 0x8b - false, // [140] 0x8c - false, // [141] 0x8d - false, // [142] 0x8e - false, // [143] 0x8f - false, // [144] 0x90 - false, // [145] 0x91 - false, // [146] 0x92 - false, // [147] 0x93 - false, // [148] 0x94 - false, // [149] 0x95 - false, // [150] 0x96 - false, // [151] 0x97 - false, // [152] 0x98 - false, // [153] 0x99 - false, // [154] 0x9a - false, // [155] 0x9b - false, // [156] 0x9c - false, // [157] 0x9d - false, // [158] 0x9e - false, // [159] 0x9f - false, // [160] 0xa0 - false, // [161] 0xa1 - false, // [162] 0xa2 - false, // [163] 0xa3 - false, // [164] 0xa4 - false, // [165] 0xa5 - false, // [166] 0xa6 - false, // [167] 0xa7 - false, // [168] 0xa8 - false, // [169] 0xa9 - false, // [170] 0xaa - false, // [171] 0xab - false, // [172] 0xac - false, // [173] 0xad - false, // [174] 0xae - false, // [175] 0xaf - false, // [176] 0xb0 - false, // [177] 0xb1 - false, // [178] 0xb2 - false, // [179] 0xb3 - false, // [180] 0xb4 - false, // [181] 0xb5 - false, // [182] 0xb6 - false, // [183] 0xb7 - false, // [184] 0xb8 - false, // [185] 0xb9 - false, // [186] 0xba - false, // [187] 0xbb - false, // [188] 0xbc - false, // [189] 0xbd - false, // [190] 0xbe - false, // [191] 0xbf - false, // [192] 0xc0 - false, // [193] 0xc1 - false, // [194] 0xc2 - false, // [195] 0xc3 - false, // [196] 0xc4 - false, // [197] 0xc5 - false, // [198] 0xc6 - false, // [199] 0xc7 - false, // [200] 0xc8 - false, // [201] 0xc9 - false, // [202] 0xca - false, // [203] 0xcb - false, // [204] 0xcc - false, // [205] 0xcd - false, // [206] 0xce - false, // [207] 0xcf - false, // [208] 0xd0 - false, // [209] 0xd1 - false, // [210] 0xd2 - false, // [211] 0xd3 - false, // [212] 0xd4 - false, // [213] 0xd5 - false, // [214] 0xd6 - false, // [215] 0xd7 - false, // [216] 0xd8 - false, // [217] 0xd9 - false, // [218] 0xda - false, // [219] 0xdb - false, // [220] 0xdc - false, // [221] 0xdd - false, // [222] 0xde - false, // [223] 0xdf - false, // [224] 0xe0 - false, // [225] 0xe1 - false, // [226] 0xe2 - false, // [227] 0xe3 - false, // [228] 0xe4 - false, // [229] 0xe5 - false, // [230] 0xe6 - false, // [231] 0xe7 - false, // [232] 0xe8 - false, // [233] 0xe9 - false, // [234] 0xea - false, // [235] 0xeb - false, // [236] 0xec - false, // [237] 0xed - false, // [238] 0xee - false, // [239] 0xef - false, // [240] 0xf0 - false, // [241] 0xf1 - false, // [242] 0xf2 - false, // [243] 0xf3 - false, // [244] 0xf4 - false, // [245] 0xf5 - false, // [246] 0xf6 - false, // [247] 0xf7 - false, // [248] 0xf8 - false, // [249] 0xf9 - false, // [250] 0xfa - false, // [251] 0xfb - false, // [252] 0xfc - false, // [253] 0xfd - false, // [254] 0xfe - false, // [255] 0xff - }; - -bool g_IsNucleoChar[256] = - { - false, // [ 0] 0x00 - false, // [ 1] 0x01 - false, // [ 2] 0x02 - false, // [ 3] 0x03 - false, // [ 4] 0x04 - false, // [ 5] 0x05 - false, // [ 6] 0x06 - false, // [ 7] 0x07 - false, // [ 8] 0x08 - false, // [ 9] 0x09 - false, // [ 10] 0x0a - false, // [ 11] 0x0b - false, // [ 12] 0x0c - false, // [ 13] 0x0d - false, // [ 14] 0x0e - false, // [ 15] 0x0f - false, // [ 16] 0x10 - false, // [ 17] 0x11 - false, // [ 18] 0x12 - false, // [ 19] 0x13 - false, // [ 20] 0x14 - false, // [ 21] 0x15 - false, // [ 22] 0x16 - false, // [ 23] 0x17 - false, // [ 24] 0x18 - false, // [ 25] 0x19 - false, // [ 26] 0x1a - false, // [ 27] 0x1b - false, // [ 28] 0x1c - false, // [ 29] 0x1d - false, // [ 30] 0x1e - false, // [ 31] 0x1f - false, // [ 32] ' ' - false, // [ 33] '!' - false, // [ 34] '"' - false, // [ 35] '#' - false, // [ 36] '$' - false, // [ 37] '%' - false, // [ 38] '&' - false, // [ 39] ''' - false, // [ 40] '(' - false, // [ 41] ')' - false, // [ 42] '*' - false, // [ 43] '+' - false, // [ 44] ',' - false, // [ 45] '-' - false, // [ 46] '.' - false, // [ 47] '/' - false, // [ 48] '0' - false, // [ 49] '1' - false, // [ 50] '2' - false, // [ 51] '3' - false, // [ 52] '4' - false, // [ 53] '5' - false, // [ 54] '6' - false, // [ 55] '7' - false, // [ 56] '8' - false, // [ 57] '9' - false, // [ 58] ':' - false, // [ 59] ';' - false, // [ 60] '<' - false, // [ 61] '=' - false, // [ 62] '>' - false, // [ 63] '?' - false, // [ 64] '@' - true, // [ 65] 'A' (Nucleotide) - false, // [ 66] 'B' - true, // [ 67] 'C' (Nucleotide) - false, // [ 68] 'D' - false, // [ 69] 'E' - false, // [ 70] 'F' - true, // [ 71] 'G' (Nucleotide) - false, // [ 72] 'H' - false, // [ 73] 'I' - false, // [ 74] 'J' - false, // [ 75] 'K' - false, // [ 76] 'L' - false, // [ 77] 'M' - true, // [ 78] 'N' (Nucleotide) - false, // [ 79] 'O' - false, // [ 80] 'P' - false, // [ 81] 'Q' - false, // [ 82] 'R' - false, // [ 83] 'S' - true, // [ 84] 'T' (Nucleotide) - true, // [ 85] 'U' (Nucleotide) - false, // [ 86] 'V' - false, // [ 87] 'W' - false, // [ 88] 'X' - false, // [ 89] 'Y' - false, // [ 90] 'Z' - false, // [ 91] '[' - false, // [ 92] '\' - false, // [ 93] ']' - false, // [ 94] '^' - false, // [ 95] '_' - false, // [ 96] '`' - true, // [ 97] 'A' (Nucleotide) - false, // [ 98] 'B' - true, // [ 99] 'C' (Nucleotide) - false, // [100] 'D' - false, // [101] 'E' - false, // [102] 'F' - true, // [103] 'G' (Nucleotide) - false, // [104] 'H' - false, // [105] 'I' - false, // [106] 'J' - false, // [107] 'K' - false, // [108] 'L' - false, // [109] 'M' - true, // [110] 'N' (Nucleotide) - false, // [111] 'O' - false, // [112] 'P' - false, // [113] 'Q' - false, // [114] 'R' - false, // [115] 'S' - true, // [116] 'T' (Nucleotide) - true, // [117] 'U' (Nucleotide) - false, // [118] 'V' - false, // [119] 'W' - false, // [120] 'X' - false, // [121] 'Y' - false, // [122] 'Z' - false, // [123] '{' - false, // [124] '|' - false, // [125] '}' - false, // [126] '~' - false, // [127] 0x7f - false, // [128] 0x80 - false, // [129] 0x81 - false, // [130] 0x82 - false, // [131] 0x83 - false, // [132] 0x84 - false, // [133] 0x85 - false, // [134] 0x86 - false, // [135] 0x87 - false, // [136] 0x88 - false, // [137] 0x89 - false, // [138] 0x8a - false, // [139] 0x8b - false, // [140] 0x8c - false, // [141] 0x8d - false, // [142] 0x8e - false, // [143] 0x8f - false, // [144] 0x90 - false, // [145] 0x91 - false, // [146] 0x92 - false, // [147] 0x93 - false, // [148] 0x94 - false, // [149] 0x95 - false, // [150] 0x96 - false, // [151] 0x97 - false, // [152] 0x98 - false, // [153] 0x99 - false, // [154] 0x9a - false, // [155] 0x9b - false, // [156] 0x9c - false, // [157] 0x9d - false, // [158] 0x9e - false, // [159] 0x9f - false, // [160] 0xa0 - false, // [161] 0xa1 - false, // [162] 0xa2 - false, // [163] 0xa3 - false, // [164] 0xa4 - false, // [165] 0xa5 - false, // [166] 0xa6 - false, // [167] 0xa7 - false, // [168] 0xa8 - false, // [169] 0xa9 - false, // [170] 0xaa - false, // [171] 0xab - false, // [172] 0xac - false, // [173] 0xad - false, // [174] 0xae - false, // [175] 0xaf - false, // [176] 0xb0 - false, // [177] 0xb1 - false, // [178] 0xb2 - false, // [179] 0xb3 - false, // [180] 0xb4 - false, // [181] 0xb5 - false, // [182] 0xb6 - false, // [183] 0xb7 - false, // [184] 0xb8 - false, // [185] 0xb9 - false, // [186] 0xba - false, // [187] 0xbb - false, // [188] 0xbc - false, // [189] 0xbd - false, // [190] 0xbe - false, // [191] 0xbf - false, // [192] 0xc0 - false, // [193] 0xc1 - false, // [194] 0xc2 - false, // [195] 0xc3 - false, // [196] 0xc4 - false, // [197] 0xc5 - false, // [198] 0xc6 - false, // [199] 0xc7 - false, // [200] 0xc8 - false, // [201] 0xc9 - false, // [202] 0xca - false, // [203] 0xcb - false, // [204] 0xcc - false, // [205] 0xcd - false, // [206] 0xce - false, // [207] 0xcf - false, // [208] 0xd0 - false, // [209] 0xd1 - false, // [210] 0xd2 - false, // [211] 0xd3 - false, // [212] 0xd4 - false, // [213] 0xd5 - false, // [214] 0xd6 - false, // [215] 0xd7 - false, // [216] 0xd8 - false, // [217] 0xd9 - false, // [218] 0xda - false, // [219] 0xdb - false, // [220] 0xdc - false, // [221] 0xdd - false, // [222] 0xde - false, // [223] 0xdf - false, // [224] 0xe0 - false, // [225] 0xe1 - false, // [226] 0xe2 - false, // [227] 0xe3 - false, // [228] 0xe4 - false, // [229] 0xe5 - false, // [230] 0xe6 - false, // [231] 0xe7 - false, // [232] 0xe8 - false, // [233] 0xe9 - false, // [234] 0xea - false, // [235] 0xeb - false, // [236] 0xec - false, // [237] 0xed - false, // [238] 0xee - false, // [239] 0xef - false, // [240] 0xf0 - false, // [241] 0xf1 - false, // [242] 0xf2 - false, // [243] 0xf3 - false, // [244] 0xf4 - false, // [245] 0xf5 - false, // [246] 0xf6 - false, // [247] 0xf7 - false, // [248] 0xf8 - false, // [249] 0xf9 - false, // [250] 0xfa - false, // [251] 0xfb - false, // [252] 0xfc - false, // [253] 0xfd - false, // [254] 0xfe - false, // [255] 0xff - }; - -bool g_IsACGTU[256] = - { - false, // [ 0] 0x00 - false, // [ 1] 0x01 - false, // [ 2] 0x02 - false, // [ 3] 0x03 - false, // [ 4] 0x04 - false, // [ 5] 0x05 - false, // [ 6] 0x06 - false, // [ 7] 0x07 - false, // [ 8] 0x08 - false, // [ 9] 0x09 - false, // [ 10] 0x0a - false, // [ 11] 0x0b - false, // [ 12] 0x0c - false, // [ 13] 0x0d - false, // [ 14] 0x0e - false, // [ 15] 0x0f - false, // [ 16] 0x10 - false, // [ 17] 0x11 - false, // [ 18] 0x12 - false, // [ 19] 0x13 - false, // [ 20] 0x14 - false, // [ 21] 0x15 - false, // [ 22] 0x16 - false, // [ 23] 0x17 - false, // [ 24] 0x18 - false, // [ 25] 0x19 - false, // [ 26] 0x1a - false, // [ 27] 0x1b - false, // [ 28] 0x1c - false, // [ 29] 0x1d - false, // [ 30] 0x1e - false, // [ 31] 0x1f - false, // [ 32] ' ' - false, // [ 33] '!' - false, // [ 34] '"' - false, // [ 35] '#' - false, // [ 36] '$' - false, // [ 37] '%' - false, // [ 38] '&' - false, // [ 39] ''' - false, // [ 40] '(' - false, // [ 41] ')' - false, // [ 42] '*' - false, // [ 43] '+' - false, // [ 44] ',' - false, // [ 45] '-' - false, // [ 46] '.' - false, // [ 47] '/' - false, // [ 48] '0' - false, // [ 49] '1' - false, // [ 50] '2' - false, // [ 51] '3' - false, // [ 52] '4' - false, // [ 53] '5' - false, // [ 54] '6' - false, // [ 55] '7' - false, // [ 56] '8' - false, // [ 57] '9' - false, // [ 58] ':' - false, // [ 59] ';' - false, // [ 60] '<' - false, // [ 61] '=' - false, // [ 62] '>' - false, // [ 63] '?' - false, // [ 64] '@' - true, // [ 65] 'A' (ACGT) - false, // [ 66] 'B' - true, // [ 67] 'C' (ACGT) - false, // [ 68] 'D' - false, // [ 69] 'E' - false, // [ 70] 'F' - true, // [ 71] 'G' (ACGT) - false, // [ 72] 'H' - false, // [ 73] 'I' - false, // [ 74] 'J' - false, // [ 75] 'K' - false, // [ 76] 'L' - false, // [ 77] 'M' - false, // [ 78] 'N' - false, // [ 79] 'O' - false, // [ 80] 'P' - false, // [ 81] 'Q' - false, // [ 82] 'R' - false, // [ 83] 'S' - true, // [ 84] 'T' (ACGT) - true, // [ 85] 'U' (ACGT) - false, // [ 86] 'V' - false, // [ 87] 'W' - false, // [ 88] 'X' - false, // [ 89] 'Y' - false, // [ 90] 'Z' - false, // [ 91] '[' - false, // [ 92] '\' - false, // [ 93] ']' - false, // [ 94] '^' - false, // [ 95] '_' - false, // [ 96] '`' - true, // [ 97] 'A' (ACGT) - false, // [ 98] 'B' - true, // [ 99] 'C' (ACGT) - false, // [100] 'D' - false, // [101] 'E' - false, // [102] 'F' - true, // [103] 'G' (ACGT) - false, // [104] 'H' - false, // [105] 'I' - false, // [106] 'J' - false, // [107] 'K' - false, // [108] 'L' - false, // [109] 'M' - false, // [110] 'N' - false, // [111] 'O' - false, // [112] 'P' - false, // [113] 'Q' - false, // [114] 'R' - false, // [115] 'S' - true, // [116] 'T' (ACGT) - true, // [117] 'U' (ACGT) - false, // [118] 'V' - false, // [119] 'W' - false, // [120] 'X' - false, // [121] 'Y' - false, // [122] 'Z' - false, // [123] '{' - false, // [124] '|' - false, // [125] '}' - false, // [126] '~' - false, // [127] 0x7f - false, // [128] 0x80 - false, // [129] 0x81 - false, // [130] 0x82 - false, // [131] 0x83 - false, // [132] 0x84 - false, // [133] 0x85 - false, // [134] 0x86 - false, // [135] 0x87 - false, // [136] 0x88 - false, // [137] 0x89 - false, // [138] 0x8a - false, // [139] 0x8b - false, // [140] 0x8c - false, // [141] 0x8d - false, // [142] 0x8e - false, // [143] 0x8f - false, // [144] 0x90 - false, // [145] 0x91 - false, // [146] 0x92 - false, // [147] 0x93 - false, // [148] 0x94 - false, // [149] 0x95 - false, // [150] 0x96 - false, // [151] 0x97 - false, // [152] 0x98 - false, // [153] 0x99 - false, // [154] 0x9a - false, // [155] 0x9b - false, // [156] 0x9c - false, // [157] 0x9d - false, // [158] 0x9e - false, // [159] 0x9f - false, // [160] 0xa0 - false, // [161] 0xa1 - false, // [162] 0xa2 - false, // [163] 0xa3 - false, // [164] 0xa4 - false, // [165] 0xa5 - false, // [166] 0xa6 - false, // [167] 0xa7 - false, // [168] 0xa8 - false, // [169] 0xa9 - false, // [170] 0xaa - false, // [171] 0xab - false, // [172] 0xac - false, // [173] 0xad - false, // [174] 0xae - false, // [175] 0xaf - false, // [176] 0xb0 - false, // [177] 0xb1 - false, // [178] 0xb2 - false, // [179] 0xb3 - false, // [180] 0xb4 - false, // [181] 0xb5 - false, // [182] 0xb6 - false, // [183] 0xb7 - false, // [184] 0xb8 - false, // [185] 0xb9 - false, // [186] 0xba - false, // [187] 0xbb - false, // [188] 0xbc - false, // [189] 0xbd - false, // [190] 0xbe - false, // [191] 0xbf - false, // [192] 0xc0 - false, // [193] 0xc1 - false, // [194] 0xc2 - false, // [195] 0xc3 - false, // [196] 0xc4 - false, // [197] 0xc5 - false, // [198] 0xc6 - false, // [199] 0xc7 - false, // [200] 0xc8 - false, // [201] 0xc9 - false, // [202] 0xca - false, // [203] 0xcb - false, // [204] 0xcc - false, // [205] 0xcd - false, // [206] 0xce - false, // [207] 0xcf - false, // [208] 0xd0 - false, // [209] 0xd1 - false, // [210] 0xd2 - false, // [211] 0xd3 - false, // [212] 0xd4 - false, // [213] 0xd5 - false, // [214] 0xd6 - false, // [215] 0xd7 - false, // [216] 0xd8 - false, // [217] 0xd9 - false, // [218] 0xda - false, // [219] 0xdb - false, // [220] 0xdc - false, // [221] 0xdd - false, // [222] 0xde - false, // [223] 0xdf - false, // [224] 0xe0 - false, // [225] 0xe1 - false, // [226] 0xe2 - false, // [227] 0xe3 - false, // [228] 0xe4 - false, // [229] 0xe5 - false, // [230] 0xe6 - false, // [231] 0xe7 - false, // [232] 0xe8 - false, // [233] 0xe9 - false, // [234] 0xea - false, // [235] 0xeb - false, // [236] 0xec - false, // [237] 0xed - false, // [238] 0xee - false, // [239] 0xef - false, // [240] 0xf0 - false, // [241] 0xf1 - false, // [242] 0xf2 - false, // [243] 0xf3 - false, // [244] 0xf4 - false, // [245] 0xf5 - false, // [246] 0xf6 - false, // [247] 0xf7 - false, // [248] 0xf8 - false, // [249] 0xf9 - false, // [250] 0xfa - false, // [251] 0xfb - false, // [252] 0xfc - false, // [253] 0xfd - false, // [254] 0xfe - false, // [255] 0xff - }; - -float g_AminoFreqs[20] = - { - 0.0777f, // 'A' = Ala - 0.0161f, // 'C' = Cys - 0.0527f, // 'D' = Asp - 0.0631f, // 'E' = Glu - 0.0417f, // 'F' = Phe - 0.0718f, // 'G' = Gly - 0.0238f, // 'H' = His - 0.0606f, // 'I' = Ile - 0.0601f, // 'K' = Lys - 0.0906f, // 'L' = Leu - 0.0233f, // 'M' = Met - 0.0439f, // 'N' = Asn - 0.0456f, // 'P' = Pro - 0.0368f, // 'Q' = Gln - 0.0526f, // 'R' = Arg - 0.0639f, // 'S' = Ser - 0.0570f, // 'T' = Thr - 0.0712f, // 'V' = Val - 0.0134f, // 'W' = Trp - 0.0339f, // 'Y' = Tyr - }; diff --git a/alpha.h b/alpha.h deleted file mode 100644 index e021b7f..0000000 --- a/alpha.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef alpha_h -#define alpha_h - -#include -#include - -using namespace std; - -const unsigned INVALID_LETTER = 0; -const unsigned char INVALID_CHAR = '?'; - -extern unsigned g_CharToLetterAmino[]; -extern unsigned g_CharToLetterAminoStop[]; -extern unsigned char g_LetterToCharAmino[]; -extern unsigned g_CharToLetterNucleo[]; -extern unsigned char g_LetterToCharNucleo[]; -extern unsigned g_CodonWordToAminoLetter[]; -extern char g_CodonWordToAminoChar[]; -extern unsigned char g_CharToCompChar[]; -extern unsigned g_CharToCompLetter[]; -extern bool g_IsAminoChar[]; -extern bool g_IsNucleoChar[]; -extern bool g_IsACGTU[]; -extern float g_AminoFreqs[]; - -extern unsigned g_CharToLetterRed[]; -extern unsigned char g_LetterToCharRed[]; -extern unsigned g_RedAlphaSize; - -void LogRedAlphaRed(); -void ReadRedAlphaFromFile(const string &FileName); -unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2, - unsigned char c3); - -static inline bool AminoLetterIsStartCodon(unsigned char Letter) - { - return Letter == 10; - } - -static inline bool AminoLetterIsStopCodon(unsigned char Letter) - { - return Letter == 20; - } - -const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo); -const char *WordToStrNucleo(unsigned Word, unsigned WordLength); -const char *WordToStrAmino(unsigned Word, unsigned WordLength); -const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str); - -#endif // alpha_h diff --git a/alpha2.cpp b/alpha2.cpp deleted file mode 100644 index 26bc1c6..0000000 --- a/alpha2.cpp +++ /dev/null @@ -1,100 +0,0 @@ -#include "myutils.h" -#include "alpha.h" -#include "timing.h" - -bool isgap(byte c) - { - return c == '-' || c == '.'; - } - -const char *WordToStrAmino(unsigned Word, unsigned WordLength) - { - static char Str[32]; - for (unsigned i = 0; i < WordLength; ++i) - { - unsigned Letter = Word%20; - Str[WordLength-i-1] = g_LetterToCharAmino[Letter]; - Word /= 20; - } - Str[WordLength] = 0; - return Str; - } - -const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str) - { - for (unsigned i = 0; i < WordLength; ++i) - { - unsigned Letter = Word%20; - Str[WordLength-i-1] = g_LetterToCharAmino[Letter]; - Word /= 20; - } - Str[WordLength] = 0; - return Str; - } - -const char *WordToStrNucleo(unsigned Word, unsigned WordLength) - { - static char Str[32]; - for (unsigned i = 0; i < WordLength; ++i) - { - unsigned Letter = Word%4; - Str[WordLength-i-1] = g_LetterToCharNucleo[Letter]; - Word /= 4; - } - Str[WordLength] = 0; - return Str; - } - -const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo) - { - return (Nucleo ? WordToStrNucleo : WordToStrAmino)(Word, WordLength); - } - -byte *RevCompAlloc(const byte *Seq, unsigned L) - { - byte *RCSeq = MYALLOC(byte, L, Alpha); - - for (unsigned i = 0; i < L; ++i) - RCSeq[L-i-1] = g_CharToCompChar[Seq[i]]; - - return RCSeq; - } - -void RevCompInPlace(byte *Seq, unsigned L) - { - unsigned L1 = L - 1; - unsigned L2 = L/2; - for (unsigned i = 0; i < L2; ++i) - { - unsigned j = L1 - i; - unsigned ci = Seq[i]; - unsigned cj = Seq[j]; - - unsigned ri = g_CharToCompChar[ci]; - unsigned rj = g_CharToCompChar[cj]; - - Seq[i] = rj; - Seq[j] = ri; - } - - if (L%2 == 1) - Seq[L2] = g_CharToCompChar[Seq[L2]]; - } - -void RevComp(const byte *Seq, unsigned L, byte *RCSeq) - { - for (unsigned i = 0; i < L; ++i) - RCSeq[L-i-1] = g_CharToCompChar[Seq[i]]; - } - -unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2, - unsigned char c3) - { - unsigned Letter1 = g_CharToLetterNucleo[c1]; - unsigned Letter2 = g_CharToLetterNucleo[c2]; - unsigned Letter3 = g_CharToLetterNucleo[c3]; - unsigned Word = Letter1*(4*4) + Letter2*4 + Letter3; - - unsigned Letter = g_CodonWordToAminoLetter[Word]; - return g_LetterToCharAmino[Letter]; - } diff --git a/chainer.h b/chainer.h deleted file mode 100644 index a954dc0..0000000 --- a/chainer.h +++ /dev/null @@ -1,79 +0,0 @@ -#ifndef chainer_h -#define chainer_h - -#include "hsp.h" -#include "seq.h" -#include - -const float BAD_SCORE = -9e9f; - -struct TargetHit - { - unsigned TargetIndex; - unsigned TargetLo; - unsigned TargetHi; - int QueryFrame; - float RawScore; // SOMETIMES USED FOR BIT SCORE!!! -// unsigned TargetLength; - - void LogMe() const - { - Log("lo %u, hi %u, frame %d, score %.1f\n", - TargetLo, TargetHi, QueryFrame, RawScore); - } - }; - -struct ChainData - { - unsigned LastHSPIndex; - unsigned Ahi; - unsigned Bhi; - float Score; - }; - -class Chainer - { -public: - HSPData **m_HSPs; // memory owned elsewhere - unsigned m_HSPCount; - unsigned m_MaxHSPCount; - - BPData *m_BPs; - - unsigned *m_PrevHSPIndexes; // Predecessor in chain - float *m_HSPIndexToChainScore; - - list m_Chains; // Live HSP indexes - -public: - Chainer(); - ~Chainer(); - void Reset(); - void Clear(bool ctor = false); - float Chain(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain, - unsigned &OptChainLength); - bool ResolveOverlaps(const SeqData &SA, const SeqData &SB, double MinScore, - const float * const *SubstMx, HSPData **InHSPs, unsigned InHSPCount, - HSPData **OutHSPs, unsigned &OutHSPCount); - void ResolveOverlap(HSPData &HSP1, HSPData &HSP2); - - float ChainBrute(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain, - unsigned &OptChainLength); - void LogMe() const; - void LogHSPs(HSPData **HSPs, unsigned HSPCount) const; - void LogBPs() const; - - static bool IsValidChain(HSPData **HSPs, unsigned HSPCount); - static void AssertValidChain(HSPData **HSPs, unsigned HSPCount); - static void LogChain(HSPData **HSPs, unsigned HSPCount); - static void LogChain2(HSPData **HSPs, unsigned HSPCount); - static float GetChainScore(HSPData **HSPs, unsigned HSPCount); - -private: - void AllocHSPCount(unsigned MaxHSPCount); - void SetBPs(); - void SortBPs(); - unsigned FindBestChainLT(unsigned Ahi, unsigned Bhi); - }; - -#endif // chainer_h diff --git a/chime.h b/chime.h deleted file mode 100644 index 1b0662a..0000000 --- a/chime.h +++ /dev/null @@ -1,104 +0,0 @@ -#ifndef chime_h -#define chime_h - -#include "seq.h" - -struct ChimeHit2 - { - string QLabel; - string ALabel; - string BLabel; - string Q3; - string A3; - string B3; - - //unsigned LY, LN, LA, LD; - //unsigned RY, RN, RA, RD; - double PctIdQT, PctIdQA, PctIdQB, PctIdQM, PctIdAB; - - unsigned ColLo; - unsigned ColXLo; - unsigned ColXHi; - unsigned ColHi; - unsigned QXLo; - unsigned QXHi; - - double Div; - double Score; - double H; - - unsigned CS_LY, CS_LN, CS_LA, CS_RY, CS_RN, CS_RA; - - float AbQ; - float AbA; - float AbB; - - ChimeHit2() - { - Clear(); - } - - void Clear() - { - Q3.clear(); - A3.clear(); - B3.clear(); - QLabel.clear(); - ALabel.clear(); - BLabel.clear(); - - //LY = LN = LA = LD = UINT_MAX; - //RY = RN = RA = RD = UINT_MAX; - ColLo = ColHi = QXLo = QXHi = ColXLo = ColXHi = UINT_MAX; - CS_LY = CS_LN = CS_LA = CS_RY = CS_RN = CS_RA = UINT_MAX; - PctIdQT = PctIdQA = PctIdQB = PctIdQM = PctIdAB = -1.0; - Div = -1.0; - H = -1.0; - Score = -1.0; - AbQ = AbA = AbB = -1.0f; - }; - - bool Accept() const - { - return Score >= opt_minh && Div >= opt_mindiv && CS_LY >= opt_mindiffs && CS_RY >= opt_mindiffs; - } - - void LogMe() const - { - Log("@L %c ", yon(Score >= 1.0 && Div >= 1.0)); - Log(" %.4f", Score); - Log(" LY %u LN %u LA %u", CS_LY, CS_LN, CS_LA); - Log(" RY %u RN %u RA %u", CS_RY, CS_RN, CS_RA); - Log(" Div %.1f%%", Div); - Log(" Q=%s", QLabel.c_str()); - Log(" A=%s", ALabel.c_str()); - Log(" B=%s", BLabel.c_str()); - Log(" QA %.1f%% QB=%.1f%% AB=%.1f%% QM=%.1f%%", PctIdQA, PctIdQB, PctIdAB, PctIdQM); - Log("\n"); - } - - bool operator<(const ChimeHit2 &rhs) const - { - if (Score == rhs.Score) - return Div > rhs.Div; - return Score > rhs.Score; - } - }; - -static inline bool isacgt(char c) - { - return c == 'A' || c == 'C' || c == 'G' || c == 'T'; - } - -static bool inline isgap(char c) - { - return c == '-' || c == '.'; - } - -void GetChunkInfo(unsigned L, unsigned &Length, vector &Los); -float GetAbFromLabel(const string &Label); -void WriteChimeHitCS(FILE *f, const ChimeHit2 &Hit); -void WriteChimeHit(FILE *f, const ChimeHit2 &Hit); -void WriteChimeFileHdr(FILE *f); - -#endif // chime_h diff --git a/classify.cpp b/classify.cpp index 3bf0d57..2d01183 100644 --- a/classify.cpp +++ b/classify.cpp @@ -296,7 +296,8 @@ int Classify::readTaxonomy(string file) { delete buf4; istringstream iss (tempBuf,istringstream::in); - iss >> name >> taxInfo; + iss >> name; m->gobble(iss); + iss >> taxInfo; taxonomy[name] = taxInfo; phyloTree->addSeqToTree(name, taxInfo); } @@ -309,8 +310,9 @@ int Classify::readTaxonomy(string file) { //read template seqs and save while (!inTax.eof()) { - inTax >> name >> taxInfo; - + inTax >> name; m->gobble(inTax); + inTax >> taxInfo; + taxonomy[name] = taxInfo; phyloTree->addSeqToTree(name, taxInfo); diff --git a/diagbox.h b/diagbox.h deleted file mode 100644 index 0c5846c..0000000 --- a/diagbox.h +++ /dev/null @@ -1,193 +0,0 @@ -#ifndef diagbox_h -#define diagbox_h - -struct DiagBox; - -void GetDiagBox(unsigned LA, unsigned LB, unsigned DiagLo, unsigned DiagHi, DiagBox &Box); -void GetDiagRange(unsigned LA, unsigned LB, unsigned d, - unsigned &mini, unsigned &minj, unsigned &maxi, unsigned &maxj); -void GetDiagLoHi(unsigned LA, unsigned LB, const char *Path, - unsigned &dlo, unsigned &dhi); - -struct DiagBox - { - DiagBox() - { - } - - DiagBox(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi) - { - //GetDiagBox(LA, LB, DiagLo, DiagHi, *this); - //Validate(); - Init(LA_, LB_, DiagLo, DiagHi); - } - - void Init(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi) - { - GetDiagBox(LA_, LB_, DiagLo, DiagHi, *this); - Validate(); - } - - unsigned LA; - unsigned LB; - - unsigned dlo; - unsigned dhi; - - unsigned dlo_mini; - unsigned dlo_minj; - - unsigned dlo_maxi; - unsigned dlo_maxj; - - unsigned dhi_mini; - unsigned dhi_minj; - - unsigned dhi_maxi; - unsigned dhi_maxj; - - unsigned GetDiag(unsigned i, unsigned j) const - { - return LA - i + j; - } - -// i, j are positions 0..LA-1, 0..LB-1. - bool InBox(unsigned i, unsigned j) const - { - unsigned d = GetDiag(i, j); - return d >= dlo && d <= dhi; - } - -/*** -i, j are 0-based prefix lengths 0..LA, 0..LB. - -A full path is in the box iff all match pairs are in the box. - -A partial path that aligns a prefix of A to a prefix of B as -in D.P.) is in the box iff it is is the prefix of at least -one full path that is in the box. - -A D.P. matrix entry X[i][j] is in the box iff there is at -least one full path aligning the first i letters of A and -the first j letters of B ending in a column of type X, i.e. -if there exists a partial path in the box that ends in X. - -Assume terminals appear in all paths, and DI/ID forbidden. - -Intuitively seems that by these definitions D is in box iff -DM or MD is in box, I is in box iff IM or MI is in box. -Don't have proof.. -***/ - bool InBoxDPM(unsigned i, unsigned j) const - { - // Special case for M[0][0] - if (i == 0 && j == 0) - return true; - if (i == 0 || j == 0) - return false; - unsigned d = GetDiag(i-1, j-1); - return d >= dlo && d <= dhi; - } - - bool InBoxDPD(unsigned i, unsigned j) const - { - bool MD = i == 0 ? false : InBoxDPM(i-1, j); - bool DM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1); - return MD || DM; - } - - bool InBoxDPI(unsigned i, unsigned j) const - { - bool MI = j == 0 ? false : InBoxDPM(i, j-1); - bool IM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1); - return MI || IM; - } - - // d = LA - i + j = 1 .. LA+LB-1 - void Validate() const - { - asserta(dlo <= dhi); - asserta(dlo >= GetDiag(LA-1, 0)); - asserta(dhi <= GetDiag(0, LB-1)); - - asserta(GetDiag(dlo_mini, dlo_minj) == dlo); - asserta(GetDiag(dlo_maxi, dlo_maxj) == dlo); - asserta(GetDiag(dhi_mini, dhi_minj) == dhi); - asserta(GetDiag(dhi_maxi, dhi_maxj) == dhi); - - asserta(dlo_mini >= dhi_mini); - asserta(dlo_minj <= dhi_minj); - asserta(dlo_maxi >= dhi_maxi); - asserta(dlo_maxj <= dhi_maxj); - } - - unsigned GetMini() const - { - return dhi_mini; - } - - unsigned GetMaxi() const - { - return dlo_maxi; - } - - unsigned GetMinj() const - { - return dlo_minj; - } - - unsigned GetMaxj() const - { - return dhi_maxj; - } -/*** - i = 0..LA-1 - j = 0..LB-1 - d = LA - i + j = 1 .. LA+LB-1 - j = d - LA + i - i = LA - d + j -***/ - void GetRange_j(unsigned i, unsigned &Startj, unsigned &Endj) const - { - // j = d - LA + i - if (dlo + i >= LA) - Startj = dlo + i - LA; - else - Startj = 0; - - if (Startj >= LB) - Startj = LB - 1; - - if (dhi + i + 1 >= LA) - Endj = dhi + i + 1 - LA; - else - Endj = 0; - - if (Endj > LB) - Endj = LB; - - asserta(Endj >= Startj); - } - - void LogMe() const - { - Log("LA=%u LB=%d dlo(%u): (%u,%u)-(%u,%u) dhi(%u): (%u,%u)-(%u,%u) i=[%u-%u] j=[%u-%u]\n", - LA, LB, - dlo, - dlo_mini, dlo_minj, - dlo_maxi, dlo_maxj, - dhi, - dhi_mini, dhi_minj, - dhi_maxi, dhi_maxj, - GetMini(), GetMaxi(), - GetMinj(), GetMaxj()); - } - }; - -typedef const char *(*NWDIAG)(const byte *A, unsigned LA, const byte *B, unsigned LB, - unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm); - -const char *NWBandWrap(NWDIAG NW, const byte *A, unsigned LA, const byte *B, unsigned LB, - unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm); - -#endif // diagbox_h diff --git a/dp.h b/dp.h deleted file mode 100644 index c771538..0000000 --- a/dp.h +++ /dev/null @@ -1,164 +0,0 @@ -#ifndef dp_h -#define dp_h - -#define SAVE_FAST 0 - -#include "myutils.h" -#include "mx.h" -#include "seqdb.h" -#include "diagbox.h" -#include "path.h" -#include "alnparams.h" -#include "alnheuristics.h" -#include "hspfinder.h" - -typedef void (*OnPathFn)(const string &Path, bool Full); - -enum XType - { - XType_Full=1, - XType_Fwd=2, - XType_Bwd=3, - }; - -// public -float ViterbiBrute(const byte *A, unsigned LA, const byte *B, unsigned LB, - unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD); - -float ViterbiSimple(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, PathData &PD); - -float ViterbiSimpleBand(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, unsigned DiagLo, unsigned DiagHi, PathData &PD); - -float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, PathData &PD); - -float ViterbiFastBand(const byte *A, unsigned LA, const byte *B, unsigned LB, - unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD); - -float ViterbiFastMainDiag(const byte *A, unsigned LA, const byte *B, unsigned LB, - unsigned BandRadius, const AlnParams &AP, PathData &PD); - -float XDropFwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD); - -float XDropBwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD); - -float XDropFwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD); - -float XDropBwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD); - -void XDropAlign(const byte *A, unsigned LA, const byte *B, unsigned LB, - unsigned AncLoi, unsigned AncLoj, unsigned AncLen, const AlnParams &AP, - float XDrop, HSPData &HSP, PathData &PD); - -float SWSimple(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj, - unsigned &Hij, PathData &PD); - -float SWFast(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj, - unsigned &Hij, PathData &PD); - -void SWFast2(const SeqData &SA, const SeqData &SB, const AlnParams &AP, - HSPData &HSP, PathData &PD); - -void SWSimple2(const SeqData &SA, const SeqData &SB, const AlnParams &AP, - HSPData &HSP, PathData &PD); - -float SWUngapped(const byte *A, unsigned LA, const byte *B, unsigned LB, - const float * const *SubstMx, unsigned &LoA, unsigned &LoB, unsigned &Len); - -void SWUngapped2(const SeqData &SA, const SeqData &SB, const AlnParams &AP, - HSPData &HSP); - -float SWFastNTB(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP); - -void GlobalAlignBand(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, unsigned BandRadius, PathData &PD); - -bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &AP, - const AlnHeuristics &AH, HSPFinder &HF, float MinFractId, float &HSPFractId, - PathData &PD); - -bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path); - -void GetBruteMxs(Mx **M, Mx **D, Mx **I); -void GetSimpleDPMxs(Mx **M, Mx **D, Mx **I); -void GetSimpleBandMxs(Mx **M, Mx **D, Mx **I); -void GetXDropFwdSimpleDPMxs(Mx **M, Mx **D, Mx **I); -#if SAVE_FAST -void GetFastMxs(Mx **M, Mx **D, Mx **I); -void GetFastBandMxs(Mx **M, Mx **D, Mx **I); -#endif - -// private -void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD); -void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj, - unsigned &Leni, unsigned &Lenj, PathData &PD); -void EnumPaths(unsigned L1, unsigned L2, bool SubPaths, OnPathFn OnPath); -void AllocBit(unsigned LA, unsigned LB); - -const byte TRACEBITS_DM = 0x01; -const byte TRACEBITS_IM = 0x02; -const byte TRACEBITS_MD = 0x04; -const byte TRACEBITS_MI = 0x08; -const byte TRACEBITS_SM = 0x10; -const byte TRACEBITS_UNINIT = ~0x1f; - -extern Mx g_Mx_TBBit; -extern float *g_DPRow1; -extern float *g_DPRow2; -extern byte **g_TBBit; - -static inline void Max_xM(float &Score, float MM, float DM, float IM, byte &State) - { - Score = MM; - State = 'M'; - - if (DM > Score) - { - Score = DM; - State = 'D'; - } - if (IM > Score) - { - Score = IM; - State = 'I'; - } - } - -static inline void Max_xD(float &Score, float MD, float DD, byte &State) - { - if (MD >= DD) - { - Score = MD; - State = 'M'; - } - else - { - Score = DD; - State = 'D'; - } - } - -static inline void Max_xI(float &Score, float MI, float II, byte &State) - { - if (MI >= II) - { - Score = MI; - State = 'M'; - } - else - { - Score = II; - State = 'I'; - } - } - -#endif // dp_h diff --git a/evalue.h b/evalue.h deleted file mode 100644 index c9308db..0000000 --- a/evalue.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef evalue_h -#define evalue_h - -#include - -void SetKarlin(double GappedLambda, double UngappedLambda, - double GappedK, double UngappedK, double DBLength);\ - -double GetKarlinDBLength(); -void SetKarlinDBLength(double DBLength); -void LogKarlin(); -void SetKarlinAmino(double DBLength); -void SetKarlinNucleo(double DBLength); -void SetKarlin(double DBLength, bool Nucleo); -double ComputeBitScoreGapped(double Score); -double ComputeBitScoreUngapped(double Score); -double ComputeEvalueGapped(double Score, unsigned QueryLength); -double ComputeEvalueUngapped(double Score, unsigned QueryLength); -double ComputeMinScoreGivenEvalueAGapped(double Evalue, unsigned Area); -double ComputeMinScoreGivenEvalueAUngapped(double Evalue, unsigned Area); -double ComputeMinScoreGivenEvalueQGapped(double Evalue, unsigned QueryLength); -double ComputeMinScoreGivenEvalueQUngapped(double Evalue, unsigned QueryLength); -double ComputeEvalueGappedFromBitScore(double BitScore, unsigned QueryLength); - -#endif // evalue_h diff --git a/fractid.cpp b/fractid.cpp deleted file mode 100644 index f298877..0000000 --- a/fractid.cpp +++ /dev/null @@ -1,449 +0,0 @@ -#include "myutils.h" -#include "alpha.h" - -//unsigned g_MaxL = 0; - -static bool *g_IsChar = g_IsAminoChar; - -// Term gaps allowed in query (A) only -static double GetFractIdGivenPathDerep(const byte *A, const byte *B, const char *Path, - char *ptrDesc) - { - if (*Path == 'D') - { - if (ptrDesc != 0) - sprintf(ptrDesc, "(term gap in Query)"); - return 0; - } - - const char *LastM = 0; - for (const char *p = Path; *p; ++p) - if (*p == 'M') - LastM = p; - - unsigned PosA = 0; - unsigned PosB = 0; - unsigned Ids = 0; - unsigned Diffs = 0; - unsigned Cols = 0; - for (const char *p = Path; *p && p != LastM; ++p) - { - ++Cols; - char c = *p; - if (c == 'M') - { - byte a = toupper(A[PosA]); - byte b = toupper(B[PosB]); - if (g_IsChar[a] && g_IsChar[b]) - { - if (a == b) - ++Ids; - else - ++Diffs; - } - else - --Cols; - } - if (c == 'D' || c == 'I') - ++Diffs; - if (c == 'M' || c == 'D') - ++PosA; - if (c == 'M' || c == 'I') - ++PosB; - } - - double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols)); - if (ptrDesc != 0) - sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols); - return FractId; - } - -static double GetFractIdGivenPathAllDiffs(const byte *A, const byte *B, const char *Path, - char *ptrDesc) - { - unsigned PosA = 0; - unsigned PosB = 0; - unsigned Ids = 0; - unsigned Diffs = 0; - unsigned Cols = 0; - for (const char *p = Path; *p; ++p) - { - ++Cols; - char c = *p; - if (c == 'M') - { - byte a = toupper(A[PosA]); - byte b = toupper(B[PosB]); - if (g_IsChar[a] && g_IsChar[b]) - { - if (a == b) - ++Ids; - else - ++Diffs; - } - else - --Cols; - } - if (c == 'D' || c == 'I') - ++Diffs; - if (c == 'M' || c == 'D') - ++PosA; - if (c == 'M' || c == 'I') - ++PosB; - } - - double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols)); - if (ptrDesc != 0) - sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols); - return FractId; - } - -static double GetFractIdGivenPathInternalDiffs(const byte *A, const byte *B, - const char *Path, char *ptrDesc) - { - unsigned i = 0; - unsigned FirstM = UINT_MAX; - unsigned LastM = UINT_MAX; - for (const char *p = Path; *p; ++p) - { - if (*p == 'M') - { - if (FirstM == UINT_MAX) - FirstM = i; - LastM = i; - } - ++i; - } - if (FirstM == UINT_MAX) - { - if (ptrDesc != 0) - strcpy(ptrDesc, "(no matches)"); - return 0.0; - } - - unsigned PosA = 0; - unsigned PosB = 0; - unsigned Ids = 0; - unsigned Diffs = 0; - unsigned Cols = 0; - for (unsigned i = 0; i < FirstM; ++i) - { - char c = Path[i]; - if (c == 'M' || c == 'D') - ++PosA; - if (c == 'M' || c == 'I') - ++PosB; - } - - for (unsigned i = FirstM; i <= LastM; ++i) - { - ++Cols; - char c = Path[i]; - if (c == 'M') - { - byte a = toupper(A[PosA]); - byte b = toupper(B[PosB]); - if (g_IsChar[a] && g_IsChar[b]) - { - if (a == b) - ++Ids; - else - ++Diffs; - } - else - --Cols; - } - if (c == 'D' || c == 'I') - ++Diffs; - if (c == 'M' || c == 'D') - ++PosA; - if (c == 'M' || c == 'I') - ++PosB; - } - - double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols)); - if (ptrDesc != 0) - sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols); - return FractId; - } - -static double GetFractIdGivenPathMBL(const byte *A, const byte *B, const char *Path, - char *ptrDesc) - { - unsigned PosA = 0; - unsigned PosB = 0; - unsigned Mismatches = 0; - unsigned Gaps = 0; - for (const char *p = Path; *p; ++p) - { - char c = *p; - if (c == 'M' && toupper(A[PosA]) != toupper(B[PosB])) - ++Mismatches; - if (c == 'D' || c == 'I' && (p == Path || p[-1] == 'M')) - ++Gaps; - if (c == 'M' || c == 'D') - ++PosA; - if (c == 'M' || c == 'I') - ++PosB; - } - unsigned Diffs = Gaps + Mismatches; - double FractDiffs = (PosB == 0 ? 0.0 : double(Diffs)/double(PosB)); - if (ptrDesc != 0) - sprintf(ptrDesc, "Gap opens %u, Id=1 - [(diffs=%u)/(target_length=%u)]", - Gaps, Diffs, PosB); - double FractId = 1.0 - FractDiffs; - if (FractId < 0.0) - return 0.0; - return FractId; - } - -static double GetFractIdGivenPathBLAST(const byte *A, const byte *B, const char *Path, - char *ptrDesc) - { - unsigned PosA = 0; - unsigned PosB = 0; - unsigned Ids = 0; - unsigned Wilds = 0; - unsigned Cols = 0; - for (const char *p = Path; *p; ++p) - { - ++Cols; - char c = *p; - if (c == 'M') - { - byte a = toupper(A[PosA]); - byte b = toupper(B[PosB]); - if (g_IsChar[a] && g_IsChar[b]) - { - if (a == b) - ++Ids; - } - else - ++Wilds; - } - if (c == 'M' || c == 'D') - ++PosA; - if (c == 'M' || c == 'I') - ++PosB; - } - asserta(Cols >= Wilds); - Cols -= Wilds; - double FractId = Cols == 0 ? 0.0f : float(Ids)/float(Cols); - if (ptrDesc != 0) - sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols); - return FractId; - } - -static double GetFractIdGivenPathDefault(const byte *A, const byte *B, const char *Path, - char *ptrDesc) - { - unsigned PosA = 0; - unsigned PosB = 0; - unsigned Ids = 0; - unsigned Wilds = 0; - for (const char *p = Path; *p; ++p) - { - char c = *p; - if (c == 'M') - { - byte a = toupper(A[PosA]); - byte b = toupper(B[PosB]); - if (g_IsChar[a] && g_IsChar[b]) - { - if (a == b) - ++Ids; - } - else - ++Wilds; - } - if (c == 'M' || c == 'D') - ++PosA; - if (c == 'M' || c == 'I') - ++PosB; - } - unsigned MinLen = min(PosA, PosB) - Wilds; - double FractId = (MinLen == 0 ? 0.0 : double(Ids)/double(MinLen)); - if (ptrDesc != 0) - sprintf(ptrDesc, "(ids=%u/shorter_length=%u)", Ids, MinLen); - return FractId; - } - -double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, - bool Nucleo, char *ptrDesc, unsigned IdDef) - { - if (Nucleo) - g_IsChar = g_IsACGTU; - else - g_IsChar = g_IsAminoChar; - - if (Path == 0) - { - if (ptrDesc != 0) - strcpy(ptrDesc, "(NULL path)"); - return 0.0; - } - - unsigned ColCount = (unsigned) strlen(Path); - if (ColCount == 0) - return 0.0; - - if (opt_leftjust) - { - if (Path[0] != 'M' || Path[ColCount-1] == 'D') - { - if (ptrDesc != 0) - strcpy(ptrDesc, "(leftjust)"); - return 0.0; - } - } - - if (opt_rightjust) - { - if (Path[0] == 'D' || Path[ColCount-1] != 'M') - { - if (ptrDesc != 0) - strcpy(ptrDesc, "(rightjust)"); - return 0.0; - } - } - - double FractId = 0.0; - //if (opt_idprefix > 0) - // { - // for (unsigned i = 0; i < opt_idprefix; ++i) - // { - // char c = Path[i]; - // if (c != 'M' || toupper(A[i]) != toupper(B[i])) - // { - // if (ptrDesc != 0) - // sprintf(ptrDesc, "Prefix ids %u < idprefix(%u)", - // i, opt_idprefix); - // return 0.0; - // } - // } - // } - - //if (opt_idsuffix > 0) - // { - // unsigned Cols = strlen(Path); - // for (unsigned i = 0; i < opt_idsuffix && i > Cols; ++i) - // { - // unsigned k = Cols - 1 - i; - // char c = Path[k]; - // if (c != 'M' || toupper(A[k]) != toupper(B[k])) - // { - // if (ptrDesc != 0) - // sprintf(ptrDesc, "Suffix ids %u < idsuffix(%u)", - // i, opt_idsuffix); - // return 0.0; - // } - // } - // } - - if (opt_maxqgap > 0 || opt_maxtgap > 0) - { - unsigned L = 0; - const char *LastM = 0; - for (const char *p = Path; *p; ++p) - if (*p == 'M') - LastM = p; - -// g_MaxL = 0; - for (const char *p = Path; *p && p != LastM; ++p) - { - char c = *p; - switch (c) - { - case 'M': - if (L > 0) - { - if (p[-1] == 'D') - { - if (L > opt_maxtgap) - { - if (ptrDesc != 0) - sprintf(ptrDesc, "(maxtgap)"); - return 0.0; - } - } - else if (p[-1] == 'I') - { - if (L > opt_maxqgap) - { - if (ptrDesc != 0) - sprintf(ptrDesc, "(maxqgap)"); - return 0.0; - } - } - else - asserta(false); - } - L = 0; - break; - - case 'D': - case 'I': - ++L; - //if (L > g_MaxL) - // g_MaxL = L; - break; - - default: - asserta(false); - } - } - } - - switch (IdDef) - { - case 0: - FractId = GetFractIdGivenPathDefault(A, B, Path, ptrDesc); - break; - - case 1: - FractId = GetFractIdGivenPathAllDiffs(A, B, Path, ptrDesc); - break; - - case 2: - FractId = GetFractIdGivenPathInternalDiffs(A, B, Path, ptrDesc); - break; - - case 3: - FractId = GetFractIdGivenPathMBL(A, B, Path, ptrDesc); - break; - - case 4: - FractId = GetFractIdGivenPathBLAST(A, B, Path, ptrDesc); - break; - - case 5: - FractId = GetFractIdGivenPathDerep(A, B, Path, ptrDesc); - break; - - default: - Die("--iddef %u invalid", opt_iddef); - } - - return FractId; - } - -double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, - bool Nucleo, char *ptrDesc) - { - return GetFractIdGivenPath(A, B, Path, Nucleo, ptrDesc, opt_iddef); - } - -double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo) - { - return GetFractIdGivenPath(A, B, Path, Nucleo, (char *) 0); - } - -double GetFractIdGivenPath(const byte *A, const byte *B, const string &Path) - { - return GetFractIdGivenPath(A, B, Path.c_str(), true); - } - -double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path) - { - return GetFractIdGivenPath(A, B, Path, true); - } diff --git a/getparents.cpp b/getparents.cpp deleted file mode 100644 index d82f902..0000000 --- a/getparents.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include "myutils.h" -#include "chime.h" -#include "ultra.h" -#include - -void AddTargets(Ultra &U, const SeqData &Query, set &TargetIndexes); - -void GetChunkInfo(unsigned L, unsigned &Length, vector &Los) - { - Los.clear(); - - if (L <= opt_minchunk) - { - Length = L; - Los.push_back(0); - return; - } - - Length = (L - 1)/opt_chunks + 1; - if (Length < opt_minchunk) - Length = opt_minchunk; - - unsigned Lo = 0; - for (;;) - { - if (Lo + Length >= L) - { - Lo = L - Length - 1; - Los.push_back(Lo); - return; - } - Los.push_back(Lo); - Lo += Length; - } - } - -void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ, - vector &Parents) - { - Parents.clear(); - - set TargetIndexes; - - unsigned QL = QSD.L; - - SeqData QuerySD = QSD; - - unsigned ChunkLength; - vector ChunkLos; - GetChunkInfo(QL, ChunkLength, ChunkLos); - unsigned ChunkCount = SIZE(ChunkLos); - for (unsigned ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) - { - unsigned Lo = ChunkLos[ChunkIndex]; - asserta(Lo + ChunkLength <= QL); - - const byte *Chunk = QSD.Seq + Lo; - - // THIS MESSES UP --self!! - //char Prefix[32]; - //sprintf(Prefix, "%u|", Lo); - //string ChunkLabel = string(Prefix) + string(QSD.Label); - - //QuerySD.Label = ChunkLabel.c_str(); - QuerySD.Seq = Chunk; - QuerySD.L = ChunkLength; - - AddTargets(U, QuerySD, TargetIndexes); - - Lo += ChunkLength; - } - - for (set::const_iterator p = TargetIndexes.begin(); - p != TargetIndexes.end(); ++p) - { - unsigned TargetIndex = *p; - bool Accept = true; - if (AbQ > 0.0f) - { - const char *TargetLabel = U.GetSeedLabel(TargetIndex); - float AbT = GetAbFromLabel(string(TargetLabel)); - if (AbT > 0.0f && AbT < opt_abskew*AbQ) - Accept = false; - } - - if (Accept) - Parents.push_back(TargetIndex); - } - } diff --git a/globalalign2.cpp b/globalalign2.cpp deleted file mode 100644 index 6bb35a9..0000000 --- a/globalalign2.cpp +++ /dev/null @@ -1,45 +0,0 @@ -//#if UCHIMES - -#include "dp.h" -#include "seq.h" - -static AlnParams g_AP; -static bool g_APInitDone = false; - -bool GlobalAlign(const SeqData &Query, const SeqData &Target, PathData &PD) - { - if (!g_APInitDone) - { - g_AP.InitFromCmdLine(true); - g_APInitDone = true; - } - - ViterbiFast(Query.Seq, Query.L, Target.Seq, Target.L, g_AP, PD); - return true; - } - -bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path) - { - PathData PD; - GlobalAlign(Query, Target, PD); - Path = string(PD.Start); - return true; - } - -bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &/*AP*/, - const AlnHeuristics &AH, HSPFinder &/*HF*/, float /*MinFractId*/, float &/*HSPId*/, PathData &PD) - { - PD.Clear(); - string Path; - bool Found = GlobalAlign(Query, Target, Path); - if (!Found) - return false; - unsigned n = SIZE(Path); - PD.Alloc(n+1); - memcpy(PD.Front, Path.c_str(), n); - PD.Start = PD.Front; - PD.Start[n] = 0; - return true; - } - -//#endif // UCHIMES diff --git a/help.h b/help.h deleted file mode 100644 index 9d7a89f..0000000 --- a/help.h +++ /dev/null @@ -1,127 +0,0 @@ -"\n" -"Usage\n" -"-----\n" -"\n" -"uchime --input query.fasta [--db db.fasta] [--uchimeout results.uchime]\n" -" [--uchimealns results.alns]\n" -"\n" -"Options\n" -"-------\n" -"\n" -"--input filename\n" -" Query sequences in FASTA format.\n" -" If the --db option is not specificed, uchime uses de novo\n" -" detection. In de novo mode, relative abundance must be given\n" -" by a string /ab=xxx/ somewhere in the label, where xxx is a\n" -" floating-point number, e.g. >F00QGH67HG/ab=1.2/.\n" -"\n" -"--db filename\n" -" Reference database in FASTA format.\n" -" Optional, if not specified uchime uses de novo mode.\n" -"\n" -" ***WARNING*** The database is searched ONLY on the plus strand.\n" -" You MUST include reverse-complemented sequences in the database\n" -" if you want both strands to be searched.\n" -"\n" -"--abskew x\n" -" Minimum abundance skew. Default 1.9. De novo mode only.\n" -" Abundance skew is:\n" -" min [ abund(parent1), abund(parent2) ] / abund(query).\n" -"\n" -"--uchimeout filename\n" -" Output in tabbed format with one record per query sequence.\n" -" First field is score (h), second field is query label.\n" -" For details, see manual.\n" -"\n" -"--uchimealns filename\n" -" Multiple alignments of query sequences to parents in human-\n" -" readable format. Alignments show columns with differences\n" -" that support or contradict a chimeric model.\n" -"\n" -"--minh h\n" -" Mininum score to report chimera. Default 0.3. Values from 0.1\n" -" to 5 might be reasonable. Lower values increase sensitivity\n" -" but may report more false positives. If you decrease --xn,\n" -" you may need to increase --minh, and vice versa.\n" -"\n" -"--mindiv div\n" -" Minimum divergence ratio, default 0.5. Div ratio is 100%% - \n" -" %%identity between query sequence and the closest candidate for\n" -" being a parent. If you don't care about very close chimeras,\n" -" then you could increase --mindiv to, say, 1.0 or 2.0, and\n" -" also decrease --min h, say to 0.1, to increase sensitivity.\n" -" How well this works will depend on your data. Best is to\n" -" tune parameters on a good benchmark.\n" -"\n" -"--xn beta\n" -" Weight of a no vote, also called the beta parameter. Default 8.0.\n" -" Decreasing this weight to around 3 or 4 may give better\n" -" performance on denoised data.\n" -"\n" -"--dn n\n" -" Pseudo-count prior on number of no votes. Default 1.4. Probably\n" -" no good reason to change this unless you can retune to a good\n" -" benchmark for your data. Reasonable values are probably in the\n" -" range from 0.2 to 2.\n" -"\n" -"--xa w\n" -" Weight of an abstain vote. Default 1. So far, results do not\n" -" seem to be very sensitive to this parameter, but if you have\n" -" a good training set might be worth trying. Reasonable values\n" -" might range from 0.1 to 2.\n" -"\n" -"--chunks n\n" -" Number of chunks to extract from the query sequence when searching\n" -" for parents. Default 4.\n" -"\n" -"--[no]ovchunks\n" -" [Do not] use overlapping chunks. Default do not.\n" -"\n" -"--minchunk n\n" -" Minimum length of a chunk. Default 64.\n" -"\n" -"--idsmoothwindow w\n" -" Length of id smoothing window. Default 32.\n" -"\n" -"--minsmoothid f\n" -" Minimum factional identity over smoothed window of candidate parent.\n" -" Default 0.95.\n" -"\n" -"--maxp n\n" -" Maximum number of candidate parents to consider. Default 2. In tests so\n" -" far, increasing --maxp gives only a very small improvement in sensivity\n" -" but tends to increase the error rate quite a bit.\n" -"\n" -"--[no]skipgaps\n" -"--[no]skipgaps2\n" -" These options control how gapped columns affect counting of diffs.\n" -" If --skipgaps is specified, columns containing gaps do not found as diffs.\n" -" If --skipgaps2 is specified, if column is immediately adjacent to\n" -" a column containing a gap, it is not counted as a diff.\n" -" Default is --skipgaps --skipgaps2.\n" -"\n" -"--minlen L\n" -"--maxlen L\n" -" Minimum and maximum sequence length. Defaults 10, 10000.\n" -" Applies to both query and reference sequences.\n" -"\n" -"--ucl\n" -" Use local-X alignments. Default is global-X. On tests so far, global-X\n" -" is always better; this option is retained because it just might work\n" -" well on some future type of data.\n" -"\n" -"--queryfract f\n" -" Minimum fraction of the query sequence that must be covered by a local-X\n" -" alignment. Default 0.5. Applies only when --ucl is specified.\n" -"\n" -"--quiet\n" -" Do not display progress messages on stderr.\n" -"\n" -"--log filename\n" -" Write miscellaneous information to the log file. Mostly of interest\n" -" to me (the algorithm developer). Use --verbose to get more info.\n" -"\n" -"--self\n" -" In reference database mode, exclude a reference sequence if it has\n" -" the same label as the query. This is useful for benchmarking by using\n" -" the ref db as a query to test for false positives.\n" diff --git a/hsp.h b/hsp.h deleted file mode 100644 index 339256f..0000000 --- a/hsp.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef hsp_h -#define hsp_h 1 - -struct HSPData - { - unsigned Loi; - unsigned Loj; - unsigned Leni; - unsigned Lenj; - float Score; - unsigned User; - - unsigned GetLength() const - { - if (Leni != Lenj) - Die("HSP::GetLength(): Leni %u, Lenj %u, Loi %u, Loj %u, Score %.1f", - Leni, Lenj, Loi, Loj, Score); - - return Leni; - } - - unsigned GetHii() const - { - assert(Leni > 0); - return Loi + Leni - 1; - } - - unsigned GetHij() const - { - assert(Lenj > 0); - return Loj + Lenj - 1; - } - - bool LeftA() const - { - return Loi == 0; - } - - bool LeftB() const - { - return Loj == 0; - } - - bool RightA(unsigned LA) const - { - return Loi + Leni == LA; - } - - bool RightB(unsigned LB) const - { - return Loj + Lenj == LB; - } - - unsigned GetIdCount(const byte *A, const byte *B) const - { - unsigned Count = 0; - unsigned K = GetLength(); - for (unsigned k = 0; k < K; ++k) - { - byte a = A[Loi+k]; - byte b = B[Loj+k]; - if (toupper(a) == toupper(b)) - Count++; - } - return Count; - } - - double OverlapFract(const HSPData &HSP) const - { - if (Leni == 0 || Lenj == 0) - return 0.0; - - unsigned MaxLoi = max(Loi, HSP.Loi); - unsigned MaxLoj = max(Loj, HSP.Loj); - unsigned MinHii = min(GetHii(), HSP.GetHii()); - unsigned MinHij = min(GetHij(), HSP.GetHij()); - - unsigned Ovi = (MinHii < MaxLoi) ? 0 : MinHii - MaxLoi; - unsigned Ovj = (MinHij < MaxLoj) ? 0 : MinHij - MaxLoj; - - asserta(Ovi <= Leni && Ovj <= Lenj); - return double(Ovi*Ovj)/double(Leni*Lenj); - } - - bool operator<(const HSPData &rhs) const - { - return Loi < rhs.Loi; - } - - void LogMe() const - { - Log("Loi=%u Loj=%u Li=%u Lj=%u Score=%.1f\n", Loi, Loj, Leni, Lenj, Score); - } - - void LogMe2() const - { - Log("(%u-%u,%u-%u/%.1f)", Loi, GetHii(), Loj, GetHij(), Score); - } - }; - -// Bendpoint -struct BPData - { - unsigned Pos; - bool IsLo; - unsigned Index; - - void LogMe() const - { - Log("BP%s Pos %u Ix %u", (IsLo ? "lo" : "hi"), Pos, Index); - } - }; - -#endif // hsp_h diff --git a/hspfinder.h b/hspfinder.h deleted file mode 100644 index 2b8e9d8..0000000 --- a/hspfinder.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef hspfinder_h -#define hspfinder_h - -#include "seq.h" - -class HSPFinder - { -public: - void SetA(const SeqData &/*SD*/) {} - void SetB(const SeqData &/*SD*/) {} - }; - -#endif // hspfinder_h diff --git a/make3way.cpp b/make3way.cpp deleted file mode 100644 index ce88f86..0000000 --- a/make3way.cpp +++ /dev/null @@ -1,173 +0,0 @@ -#include "myutils.h" -#include "sfasta.h" -#include "path.h" -#include "dp.h" - -void Make3Way(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD, - const string &PathQA, const string &PathQB, - string &Q3, string &A3, string &B3) - { - Q3.clear(); - A3.clear(); - B3.clear(); - -#if DEBUG - { - unsigned QLen = 0; - unsigned ALen = 0; - for (unsigned i = 0; i < SIZE(PathQA); ++i) - { - char c = PathQA[i]; - if (c == 'M' || c == 'D') - ++QLen; - if (c == 'M' || c == 'I') - ++ALen; - } - asserta(QLen == QSD.L); - asserta(ALen == ASD.L); - } - { - unsigned QLen = 0; - unsigned BLen = 0; - for (unsigned i = 0; i < SIZE(PathQB); ++i) - { - char c = PathQB[i]; - if (c == 'M' || c == 'D') - ++QLen; - if (c == 'M' || c == 'I') - ++BLen; - } - asserta(QLen == QSD.L); - asserta(BLen == BSD.L); - } -#endif - - const byte *Q = QSD.Seq; - const byte *A = ASD.Seq; - const byte *B = BSD.Seq; - - unsigned LQ = QSD.L; - unsigned LA = ASD.L; - unsigned LB = BSD.L; - - vector InsertCountsA(LQ+1, 0); - unsigned QPos = 0; - for (unsigned i = 0; i < SIZE(PathQA); ++i) - { - char c = PathQA[i]; - if (c == 'M' || c == 'D') - ++QPos; - else - { - asserta(c == 'I'); - asserta(QPos <= LQ); - ++(InsertCountsA[QPos]); - } - } - - vector InsertCountsB(LQ+1, 0); - QPos = 0; - for (unsigned i = 0; i < SIZE(PathQB); ++i) - { - char c = PathQB[i]; - if (c == 'M' || c == 'D') - ++QPos; - else - { - asserta(c == 'I'); - asserta(QPos <= LQ); - ++(InsertCountsB[QPos]); - } - } - - vector InsertCounts; - for (unsigned i = 0; i <= LQ; ++i) - { - unsigned is = max(InsertCountsA[i], InsertCountsB[i]); - InsertCounts.push_back(is); - } - - for (unsigned i = 0; i < LQ; ++i) - { - for (unsigned k = 0; k < InsertCounts[i]; ++k) - Q3.push_back('-'); - asserta(i < LQ); - Q3.push_back(toupper(Q[i])); - } - for (unsigned k = 0; k < InsertCounts[LQ]; ++k) - Q3.push_back('-'); - -// A - QPos = 0; - unsigned APos = 0; - unsigned is = 0; - for (unsigned i = 0; i < SIZE(PathQA); ++i) - { - char c = PathQA[i]; - if (c == 'M' || c == 'D') - { - unsigned isq = InsertCounts[QPos]; - asserta(is <= isq); - for (unsigned i = 0; i < InsertCounts[QPos]-is; ++i) - A3.push_back('-'); - is = 0; - ++QPos; - } - if (c == 'M') - { - asserta(APos < LA); - A3.push_back(toupper(A[APos++])); - } - else if (c == 'D') - A3.push_back('-'); - else if (c == 'I') - { - ++is; - asserta(APos < LA); - A3.push_back(toupper(A[APos++])); - } - } - asserta(is <= InsertCounts[LQ]); - for (unsigned k = 0; k < InsertCounts[LQ]-is; ++k) - A3.push_back('-'); - asserta(QPos == LQ); - asserta(APos == LA); - -// B - QPos = 0; - unsigned BPos = 0; - is = 0; - for (unsigned i = 0; i < SIZE(PathQB); ++i) - { - char c = PathQB[i]; - if (c == 'M' || c == 'D') - { - asserta(is <= InsertCounts[QPos]); - for (unsigned i = 0; i < InsertCounts[QPos]-is; ++i) - B3.push_back('-'); - is = 0; - ++QPos; - } - if (c == 'M') - { - asserta(BPos < LB); - B3.push_back(toupper(B[BPos++])); - } - else if (c == 'D') - B3.push_back('-'); - else if (c == 'I') - { - ++is; - asserta(BPos < LB); - B3.push_back(toupper(B[BPos++])); - } - } - asserta(is <= InsertCounts[LQ]); - for (unsigned k = 0; k < InsertCounts[LQ]-is; ++k) - B3.push_back('-'); - asserta(APos == LA); - asserta(BPos == LB); - - asserta(SIZE(Q3) == SIZE(A3)); - asserta(SIZE(Q3) == SIZE(B3)); - } diff --git a/makefile b/makefile index b8d4e2c..e06c334 100644 --- a/makefile +++ b/makefile @@ -39,7 +39,7 @@ ifeq ($(strip $(64BIT_VERSION)),yes) #if you are a linux user use the following line #CXXFLAGS += -mtune=native -march=native -m64 - CXXFLAGS += -DBIT_VERSION + CXXFLAGS += -DBIT_VERSION FORTRAN_FLAGS = -m64 endif diff --git a/mx.cpp b/mx.cpp deleted file mode 100644 index 48c347e..0000000 --- a/mx.cpp +++ /dev/null @@ -1,294 +0,0 @@ -#include "myutils.h" -#include "mx.h" -#include "seqdb.h" -#include "seq.h" - -char ProbToChar(float p); - -list *MxBase::m_Matrices = 0; -unsigned MxBase::m_AllocCount; -unsigned MxBase::m_ZeroAllocCount; -unsigned MxBase::m_GrowAllocCount; -double MxBase::m_TotalBytes; -double MxBase::m_MaxBytes; - -static const char *LogizeStr(const char *s) - { - double d = atof(s); - d = log(d); - return TypeToStr(float(d)); - } - -static const char *ExpizeStr(const char *s) - { - double d = atof(s); - d = exp(d); - return TypeToStr(float(d)); - } - -void MxBase::OnCtor(MxBase *Mx) - { - if (m_Matrices == 0) - m_Matrices = new list; - asserta(m_Matrices != 0); - m_Matrices->push_front(Mx); - } - -void MxBase::OnDtor(MxBase *Mx) - { - if (m_Matrices == 0) - { - Warning("MxBase::OnDtor, m_Matrices = 0"); - return; - } - for (list::iterator p = m_Matrices->begin(); - p != m_Matrices->end(); ++p) - { - if (*p == Mx) - { - m_Matrices->erase(p); - if (m_Matrices->empty()) - delete m_Matrices; - return; - } - } - Warning("MxBase::OnDtor, not found"); - } - -//float **MxBase::Getf(const string &Name) -// { -// Mx *m = (Mx *) Get(Name); -// asserta(m->GetTypeSize() == sizeof(float)); -// return m->GetData(); -// } -// -//double **MxBase::Getd(const string &Name) -// { -// Mx *m = (Mx *) Get(Name); -// asserta(m->GetTypeSize() == sizeof(double)); -// return m->GetData(); -// } -// -//char **MxBase::Getc(const string &Name) -// { -// Mx *m = (Mx *) Get(Name); -// asserta(m->GetTypeSize() == sizeof(char)); -// return m->GetData(); -// } - -void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount, - const SeqDB *DB, unsigned IdA, unsigned IdB) - { - Alloc(Name, RowCount, ColCount, DB, IdA, IdB, 0, 0); - } - -void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount, - const SeqData *SA, const SeqData *SB) - { - Alloc(Name, RowCount, ColCount, 0, UINT_MAX, UINT_MAX, SA, SB); - } - -void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount, - const SeqDB *DB, unsigned IdA, unsigned IdB, const SeqData *SA, const SeqData *SB) - { - StartTimer(MxBase_Alloc); - - ++m_AllocCount; - if (m_AllocatedRowCount == 0) - ++m_ZeroAllocCount; - - if (DB != 0) - { - asserta(IdA != UINT_MAX); - asserta(IdB != UINT_MAX); - asserta(RowCount >= DB->GetSeqLength(IdA) + 1); - asserta(ColCount >= DB->GetSeqLength(IdB) + 1); - } - if (RowCount > m_AllocatedRowCount || ColCount > m_AllocatedColCount) - { - if (m_AllocatedRowCount > 0) - { - if (opt_logmemgrows) - Log("MxBase::Alloc grow %s %u x %u -> %u x %u, %s bytes\n", - Name, m_AllocatedRowCount, m_AllocatedColCount, - RowCount, ColCount, - IntToStr(GetBytes())); - ++m_GrowAllocCount; - } - - m_TotalBytes -= GetBytes(); - - PauseTimer(MxBase_Alloc); - StartTimer(MxBase_FreeData); - FreeData(); - EndTimer(MxBase_FreeData); - StartTimer(MxBase_Alloc); - - unsigned N = max(RowCount + 16, m_AllocatedRowCount); - unsigned M = max(ColCount + 16, m_AllocatedColCount); - N = max(N, M); - - PauseTimer(MxBase_Alloc); - StartTimer(MxBase_AllocData); - AllocData(N, N); - EndTimer(MxBase_AllocData); - StartTimer(MxBase_Alloc); - - m_TotalBytes += GetBytes(); - if (m_TotalBytes > m_MaxBytes) - m_MaxBytes = m_TotalBytes; - } - - unsigned n = sizeof(m_Name)-1; - strncpy(m_Name, Name, n); - m_Name[n] = 0; - m_RowCount = RowCount; - m_ColCount = ColCount; - m_SeqDB = DB; - m_IdA = IdA; - m_IdB = IdB; - m_SA = SA; - m_SB = SB; - - EndTimer(MxBase_Alloc); - } - -void MxBase::LogMe(bool WithData, int Opts) const - { - Log("\n"); - if (Opts & OPT_EXP) - Log("Exp "); - else if (Opts & OPT_LOG) - Log("Log "); - bool ZeroBased = ((Opts & OPT_ZERO_BASED) != 0); - Log("%s(%p) Rows %u/%u, Cols %u/%u", - m_Name, this, - m_RowCount, m_AllocatedRowCount, - m_ColCount, m_AllocatedColCount); - if (m_SeqDB != 0 && m_IdA != UINT_MAX) - Log(", A=%s", m_SeqDB->GetLabel(m_IdA)); - else if (m_SA != 0) - Log(", A=%s", m_SA->Label); - if (m_SeqDB != 0 && m_IdB != UINT_MAX) - Log(", B=%s", m_SeqDB->GetLabel(m_IdB)); - else if (m_SB != 0) - Log(", B=%s", m_SB->Label); - Log("\n"); - if (!WithData || m_RowCount == 0 || m_ColCount == 0) - return; - - const char *z = GetAsStr(0, 0); - unsigned Width = strlen(z); - unsigned Mod = 1; - for (unsigned i = 0; i < Width; ++i) - Mod *= 10; - - if (m_Alpha[0] != 0) - { - Log("// Alphabet=%s\n", m_Alpha); - Log("// "); - unsigned n = strlen(m_Alpha); - for (unsigned j = 0; j < n; ++j) - Log(" %*c", Width, m_Alpha[j]); - Log("\n"); - for (unsigned i = 0; i < n; ++i) - { - Log("/* %c */ {", m_Alpha[i]); - unsigned ci = m_Alpha[i]; - for (unsigned j = 0; j < n; ++j) - { - unsigned cj = m_Alpha[j]; - Log("%s,", GetAsStr(ci, cj)); - } - Log("}, // %c\n", m_Alpha[i]); - } - return; - } - else if (m_Alpha2[0] != 0) - { - unsigned n = strlen(m_Alpha2); - Log("// Alphabet=%s\n", m_Alpha2); - Log("// "); - for (unsigned j = 0; j < n; ++j) - Log(" %*c", Width, m_Alpha2[j]); - Log("\n"); - for (unsigned i = 0; i < n; ++i) - { - Log("/* %c */ {", m_Alpha2[i]); - unsigned ci = m_Alpha2[i]; - for (unsigned j = 0; j < n; ++j) - Log("%s,", GetAsStr(i, j)); - Log("}, // %c\n", m_Alpha2[i]); - } - return; - } - - const byte *A = 0; - const byte *B = 0; - if (m_SeqDB != 0 && m_IdA != UINT_MAX) - A = m_SeqDB->GetSeq(m_IdA); - else if (m_SA != 0) - A = m_SA->Seq; - if (m_SeqDB != 0 && m_IdB != UINT_MAX) - B = m_SeqDB->GetSeq(m_IdB); - else if (m_SB != 0) - B = m_SB->Seq; - - if (B != 0) - { - if (A != 0) - Log(" "); - Log("%5.5s", ""); - if (ZeroBased) - for (unsigned j = 0; j < m_ColCount; ++j) - Log("%*c", Width, B[j]); - else - for (unsigned j = 0; j < m_ColCount; ++j) - Log("%*c", Width, j == 0 ? ' ' : B[j-1]); - Log("\n"); - } - - if (A != 0) - Log(" "); - Log("%5.5s", ""); - for (unsigned j = 0; j < m_ColCount; ++j) - Log("%*u", Width, j%Mod); - Log("\n"); - - for (unsigned i = 0; i < m_RowCount; ++i) - { - if (A != 0) - { - if (ZeroBased) - Log("%c ", A[i]); - else - Log("%c ", i == 0 ? ' ' : A[i-1]); - } - Log("%4u ", i); - - for (unsigned j = 0; j < m_ColCount; ++j) - { - const char *s = GetAsStr(i, j); - if (Opts & OPT_LOG) - s = LogizeStr(s); - else if (Opts & OPT_EXP) - s = ExpizeStr(s); - Log("%s", s); - } - Log("\n"); - } - } -static unsigned g_MatrixFileCount; - -void MxBase::LogCounts() - { - Log("\n"); - Log("MxBase::LogCounts()\n"); - Log(" What N\n"); - Log("---------- ----------\n"); - Log(" Allocs %10u\n", m_AllocCount); - Log("ZeroAllocs %10u\n", m_ZeroAllocCount); - Log(" Grows %10u\n", m_GrowAllocCount); - Log(" Bytes %10.10s\n", MemBytesToStr(m_TotalBytes)); - Log(" Max bytes %10.10s\n", MemBytesToStr(m_MaxBytes)); - } diff --git a/mx.h b/mx.h deleted file mode 100644 index 1438900..0000000 --- a/mx.h +++ /dev/null @@ -1,454 +0,0 @@ -#ifndef mx_h -#define mx_h - -#include -#include -#include -#include "timing.h" -#include "myutils.h" - -const int OPT_LOG = 0x01; -const int OPT_EXP = 0x02; -const int OPT_ZERO_BASED = 0x04; -const float MINUS_INFINITY = -9e9f; -const float UNINIT = -8e8f; - -struct SeqData; - -template const char *TypeToStr(T t) - { - Die("Unspecialised TypeToStr() called"); - ureturn(0); - } - -template<> inline const char *TypeToStr(unsigned short f) - { - static char s[16]; - - sprintf(s, "%12u", f); - return s; - } - -template<> inline const char *TypeToStr(short f) - { - static char s[16]; - - sprintf(s, "%12d", f); - return s; - } - -template<> inline const char *TypeToStr(int f) - { - static char s[16]; - - sprintf(s, "%5d", f); - return s; - } - -template<> inline const char *TypeToStr(float f) - { - static char s[16]; - - if (f == UNINIT) - sprintf(s, "%12.12s", "?"); - else if (f < MINUS_INFINITY/2) - sprintf(s, "%12.12s", "*"); - else if (f == 0.0f) - sprintf(s, "%12.12s", "."); - else if (f >= -1e5 && f <= 1e5) - sprintf(s, "%12.5f", f); - else - sprintf(s, "%12.4g", f); - return s; - } - -template<> inline const char *TypeToStr(double f) - { - static char s[16]; - - if (f < -1e9) - sprintf(s, "%12.12s", "*"); - else if (f == 0.0f) - sprintf(s, "%12.12s", "."); - else if (f >= -1e-5 && f <= 1e5) - sprintf(s, "%12.5f", f); - else - sprintf(s, "%12.4g", f); - return s; - } - -static inline const char *FloatToStr(float f, string &s) - { - s = TypeToStr(f); - return s.c_str(); - } - -template<> inline const char *TypeToStr(char c) - { - static char s[2]; - s[0] = c; - return s; - } - -template<> inline const char *TypeToStr(byte c) - { - static char s[2]; - s[0] = c; - return s; - } - -template<> inline const char *TypeToStr(bool tof) - { - static char s[2]; - s[0] = tof ? 'T' : 'F'; - return s; - } - -struct SeqDB; - -struct MxBase - { -private: - MxBase(const MxBase &rhs); - MxBase &operator=(const MxBase &rhs); - -public: - char m_Name[32]; - char m_Alpha[32]; - char m_Alpha2[32]; - unsigned m_RowCount; - unsigned m_ColCount; - unsigned m_AllocatedRowCount; - unsigned m_AllocatedColCount; - const SeqDB *m_SeqDB; - unsigned m_IdA; - unsigned m_IdB; - const SeqData *m_SA; - const SeqData *m_SB; - - static list *m_Matrices; - //static MxBase *Get(const string &Name); - //static float **Getf(const string &Name); - //static double **Getd(const string &Name); - //static char **Getc(const string &Name); - - static unsigned m_AllocCount; - static unsigned m_ZeroAllocCount; - static unsigned m_GrowAllocCount; - static double m_TotalBytes; - static double m_MaxBytes; - - static void OnCtor(MxBase *Mx); - static void OnDtor(MxBase *Mx); - - MxBase() - { - m_AllocatedRowCount = 0; - m_AllocatedColCount = 0; - m_RowCount = 0; - m_ColCount = 0; - m_IdA = UINT_MAX; - m_IdB = UINT_MAX; - m_SeqDB = 0; - OnCtor(this); - } - virtual ~MxBase() - { - OnDtor(this); - } - - virtual unsigned GetTypeSize() const = 0; - virtual unsigned GetBytes() const = 0; - - void Clear() - { - FreeData(); - m_AllocatedRowCount = 0; - m_AllocatedColCount = 0; - m_RowCount = 0; - m_ColCount = 0; - m_IdA = UINT_MAX; - m_IdB = UINT_MAX; - m_SA = 0; - m_SB = 0; - } - - bool Empty() const - { - return m_RowCount == 0; - } - - virtual void AllocData(unsigned RowCount, unsigned ColCount) = 0; - virtual void FreeData() = 0; - virtual const char *GetAsStr(unsigned i, unsigned j) const = 0; - - void SetAlpha(const char *Alpha) - { - unsigned n = sizeof(m_Alpha); - strncpy(m_Alpha, Alpha, n); - m_Alpha[n] = 0; - } - - void Alloc(const char *Name, unsigned RowCount, unsigned ColCount, - const SeqDB *DB, unsigned IdA, unsigned IdB, - const SeqData *SA, const SeqData *SB); - - void Alloc(const char *Name, unsigned RowCount, unsigned ColCount, - const SeqDB *DB = 0, unsigned IdA = UINT_MAX, unsigned IdB = UINT_MAX); - - void Alloc(const char *Name, unsigned RowCount, unsigned ColCount, - const SeqData *SA, const SeqData *SB); - - static void LogAll() - { - Log("\n"); - if (m_Matrices == 0) - { - Log("MxBase::m_Matrices=0\n"); - return; - } - Log("\n"); - Log("AllRows AllCols Sz MB Name\n"); - Log("------- ------- ---- -------- ----\n"); - double TotalMB = 0; - for (list::const_iterator p = m_Matrices->begin(); - p != m_Matrices->end(); ++p) - { - const MxBase *Mx = *p; - if (Mx == 0) - continue; - //if (Mx->m_RowCount != 0 || ShowEmpty) - // Mx->LogMe(WithData); - unsigned ar = Mx->m_AllocatedRowCount; - if (ar == 0) - continue; - unsigned ac = Mx->m_AllocatedColCount; - unsigned sz = Mx->GetTypeSize(); - double MB = (double) ar*(double) ac*(double) sz/1e6; - TotalMB += MB; - Log("%7u %7u %4u %8.2f %s\n", ar, ac, sz, MB, Mx->m_Name); - } - Log(" --------\n"); - Log("%7.7s %7.7s %4.4s %8.2f\n", "", "", "", TotalMB); - } - - void LogMe(bool WithData = true, int Opts = 0) const; - static void LogCounts(); - }; - -template struct Mx : public MxBase - { -// Disable unimplemented stuff -private: - Mx(Mx &rhs); - Mx &operator=(Mx &rhs); - // const Mx &operator=(const Mx &rhs) const; - -public: - T **m_Data; - - Mx() - { - m_Data = 0; - } - - ~Mx() - { - FreeData(); - } - - virtual void AllocData(unsigned RowCount, unsigned ColCount) - { - if (opt_logmemgrows) - Log("MxBase::AllocData(%u,%u) %s bytes, Name=%s\n", - RowCount, ColCount, IntToStr(GetBytes()), m_Name); - // m_Data = myalloc(RowCount); - m_Data = MYALLOC(T *, RowCount, Mx); - for (unsigned i = 0; i < RowCount; ++i) - // m_Data[i] = myalloc(ColCount); - m_Data[i] = MYALLOC(T, ColCount, Mx); - AddBytes("Mx_AllocData", RowCount*sizeof(T *) + RowCount*ColCount*sizeof(T)); - - m_AllocatedRowCount = RowCount; - m_AllocatedColCount = ColCount; - } - - virtual void FreeData() - { - for (unsigned i = 0; i < m_AllocatedRowCount; ++i) - MYFREE(m_Data[i], m_AllocatedColCount, Mx); - MYFREE(m_Data, m_AllocatedRowCount, Mx); - SubBytes("Mx_AllocData", - m_AllocatedRowCount*sizeof(T *) + m_AllocatedRowCount*m_AllocatedColCount*sizeof(T)); - - m_Data = 0; - m_RowCount = 0; - m_ColCount = 0; - m_AllocatedRowCount = 0; - m_AllocatedColCount = 0; - } - - T **GetData() - { - return (T **) m_Data; - } - - T Get(unsigned i, unsigned j) const - { - assert(i < m_RowCount); - assert(j < m_ColCount); - return m_Data[i][j]; - } - - void Put(unsigned i, unsigned j, T x) const - { - assert(i < m_RowCount); - assert(j < m_ColCount); - m_Data[i][j] = x; - } - - T GetOffDiagAvgs(vector &Avgs) const - { - if (m_RowCount != m_ColCount) - Die("GetOffDiagAvgs, not symmetrical"); - Avgs.clear(); - T Total = T(0); - for (unsigned i = 0; i < m_RowCount; ++i) - { - T Sum = T(0); - for (unsigned j = 0; j < m_ColCount; ++j) - { - if (j == i) - continue; - Sum += m_Data[i][j]; - } - T Avg = Sum/(m_RowCount-1); - Total += Avg; - Avgs.push_back(Avg); - } - return m_RowCount == 0 ? T(0) : Total/m_RowCount; - } - - unsigned GetTypeSize() const - { - return sizeof(T); - } - - virtual unsigned GetBytes() const - { - return m_AllocatedRowCount*m_AllocatedColCount*GetTypeSize() + - m_AllocatedRowCount*sizeof(T *); - } - - const char *GetAsStr(unsigned i, unsigned j) const - { - return TypeToStr(Get(i, j)); - } - - const T *const *const GetData() const - { - return (const T *const *) m_Data; - } - - void Copy(const Mx &rhs) - { - Alloc("Copy", rhs.m_RowCount, rhs.m_ColCount, rhs.m_SeqDB, rhs.m_IdA, rhs.m_IdB); - const T * const *Data = rhs.GetData(); - for (unsigned i = 0; i < m_RowCount; ++i) - for (unsigned j = 0; j < m_ColCount; ++j) - m_Data[i][j] = Data[i][j]; - } - - void Assign(T v) - { - for (unsigned i = 0; i < m_RowCount; ++i) - for (unsigned j = 0; j < m_ColCount; ++j) - m_Data[i][j] = v; - } - - bool Eq(const Mx &rhs, bool Bwd = false) const - { - if (rhs.m_ColCount != m_ColCount) - return false; - if (rhs.m_RowCount != m_RowCount) - return false; - const T * const*d = rhs.GetData(); - int i1 = Bwd ? m_RowCount : 0; - int j1 = Bwd ? m_ColCount : 0; - int i2 = Bwd ? -1 : m_RowCount; - int j2 = Bwd ? -1 : m_ColCount; - for (int i = i1; i != i2; Bwd ? --i : ++i) - for (int j = j1; j != j2; Bwd ? --j : ++j) - { - float x = m_Data[i][j]; - float y = d[i][j]; - if (x < -1e10 && y < -1e10) - continue; - if (!feq(x, y)) - { - Warning("%s[%d][%d] = %g, %s = %g", - m_Name, i, j, x, rhs.m_Name, y); - return false; - } - } - return true; - } - - bool EqMask(const Mx &rhs, const Mx &Mask) const - { - if (rhs.m_ColCount != m_ColCount) - return false; - if (rhs.m_RowCount != m_RowCount) - return false; - - if (Mask.m_ColCount != m_ColCount) - return false; - if (Mask.m_RowCount != m_RowCount) - return false; - - const T * const*d = rhs.GetData(); - bool Bwd = false; - int i1 = Bwd ? m_RowCount : 0; - int j1 = Bwd ? m_ColCount : 0; - int i2 = Bwd ? -1 : m_RowCount; - int j2 = Bwd ? -1 : m_ColCount; - for (int i = i1; i != i2; Bwd ? --i : ++i) - for (int j = j1; j != j2; Bwd ? --j : ++j) - { - if (!Mask.m_Data[i][j]) - continue; - float x = m_Data[i][j]; - float y = d[i][j]; - if (x < -1e10 && y < -1e10) - continue; - if (!feq(x, y)) - { - Warning("%s[%d][%d] = %g, %s = %g", - m_Name, i, j, x, rhs.m_Name, y); - return false; - } - } - return true; - } - - void Init(T v) - { - for (unsigned i = 0; i < m_RowCount; ++i) - for (unsigned j = 0; j < m_ColCount; ++j) - m_Data[i][j] = v; - } - }; - -void WriteMx(const string &Name, Mx &Mxf); - -template void ReserveMx(Mx &Mxf, unsigned N = UINT_MAX) - { - if (Mxf.m_AllocatedRowCount > 0) - return; - extern unsigned g_MaxInputSeqLength; - if (N == UINT_MAX) - N = g_MaxInputSeqLength+1; - Mxf.Alloc("(Reserved)", N, N); - } - -#endif // mx_h diff --git a/myopts.h b/myopts.h deleted file mode 100644 index ba901ea..0000000 --- a/myopts.h +++ /dev/null @@ -1,190 +0,0 @@ -#ifndef MY_VERSION -#define MY_VERSION "4.2" -#endif - -STR_OPT( input, 0) -STR_OPT( query, 0) -STR_OPT( db, 0) -STR_OPT( sort, 0) -STR_OPT( output, 0) -STR_OPT( uc, 0) -STR_OPT( clstr2uc, 0) -STR_OPT( uc2clstr, 0) -STR_OPT( uc2fasta, 0) -STR_OPT( uc2fastax, 0) -STR_OPT( mergesort, 0) -STR_OPT( tmpdir, ".") -STR_OPT( staralign, 0) -STR_OPT( sortuc, 0) -STR_OPT( blastout, 0) -STR_OPT( blast6out, 0) -STR_OPT( fastapairs, 0) -STR_OPT( idchar, "|") -STR_OPT( diffchar, " ") -STR_OPT( uchime, 0) -STR_OPT( gapopen, 0) -STR_OPT( gapext, 0) -STR_OPT( uhire, 0) -STR_OPT( ids, "99,98,95,90,85,80,70,50,35") -STR_OPT( seeds, 0) -STR_OPT( clump, 0) -STR_OPT( clumpout, 0) -STR_OPT( clump2fasta, 0) -STR_OPT( clumpfasta, 0) -STR_OPT( hireout, 0) -STR_OPT( mergeclumps, 0) -STR_OPT( alpha, 0) -STR_OPT( hspalpha, 0) -STR_OPT( probmx, 0) -STR_OPT( matrix, 0) -STR_OPT( tracestate, 0) -STR_OPT( chainout, 0) -STR_OPT( cluster, 0) -STR_OPT( computekl, 0) -STR_OPT( userout, 0) -STR_OPT( userfields, 0) -STR_OPT( seedsout, 0) -STR_OPT( chainhits, 0) -STR_OPT( findorfs, 0) -STR_OPT( strand, 0) -STR_OPT( getseqs, 0) -STR_OPT( labels, 0) -STR_OPT( doug, 0) -STR_OPT( makeindex, 0) -STR_OPT( indexstats, 0) -STR_OPT( uchimeout, 0) -STR_OPT( uchimealns, 0) -STR_OPT( xframe, 0) -STR_OPT( mkctest, 0) -STR_OPT( allpairs, 0) -STR_OPT( fastq2fasta, 0) -STR_OPT( otusort, 0) -STR_OPT( sparsedist, 0) -STR_OPT( sparsedistparams, 0) -STR_OPT( mcc, 0) -STR_OPT( utax, 0) -STR_OPT( simcl, 0) -STR_OPT( absort, 0) -STR_OPT( cc, 0) -STR_OPT( uslink, 0) - -UNS_OPT( band, 16, 0, UINT_MAX) -UNS_OPT( minlen, 10, 1, UINT_MAX) -UNS_OPT( maxlen, 10000, 1, UINT_MAX) -UNS_OPT( w, 0, 1, UINT_MAX) -UNS_OPT( k, 0, 1, UINT_MAX) -UNS_OPT( stepwords, 8, 0, UINT_MAX) -UNS_OPT( maxaccepts, 1, 0, UINT_MAX) -UNS_OPT( maxrejects, 8, 0, UINT_MAX) -UNS_OPT( maxtargets, 0, 0, UINT_MAX) -UNS_OPT( minhsp, 32, 1, UINT_MAX) -UNS_OPT( bump, 50, 0, 100) -UNS_OPT( rowlen, 64, 8, UINT_MAX) -UNS_OPT( idprefix, 0, 0, UINT_MAX) -UNS_OPT( idsuffix, 0, 0, UINT_MAX) -UNS_OPT( chunks, 4, 2, UINT_MAX) -UNS_OPT( minchunk, 64, 2, UINT_MAX) -UNS_OPT( maxclump, 1000, 1, UINT_MAX) -UNS_OPT( iddef, 0, 0, UINT_MAX) -UNS_OPT( mincodons, 20, 1, UINT_MAX) -UNS_OPT( maxovd, 8, 0, UINT_MAX) -UNS_OPT( max2, 40, 0, UINT_MAX) -UNS_OPT( querylen, 500, 0, UINT_MAX) -UNS_OPT( targetlen, 500, 0, UINT_MAX) -UNS_OPT( orfstyle, (1+2+4), 0, UINT_MAX) -UNS_OPT( dbstep, 1, 1, UINT_MAX) -UNS_OPT( randseed, 1, 0, UINT_MAX) -UNS_OPT( maxp, 2, 2, UINT_MAX) -UNS_OPT( idsmoothwindow, 32, 1, UINT_MAX) -UNS_OPT( mindiffs, 3, 1, UINT_MAX) -UNS_OPT( maxspan1, 24, 1, UINT_MAX) -UNS_OPT( maxspan2, 24, 1, UINT_MAX) -UNS_OPT( minorfcov, 16, 1, UINT_MAX) -UNS_OPT( hashsize, 4195879, 1, UINT_MAX) -UNS_OPT( maxpoly, 0, 0, UINT_MAX) -UNS_OPT( droppct, 50, 0, 100) -UNS_OPT( secs, 10, 0, UINT_MAX) -UNS_OPT( maxqgap, 0, 0, UINT_MAX) -UNS_OPT( maxtgap, 0, 0, UINT_MAX) - -INT_OPT( frame, 0, -3, +3) - -TOG_OPT( trace, false) -TOG_OPT( logmemgrows, false) -TOG_OPT( trunclabels, false) -TOG_OPT( verbose, false) -TOG_OPT( wordcountreject, true) -TOG_OPT( rev, false) -TOG_OPT( output_rejects, false) -TOG_OPT( blast_termgaps, false) -TOG_OPT( fastalign, true) -TOG_OPT( flushuc, false) -TOG_OPT( stable_sort, false) -TOG_OPT( minus_frames, true) -TOG_OPT( usort, true) -TOG_OPT( nb, false) -TOG_OPT( twohit, true) -TOG_OPT( ssort, false) -TOG_OPT( log_query, false) -TOG_OPT( log_hothits, false) -TOG_OPT( logwordstats, false) -TOG_OPT( ucl, false) -TOG_OPT( skipgaps2, true) -TOG_OPT( skipgaps, true) -TOG_OPT( denovo, false) -TOG_OPT( cartoon_orfs, false) -TOG_OPT( label_ab, false) -TOG_OPT( wordweight, false) -TOG_OPT( isort, false) -TOG_OPT( selfid, false) -TOG_OPT( leftjust, false) -TOG_OPT( rightjust, false) - -FLT_OPT( id, 0.0, 0.0, 1.0) -FLT_OPT( weak_id, 0.0, 0.0, 1.0) -FLT_OPT( match, 1.0, 0.0, FLT_MAX) -FLT_OPT( mismatch, -2.0, 0.0, FLT_MAX) -FLT_OPT( split, 1000.0, 1.0, FLT_MAX) -FLT_OPT( evalue, 10.0, 0.0, FLT_MAX) -FLT_OPT( weak_evalue, 10.0, 0.0, FLT_MAX) -FLT_OPT( evalue_g, 10.0, 0.0, FLT_MAX) -FLT_OPT( chain_evalue, 10.0, 0.0, FLT_MAX) -FLT_OPT( xdrop_u, 16.0, 0.0, FLT_MAX) -FLT_OPT( xdrop_g, 32.0, 0.0, FLT_MAX) -FLT_OPT( xdrop_ug, 16.0, 0.0, FLT_MAX) -FLT_OPT( xdrop_nw, 16.0, 0.0, FLT_MAX) -FLT_OPT( ka_gapped_lambda, 0.0, 0.0, FLT_MAX) -FLT_OPT( ka_ungapped_lambda, 0.0, 0.0, FLT_MAX) -FLT_OPT( ka_gapped_k, 0.0, 0.0, FLT_MAX) -FLT_OPT( ka_ungapped_k, 0.0, 0.0, FLT_MAX) -FLT_OPT( ka_dbsize, 0.0, 0.0, FLT_MAX) -FLT_OPT( chain_targetfract, 0.0, 0.0, 1.0) -FLT_OPT( targetfract, 0.0, 0.0, 1.0) -FLT_OPT( queryfract, 0.0, 0.0, 1.0) -FLT_OPT( fspenalty, 16.0, 0.0, FLT_MAX) -FLT_OPT( sspenalty, 20.0, 0.0, FLT_MAX) -FLT_OPT( seedt1, 13.0, 0.0, FLT_MAX) -FLT_OPT( seedt2, 11.0, 0.0, FLT_MAX) -FLT_OPT( lopen, 11.0, 0.0, FLT_MAX) -FLT_OPT( lext, 1.0, 0.0, FLT_MAX) -FLT_OPT( minh, 0.3, 0.0, FLT_MAX) -FLT_OPT( xn, 8.0, 0.0, FLT_MAX) -FLT_OPT( dn, 1.4, 0.0, FLT_MAX) -FLT_OPT( xa, 1.0, 0.0, FLT_MAX) -FLT_OPT( mindiv, 0.5, 0.0, 100.0) -FLT_OPT( abskew, 2, 0.0, 100.0) -FLT_OPT( abx, 8.0, 0.0, 100.0) -FLT_OPT( minspanratio1, 0.7, 0.0, 1.0) -FLT_OPT( minspanratio2, 0.7, 0.0, 1.0) - -FLAG_OPT( usersort) -FLAG_OPT( exact) -FLAG_OPT( optimal) -FLAG_OPT( self) -FLAG_OPT( ungapped) -FLAG_OPT( global) -FLAG_OPT( local) -FLAG_OPT( xlat) -FLAG_OPT( realign) -FLAG_OPT( hash) -FLAG_OPT( derep) diff --git a/myutils.cpp b/myutils.cpp deleted file mode 100755 index b184649..0000000 --- a/myutils.cpp +++ /dev/null @@ -1,1852 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) -#include -#include -#include -#include -#include -#include -#else -//#include -#include -#include -#include -#include -#endif - -#include "myutils.h" - -const char *SVN_VERSION = -#include "svnversion.h" -; - -#define TEST_UTILS 0 - -using namespace std; - -const unsigned MY_IO_BUFSIZ = 32000; -const unsigned MAX_FORMATTED_STRING_LENGTH = 64000; - -static char *g_IOBuffers[256]; -static time_t g_StartTime = time(0); -static vector g_Argv; -static double g_PeakMemUseBytes; - -#if TEST_UTILS -void TestUtils() -{ - const int C = 100000000; - for (int i = 0; i < C; ++i) - ProgressStep(i, C, "something or other"); - - Progress("\n"); - Progress("Longer message\r"); - Sleep(1000); - Progress("Short\r"); - Sleep(1000); - Progress("And longer again\r"); - Sleep(1000); - Progress("Shrt\n"); - Sleep(1000); - const unsigned N = 10; - unsigned M = 10; - for (unsigned i = 0; i < N; ++i) - { - ProgressStep(i, N, "Allocating 1MB blocks"); - for (unsigned j = 0; j < M; ++j) - { - ProgressStep(j, M, "Inner loop"); - malloc(100000); - Sleep(500); - } - } -} -#endif // TEST_UTILS - -static void AllocBuffer(FILE *f) -{ - int fd = fileno(f); - if (fd < 0 || fd >= 256) - return; - if (g_IOBuffers[fd] == 0) - g_IOBuffers[fd] = myalloc(char, MY_IO_BUFSIZ); - setvbuf(f, g_IOBuffers[fd], _IOFBF, MY_IO_BUFSIZ); -} - -static void FreeBuffer(FILE *f) -{ - int fd = fileno(f); - if (fd < 0 || fd >= 256) - return; - if (g_IOBuffers[fd] == 0) - return; - myfree(g_IOBuffers[fd]); - g_IOBuffers[fd] = 0; -} - -unsigned GetElapsedSecs() -{ - return (unsigned) (time(0) - g_StartTime); -} - -static unsigned g_NewCalls; -static unsigned g_FreeCalls; -static double g_InitialMemUseBytes; -static double g_TotalAllocBytes; -static double g_TotalFreeBytes; -static double g_NetBytes; -static double g_MaxNetBytes; - -void LogAllocStats() -{ - Log("\n"); - Log(" Allocs %u\n", g_NewCalls); - Log(" Frees %u\n", g_FreeCalls); - Log("Initial alloc %s\n", MemBytesToStr(g_InitialMemUseBytes)); - Log(" Total alloc %s\n", MemBytesToStr(g_TotalAllocBytes)); - Log(" Total free %s\n", MemBytesToStr(g_TotalFreeBytes)); - Log(" Net bytes %s\n", MemBytesToStr(g_NetBytes)); - Log("Max net bytes %s\n", MemBytesToStr(g_MaxNetBytes)); - Log(" Peak total %s\n", MemBytesToStr(g_MaxNetBytes + g_InitialMemUseBytes)); -} - -bool StdioFileExists(const string &FileName) -{ - struct stat SD; - int i = stat(FileName.c_str(), &SD); - return i == 0; -} - -void myassertfail(const char *Exp, const char *File, unsigned Line) -{ - Die("%s(%u) assert failed: %s", File, Line, Exp); -} - -bool myisatty(int fd) -{ - return isatty(fd) != 0; -} - -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) -#else -//#ifdef BIT_VERSION -//#include -//int fseeko(FILE *stream, off_t offset, int whence) -//// { -// off_t FilePos = _fseeki64(stream, offset, whence); -// return (FilePos == -1L) ? -1 : 0; -// } -//#define ftello(fm) (off_t) _ftelli64(fm) -//#else -int fseeko(FILE *stream, off_t offset, int whence) -{ - off_t FilePos = fseek(stream, offset, whence); - return (FilePos == -1L) ? -1 : 0; -} -#define ftello(fm) (off_t) ftell(fm) -//#endif -#endif - -void LogStdioFileState(FILE *f) -{ - unsigned long tellpos = (unsigned long) ftello(f); - long fseek_pos = fseek(f, 0, SEEK_CUR); - int fd = fileno(f); - Log("FILE * %p\n", f); - Log("fileno %d\n", fd); - Log("feof %d\n", feof(f)); - Log("ferror %d\n", ferror(f)); - Log("ftell %ld\n", tellpos); - Log("fseek %ld\n", fseek_pos); -#if !defined(_GNU_SOURCE) && !defined(__APPLE_CC__) - fpos_t fpos; - int fgetpos_retval = fgetpos(f, &fpos); - Log("fpos %ld (retval %d)\n", (long) fpos, fgetpos_retval); - // Log("eof %d\n", _eof(fd)); -#endif -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) -#else -#ifdef BIT_VERSION - __int64 pos64 = _ftelli64(f); - Log("_ftelli64 %lld\n", pos64); -#else - __int32 pos32 = ftell(f); - Log("ftell %lld\n", pos32); - -#endif -#endif -} - -FILE *OpenStdioFile(const string &FileName) -{ - const char *Mode = "rb"; - FILE *f = fopen(FileName.c_str(), Mode); - if (f == 0) - { - if (errno == EFBIG) - { - if (sizeof(off_t) == 4) - Die("File too big, off_t is 32 bits, recompile needed"); - else - Die("Cannot open '%s', file too big (off_t=%u bits)", - FileName.c_str(), sizeof(off_t)*8); - } - Die("Cannot open %s, errno=%d %s", - FileName.c_str(), errno, strerror(errno)); - } - AllocBuffer(f); - return f; -} - -FILE *CreateStdioFile(const string &FileName) -{ - FILE *f = fopen(FileName.c_str(), "wb+"); - if (0 == f) - Die("Cannot create %s, errno=%d %s", - FileName.c_str(), errno, strerror(errno)); - AllocBuffer(f); - return f; -} - -void SetStdioFilePos(FILE *f, off_t Pos) -{ - if (0 == f) - Die("SetStdioFilePos failed, f=NULL"); - int Ok = fseeko(f, Pos, SEEK_SET); - off_t NewPos = ftello(f); - if (Ok != 0 || Pos != NewPos) - { - LogStdioFileState(f); - Die("SetStdioFilePos(%d) failed, Ok=%d NewPos=%d", - (int) Pos, Ok, (int) NewPos); - } -} - -void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes) -{ - if (0 == f) - Die("ReadStdioFile failed, f=NULL"); - SetStdioFilePos(f, Pos); - unsigned BytesRead = fread(Buffer, 1, Bytes, f); - if (BytesRead != Bytes) - { - LogStdioFileState(f); - Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d", - (int) Bytes, (int) BytesRead, errno); - } -} - -void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes) -{ - if (0 == f) - Die("ReadStdioFile failed, f=NULL"); - unsigned BytesRead = fread(Buffer, 1, Bytes, f); - if (BytesRead != Bytes) - { - LogStdioFileState(f); - Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d", - (int) Bytes, (int) BytesRead, errno); - } -} - -// Return values from functions like lseek, ftell, fgetpos are -// "undefined" for files that cannot seek. Attempt to detect -// whether a file can seek by checking for error returns. -bool CanSetStdioFilePos(FILE *f) -{ - // Common special cases - if (f == stdin || f == stdout || f == stderr) - return false; - - fpos_t CurrPos; - int ok1 = fgetpos(f, &CurrPos); - if (ok1 < 0) - return false; - int ok2 = fseek(f, 0, SEEK_END); - if (ok2 < 0) - return false; - fpos_t EndPos; - int ok3 = fgetpos(f, &EndPos); - int ok4 = fsetpos(f, &CurrPos); - if (!ok3 || !ok4) - return false; - return true; -} - -byte *ReadAllStdioFile(FILE *f, unsigned &FileSize) -{ - const unsigned BUFF_SIZE = 1024*1024; - - if (CanSetStdioFilePos(f)) - { - off_t Pos = GetStdioFilePos(f); - off_t FileSize = GetStdioFileSize(f); - if (FileSize > UINT_MAX) - Die("ReadAllStdioFile: file size > UINT_MAX"); - SetStdioFilePos(f, 0); - byte *Buffer = myalloc(byte, unsigned(FileSize)); - ReadStdioFile(f, Buffer, unsigned(FileSize)); - SetStdioFilePos(f, Pos); - FileSize = unsigned(FileSize); - return Buffer; - } - - // Can't seek, read one buffer at a time. - FileSize = 0; - - // Just to initialize so that first call to realloc works. - byte *Buffer = (byte *) malloc(4); - if (Buffer == 0) - Die("ReadAllStdioFile, out of memory"); - for (;;) - { - Buffer = (byte *) realloc(Buffer, FileSize + BUFF_SIZE); - unsigned BytesRead = fread(Buffer + FileSize, 1, BUFF_SIZE, f); - FileSize += BytesRead; - if (BytesRead < BUFF_SIZE) - { - Buffer = (byte *) realloc(Buffer, FileSize); - return Buffer; - } - } -} - -byte *ReadAllStdioFile(const std::string &FileName, off_t &FileSize) -{ -#if WIN32 - FILE *f = OpenStdioFile(FileName); - FileSize = GetStdioFileSize(f); - CloseStdioFile(f); - - HANDLE h = CreateFile(FileName.c_str(), GENERIC_READ, FILE_SHARE_READ, - NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - if (h == INVALID_HANDLE_VALUE) - Die("ReadAllStdioFile:Open(%s) failed", FileName.c_str()); - - unsigned uFileSize = (unsigned) FileSize; - if ((off_t) uFileSize != FileSize) - Die("File too big (%.1f Gb): %s", double(FileSize)/1e9, FileName.c_str()); - - byte *Buffer = myalloc(byte, uFileSize); - DWORD BytesRead; - ReadFile(h, Buffer, uFileSize, &BytesRead, NULL); - if (FileSize != BytesRead) - Die("ReadAllStdioFile:Error reading %s, attempted %u got %u", - FileName.c_str(), FileSize, (unsigned) BytesRead); - - CloseHandle(h); - return Buffer; -#else - int h = open(FileName.c_str(), O_RDONLY); - if (h < 0) - Die("ReadAllStdioFile:Cannot open %s", FileName.c_str()); - FileSize = lseek(h, 0, SEEK_END); - if (FileSize == (off_t) (-1)) - Die("ReadAllStdioFile:Error seeking %s", FileName.c_str()); - // byte *Buffer = myalloc(FileSize); - size_t stBytes = (size_t) FileSize; - if ((off_t) stBytes != FileSize) - Die("ReadAllStdioFile: off_t overflow"); - byte *Buffer = (byte *) malloc(stBytes); - if (Buffer == 0) - Die("ReadAllStdioFile: failed to allocate %s", MemBytesToStr(stBytes)); - lseek(h, 0, SEEK_SET); - size_t n = read(h, Buffer, stBytes); - if (n != FileSize) - Die("ReadAllStdioFile, Error reading %s, attempted %g got %g", - FileName.c_str(), (double) FileSize, (double) n); - close(h); - return Buffer; -#endif -} - -void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes) -{ - if (0 == f) - Die("WriteStdioFile failed, f=NULL"); - SetStdioFilePos(f, Pos); - unsigned BytesWritten = fwrite(Buffer, 1, Bytes, f); - if (BytesWritten != Bytes) - { - LogStdioFileState(f); - Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d", - (int) Bytes, (int) BytesWritten, errno); - } -} - -void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes) -{ - if (0 == f) - Die("WriteStdioFile failed, f=NULL"); - unsigned BytesWritten = fwrite(Buffer, 1, Bytes, f); - if (BytesWritten != Bytes) - { - LogStdioFileState(f); - Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d", - (int) Bytes, (int) BytesWritten, errno); - } -} - -// Return false on EOF, true if line successfully read. -bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes) -{ - if (feof(f)) - return false; - if ((int) Bytes < 0) - Die("ReadLineStdioFile: Bytes < 0"); - char *RetVal = fgets(Line, (int) Bytes, f); - if (NULL == RetVal) - { - if (feof(f)) - return false; - if (ferror(f)) - Die("ReadLineStdioFile: errno=%d", errno); - Die("ReadLineStdioFile: fgets=0, feof=0, ferror=0"); - } - - if (RetVal != Line) - Die("ReadLineStdioFile: fgets != Buffer"); - unsigned n = strlen(Line); - if (n < 1 || Line[n-1] != '\n') - Die("ReadLineStdioFile: line too long or missing end-of-line"); - if (n > 0 && (Line[n-1] == '\r' || Line[n-1] == '\n')) - Line[n-1] = 0; - if (n > 1 && (Line[n-2] == '\r' || Line[n-2] == '\n')) - Line[n-2] = 0; - return true; -} - -// Return false on EOF, true if line successfully read. -bool ReadLineStdioFile(FILE *f, string &Line) -{ - Line.clear(); - for (;;) - { - int c = fgetc(f); - if (c == -1) - { - if (feof(f)) - { - if (!Line.empty()) - return true; - return false; - } - Die("ReadLineStdioFile, errno=%d", errno); - } - if (c == '\r') - continue; - if (c == '\n') - return true; - Line.push_back((char) c); - } -} - -// Copies all of fFrom regardless of current -// file position, appends to fTo. -void AppendStdioFileToFile(FILE *fFrom, FILE *fTo) -{ - off_t SavedFromPos = GetStdioFilePos(fFrom); - off_t FileSize = GetStdioFileSize(fFrom); - const off_t BUFF_SIZE = 1024*1024; - char *Buffer = myalloc(char, BUFF_SIZE); - SetStdioFilePos(fFrom, 0); - off_t BytesRemaining = FileSize; - while (BytesRemaining > 0) - { - off_t BytesToRead = BytesRemaining; - if (BytesToRead > BUFF_SIZE) - BytesToRead = BUFF_SIZE; - ReadStdioFile(fFrom, Buffer, (unsigned) BytesToRead); - WriteStdioFile(fTo, Buffer, (unsigned) BytesToRead); - BytesRemaining -= BytesToRead; - } - SetStdioFilePos(fFrom, SavedFromPos); -} - -void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo) -{ - int Ok = rename(FileNameFrom.c_str(), FileNameTo.c_str()); - if (Ok != 0) - Die("RenameStdioFile(%s,%s) failed, errno=%d %s", - FileNameFrom.c_str(), FileNameTo.c_str(), errno, strerror(errno)); -} - -void FlushStdioFile(FILE *f) -{ - int Ok = fflush(f); - if (Ok != 0) - Die("fflush(%p)=%d,", f, Ok); -} - -void CloseStdioFile(FILE *f) -{ - if (f == 0) - return; - int Ok = fclose(f); - if (Ok != 0) - Die("fclose(%p)=%d", f, Ok); - FreeBuffer(f); -} - -off_t GetStdioFilePos(FILE *f) -{ - off_t FilePos = ftello(f); - if (FilePos < 0) - Die("ftello=%d", (int) FilePos); - return FilePos; -} - -off_t GetStdioFileSize(FILE *f) -{ - off_t CurrentPos = GetStdioFilePos(f); - off_t zeroPos = 0; - int Ok = fseeko(f, zeroPos, SEEK_END); - if (Ok < 0) - Die("fseek in GetFileSize"); - - off_t Length = ftello(f); - if (Length < 0) - Die("ftello in GetFileSize"); - SetStdioFilePos(f, CurrentPos); - return Length; -} - -void DeleteStdioFile(const string &FileName) -{ - int Ok = remove(FileName.c_str()); - if (Ok != 0) - Die("remove(%s) failed, errno=%d %s", FileName.c_str(), errno, strerror(errno)); -} - -void myvstrprintf(string &Str, const char *Format, va_list ArgList) -{ - static char szStr[MAX_FORMATTED_STRING_LENGTH]; - vsnprintf(szStr, MAX_FORMATTED_STRING_LENGTH-1, Format, ArgList); - szStr[MAX_FORMATTED_STRING_LENGTH - 1] = '\0'; - Str.assign(szStr); -} - -void myvstrprintf(string &Str, const char *Format, ...) -{ - va_list ArgList; - va_start(ArgList, Format); - myvstrprintf(Str, Format, ArgList); - va_end(ArgList); -} - -FILE *g_fLog = 0; - -void SetLogFileName(const string &FileName) -{ - if (g_fLog != 0) - CloseStdioFile(g_fLog); - g_fLog = 0; - if (FileName.empty()) - return; - g_fLog = CreateStdioFile(FileName); -} - -void Log(const char *Format, ...) -{ - if (g_fLog == 0) - return; - - static bool InLog = false; - if (InLog) - return; - - InLog = true; - va_list ArgList; - va_start(ArgList, Format); - vfprintf(g_fLog, Format, ArgList); - va_end(ArgList); - fflush(g_fLog); - InLog = false; -} - -void Die(const char *Format, ...) -{ - static bool InDie = false; - if (InDie) - exit(1); - InDie = true; - string Msg; - - if (g_fLog != 0) - setbuf(g_fLog, 0); - va_list ArgList; - va_start(ArgList, Format); - myvstrprintf(Msg, Format, ArgList); - va_end(ArgList); - - fprintf(stderr, "\n\n"); - Log("\n"); - time_t t = time(0); - Log("%s", asctime(localtime(&t))); - for (unsigned i = 0; i < g_Argv.size(); i++) - { - fprintf(stderr, (i == 0) ? "%s" : " %s", g_Argv[i].c_str()); - Log((i == 0) ? "%s" : " %s", g_Argv[i].c_str()); - } - fprintf(stderr, "\n"); - Log("\n"); - - time_t CurrentTime = time(0); - unsigned ElapsedSeconds = unsigned(CurrentTime - g_StartTime); - const char *sstr = SecsToStr(ElapsedSeconds); - Log("Elapsed time: %s\n", sstr); - - const char *szStr = Msg.c_str(); - fprintf(stderr, "\n---Fatal error---\n%s\n", szStr); - Log("\n---Fatal error---\n%s\n", szStr); - -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) -#else - //if (IsDebuggerPresent()) - // __debugbreak(); - //_CrtSetDbgFlag(0); -#endif - - exit(1); -} - -void Warning(const char *Format, ...) -{ - string Msg; - - va_list ArgList; - va_start(ArgList, Format); - myvstrprintf(Msg, Format, ArgList); - va_end(ArgList); - - const char *szStr = Msg.c_str(); - - fprintf(stderr, "\nWARNING: %s\n", szStr); - if (g_fLog != stdout) - { - Log("\nWARNING: %s\n", szStr); - fflush(g_fLog); - } -} - -#if defined linux || __linux__ -double GetMemUseBytes() -{ - static char statm[64]; - static int PageSize = 1; - if (0 == statm[0]) - { - PageSize = sysconf(_SC_PAGESIZE); - pid_t pid = getpid(); - sprintf(statm, "/proc/%d/statm", (int) pid); - } - - int fd = open(statm, O_RDONLY); - if (-1 == fd) - return 1000000; - char Buffer[64]; - int n = read(fd, Buffer, sizeof(Buffer) - 1); - close(fd); - fd = -1; - - if (n <= 0) - return 1000000; - - Buffer[n] = 0; - double Pages = atof(Buffer); - - double Bytes = Pages*PageSize; - if (Bytes > g_PeakMemUseBytes) - g_PeakMemUseBytes = Bytes; - return Bytes; -} -#elif defined(__APPLE__) || (__MACH__) -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DEFAULT_MEM_USE 100000000.0 - -double GetMemUseBytes() -{ - task_t mytask = mach_task_self(); - struct task_basic_info ti; - memset((void *) &ti, 0, sizeof(ti)); - mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT; - kern_return_t ok = task_info(mytask, TASK_BASIC_INFO, (task_info_t) &ti, &count); - if (ok == KERN_INVALID_ARGUMENT) - return DEFAULT_MEM_USE; - - if (ok != KERN_SUCCESS) - return DEFAULT_MEM_USE; - - double Bytes = (double ) ti.resident_size; - if (Bytes > g_PeakMemUseBytes) - g_PeakMemUseBytes = Bytes; - return Bytes; -} -#else -double GetMemUseBytes() -{ - return 0; -} -#endif - -double GetPeakMemUseBytes() -{ - return g_PeakMemUseBytes; -} - -const char *SecsToHHMMSS(int Secs) -{ - int HH = Secs/3600; - int MM = (Secs - HH*3600)/60; - int SS = Secs%60; - static char Str[16]; - if (HH == 0) - sprintf(Str, "%02d:%02d", MM, SS); - else - sprintf(Str, "%02d:%02d:%02d", HH, MM, SS); - return Str; -} - -const char *SecsToStr(double Secs) -{ - if (Secs >= 10.0) - return SecsToHHMMSS((int) Secs); - - static char Str[16]; - if (Secs < 1e-6) - sprintf(Str, "%.2gs", Secs); - else if (Secs < 1e-3) - sprintf(Str, "%.2fms", Secs*1e3); - else - sprintf(Str, "%.3fs", Secs); - return Str; -} - -const char *MemBytesToStr(double Bytes) -{ - static char Str[32]; - - if (Bytes < 1e6) - sprintf(Str, "%.1fkb", Bytes/1e3); - else if (Bytes < 10e6) - sprintf(Str, "%.1fMb", Bytes/1e6); - else if (Bytes < 1e9) - sprintf(Str, "%.0fMb", Bytes/1e6); - else if (Bytes < 10e9) - sprintf(Str, "%.1fGb", Bytes/1e9); - else if (Bytes < 100e9) - sprintf(Str, "%.0fGb", Bytes/1e9); - else - sprintf(Str, "%.3gb", Bytes); - return Str; -} - -const char *IntToStr(unsigned i) -{ - static char Str[32]; - - double d = (double) i; - if (i < 10000) - sprintf(Str, "%u", i); - else if (i < 1e6) - sprintf(Str, "%.1fk", d/1e3); - else if (i < 10e6) - sprintf(Str, "%.1fM", d/1e6); - else if (i < 1e9) - sprintf(Str, "%.0fM", d/1e6); - else if (i < 10e9) - sprintf(Str, "%.1fG", d/1e9); - else if (i < 100e9) - sprintf(Str, "%.0fG", d/1e9); - else - sprintf(Str, "%.3g", d); - return Str; -} - -const char *FloatToStr(double d) -{ - static char Str[32]; - - double a = fabs(d); - if (a < 0.01) - sprintf(Str, "%.3g", a); - else if (a >= 0.01 && a < 1) - sprintf(Str, "%.3f", a); - else if (a <= 10 && a >= 1) - { - double intpart; - if (modf(a, &intpart) < 0.05) - sprintf(Str, "%.0f", d); - else - sprintf(Str, "%.1f", d); - } - else if (a > 10 && a < 10000) - sprintf(Str, "%.0f", d); - else if (a < 1e6) - sprintf(Str, "%.1fk", d/1e3); - else if (a < 10e6) - sprintf(Str, "%.1fM", d/1e6); - else if (a < 1e9) - sprintf(Str, "%.0fM", d/1e6); - else if (a < 10e9) - sprintf(Str, "%.1fG", d/1e9); - else if (a < 100e9) - sprintf(Str, "%.0fG", d/1e9); - else - sprintf(Str, "%.3g", d); - return Str; -} - -bool opt_quiet = false; -bool opt_version = false; -bool opt_logopts = false; -bool opt_compilerinfo = false; -bool opt_help = false; -string opt_log = ""; - -bool optset_quiet = false; -bool optset_version = false; -bool optset_logopts = false; -bool optset_compilerinfo = false; -bool optset_help = false; -bool optset_log = false; - -static string g_CurrentProgressLine; -static string g_ProgressDesc; -static unsigned g_ProgressIndex; -static unsigned g_ProgressCount; - -static unsigned g_CurrProgressLineLength; -static unsigned g_LastProgressLineLength; -static unsigned g_CountsInterval; -static unsigned g_StepCalls; -static time_t g_TimeLastOutputStep; - -static string &GetProgressPrefixStr(string &s) -{ - double Bytes = GetMemUseBytes(); - unsigned Secs = GetElapsedSecs(); - s = string(SecsToHHMMSS(Secs)); - if (Bytes > 0) - { - s.push_back(' '); - char Str[32]; - sprintf(Str, "%5.5s", MemBytesToStr(Bytes)); - s += string(Str); - } - s.push_back(' '); - return s; -} - -void ProgressLog(const char *Format, ...) -{ - string Str; - va_list ArgList; - va_start(ArgList, Format); - myvstrprintf(Str, Format, ArgList); - va_end(ArgList); - - Log("%s", Str.c_str()); - Progress("%s", Str.c_str()); -} - -void Progress(const char *Format, ...) -{ - if (opt_quiet) - return; - - string Str; - va_list ArgList; - va_start(ArgList, Format); - myvstrprintf(Str, Format, ArgList); - va_end(ArgList); - -#if 0 - Log("Progress("); - for (unsigned i = 0; i < Str.size(); ++i) - { - char c = Str[i]; - if (c == '\r') - Log("\\r"); - else if (c == '\n') - Log("\\n"); - else - Log("%c", c); - } - Log(")\n"); -#endif //0 - - for (unsigned i = 0; i < Str.size(); ++i) - { - if (g_CurrProgressLineLength == 0) - { - string s; - GetProgressPrefixStr(s); - for (unsigned j = 0; j < s.size(); ++j) - { - fputc(s[j], stderr); - ++g_CurrProgressLineLength; - } - } - - char c = Str[i]; - if (c == '\n' || c == '\r') - { - for (unsigned j = g_CurrProgressLineLength; j < g_LastProgressLineLength; ++j) - fputc(' ', stderr); - if (c == '\n') - g_LastProgressLineLength = 0; - else - g_LastProgressLineLength = g_CurrProgressLineLength; - g_CurrProgressLineLength = 0; - fputc(c, stderr); - } - else - { - fputc(c, stderr); - ++g_CurrProgressLineLength; - } - } -} - -void ProgressExit() -{ - time_t Now = time(0); - struct tm *t = localtime(&Now); - const char *s = asctime(t); - unsigned Secs = GetElapsedSecs(); - - Log("\n"); - Log("Finished %s", s); // there is a newline in s - Log("Elapsed time %s\n", SecsToHHMMSS((int) Secs)); - Log("Max memory %s\n", MemBytesToStr(g_PeakMemUseBytes)); -#if WIN32 && DEBUG - // Skip exit(), which can be very slow in DEBUG build - // VERY DANGEROUS practice, because it skips global destructors. - // But if you know the rules, you can break 'em, right? - //ExitProcess(0); -#endif -} - -const char *PctStr(double x, double y) -{ - if (y == 0) - { - if (x == 0) - return "100%"; - else - return "inf%"; - } - static char Str[16]; - double p = x*100.0/y; - sprintf(Str, "%5.1f%%", p); - return Str; -} - -string &GetProgressLevelStr(string &s) -{ - unsigned Index = g_ProgressIndex; - unsigned Count = g_ProgressCount; - if (Count == UINT_MAX) - { - if (Index == UINT_MAX) - s = "100%"; - else - { - char Tmp[16]; - sprintf(Tmp, "%u", Index); - s = Tmp; - } - } - else - s = string(PctStr(Index+1, Count)); - s += string(" ") + g_ProgressDesc; - return s; -} - -void ProgressStep(unsigned i, unsigned N, const char *Format, ...) -{ - if (opt_quiet) - return; - - if (i == 0) - { - string Str; - va_list ArgList; - va_start(ArgList, Format); - myvstrprintf(Str, Format, ArgList); - va_end(ArgList); - g_ProgressDesc = Str; - g_ProgressIndex = 0; - g_ProgressCount = N; - g_CountsInterval = 1; - g_StepCalls = 0; - g_TimeLastOutputStep = 0; - if (g_CurrProgressLineLength > 0) - Progress("\n"); - } - - if (i >= N && i != UINT_MAX) - Die("ProgressStep(%u,%u)", i, N); - bool IsLastStep = (i == UINT_MAX || i + 1 == N); - if (!IsLastStep) - { - ++g_StepCalls; - if (g_StepCalls%g_CountsInterval != 0) - return; - - time_t Now = time(0); - if (Now == g_TimeLastOutputStep) - { - if (g_CountsInterval < 128) - g_CountsInterval = (g_CountsInterval*3)/2; - else - g_CountsInterval += 64; - return; - } - else - { - time_t Secs = Now - g_TimeLastOutputStep; - if (Secs > 1) - g_CountsInterval = unsigned(g_CountsInterval/(Secs*8)); - } - - if (g_CountsInterval < 1) - g_CountsInterval = 1; - - g_TimeLastOutputStep = Now; - } - - g_ProgressIndex = i; - - if (i > 0) - { - va_list ArgList; - va_start(ArgList, Format); - myvstrprintf(g_ProgressDesc, Format, ArgList); - } - - string LevelStr; - GetProgressLevelStr(LevelStr); - Progress(" %s\r", LevelStr.c_str()); - - if (IsLastStep) - { - g_CountsInterval = 1; - fputc('\n', stderr); - } -} - -enum OptType -{ - OT_Flag, - OT_Tog, - OT_Int, - OT_Uns, - OT_Str, - OT_Float, - OT_Enum -}; - -struct OptInfo -{ - void *Value; - bool *OptSet; - string LongName; - OptType Type; - int iMin; - int iMax; - unsigned uMin; - unsigned uMax; - double dMin; - double dMax; - map EnumValues; - - bool bDefault; - int iDefault; - unsigned uDefault; - double dDefault; - string strDefault; - - string Help; - - bool operator<(const OptInfo &rhs) const - { - return LongName < rhs.LongName; - } -}; - -static set g_Opts; - -void Help() -{ - printf("\n"); - - void Usage(); - Usage(); - - for (set::const_iterator p = g_Opts.begin(); p != g_Opts.end(); ++p) - { - const OptInfo &Opt = *p; - - printf("\n"); - string LongName = Opt.LongName.c_str(); - if (Opt.Type == OT_Tog) - LongName = string("[no]") + LongName; - printf(" --%s ", LongName.c_str()); - - switch (Opt.Type) - { - case OT_Flag: - break; - case OT_Tog: - break; - case OT_Int: - printf(""); - break; - case OT_Uns: - printf(""); - break; - case OT_Str: - printf(""); - break; - case OT_Float: - printf(""); - break; - case OT_Enum: - printf(""); - break; - default: - printf("??type"); - break; - } - - printf(" "); - const string &s = Opt.Help; - for (string::const_iterator q = s.begin(); q != s.end(); ++q) - { - char c = *q; - if (c == '\n') - printf("\n "); - else - printf("%c", c); - } - printf("\n"); - } - printf("\n"); - exit(0); -} - -void CmdLineErr(const char *Format, ...) -{ - va_list ArgList; - va_start(ArgList, Format); - string Str; - myvstrprintf(Str, Format, ArgList); - va_end(ArgList); - fprintf(stderr, "\n"); - fprintf(stderr, "Invalid command line\n"); - fprintf(stderr, "%s\n", Str.c_str()); - fprintf(stderr, "For list of command-line options use --help.\n"); - fprintf(stderr, "\n"); - exit(1); -} - -static set::iterator GetOptInfo(const string &LongName, - bool ErrIfNotFound) -{ - for (set::iterator p = g_Opts.begin(); - p != g_Opts.end(); ++p) - { - const OptInfo &Opt = *p; - if (Opt.LongName == LongName) - return p; - if (Opt.Type == OT_Tog && "no" + Opt.LongName == LongName) - return p; - } - if (ErrIfNotFound) - CmdLineErr("Option --%s is invalid", LongName.c_str()); - return g_Opts.end(); -} - -static void AddOpt(const OptInfo &Opt) -{ - if (GetOptInfo(Opt.LongName, false) != g_Opts.end()) - Die("Option --%s defined twice", Opt.LongName.c_str()); - g_Opts.insert(Opt); -} - -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) -#else -#pragma warning(disable: 4505) // unreferenced local function -#endif - -static void DefineFlagOpt(const string &LongName, const string &Help, - void *Value, bool *OptSet) -{ - *(bool *) Value = false; - - OptInfo Opt; - Opt.Value = Value; - Opt.OptSet = OptSet; - Opt.LongName = LongName; - Opt.bDefault = false; - Opt.Help = Help; - Opt.Type = OT_Flag; - AddOpt(Opt); -} - -static void DefineTogOpt(const string &LongName, bool Default, const string &Help, - void *Value, bool *OptSet) -{ - *(bool *) Value = Default; - - OptInfo Opt; - Opt.Value = Value; - Opt.OptSet = OptSet; - Opt.LongName = LongName; - Opt.bDefault = Default; - Opt.Help = Help; - Opt.Type = OT_Tog; - AddOpt(Opt); -} - -static void DefineIntOpt(const string &LongName, int Default, int Min, int Max, - const string &Help, void *Value, bool *OptSet) -{ - *(int *) Value = Default; - - OptInfo Opt; - Opt.Value = Value; - Opt.OptSet = OptSet; - Opt.LongName = LongName; - Opt.iDefault = Default; - Opt.iMin = Min; - Opt.iMax = Max; - Opt.Help = Help; - Opt.Type = OT_Int; - AddOpt(Opt); -} - -static void DefineUnsOpt(const string &LongName, unsigned Default, unsigned Min, - unsigned Max, const string &Help, void *Value, bool *OptSet) -{ - *(unsigned *) Value = Default; - - OptInfo Opt; - Opt.Value = Value; - Opt.OptSet = OptSet; - Opt.LongName = LongName; - Opt.uDefault = Default; - Opt.uMin = Min; - Opt.uMax = Max; - Opt.Help = Help; - Opt.Type = OT_Uns; - AddOpt(Opt); -} - -static void DefineFloatOpt(const string &LongName, double Default, double Min, - double Max, const string &Help, void *Value, bool *OptSet) -{ - *(double *) Value = Default; - - OptInfo Opt; - Opt.Value = Value; - Opt.OptSet = OptSet; - Opt.LongName = LongName; - Opt.dDefault = Default; - Opt.dMin = Min; - Opt.dMax = Max; - Opt.Help = Help; - Opt.Type = OT_Float; - AddOpt(Opt); -} - -static void DefineStrOpt(const string &LongName, const char *Default, - const string &Help, void *Value, bool *OptSet) -{ - *(string *) Value = (Default == 0 ? "" : string(Default)); - - OptInfo Opt; - Opt.Value = Value; - Opt.OptSet = OptSet; - Opt.LongName = LongName; - Opt.strDefault = (Default == 0 ? "" : string(Default)); - Opt.Help = Help; - Opt.Type = OT_Str; - AddOpt(Opt); -} - -static void ParseEnumValues(const string &Values, map &EnumValues) -{ - EnumValues.clear(); - - string Name; - string Value; - bool Eq = false; - for (string::const_iterator p = Values.begin(); ; ++p) - { - char c = (p == Values.end() ? '|' : *p); - if (isspace(c)) - ; - else if (c == '|') - { - if (EnumValues.find(Name) != EnumValues.end()) - Die("Invalid enum values, '%s' defined twice: '%s'", - Name.c_str(), Values.c_str()); - if (Name.empty() || Value.empty()) - Die("Invalid enum values, empty name or value: '%s'", - Values.c_str()); - - EnumValues[Name] = atoi(Value.c_str()); - Name.clear(); - Value.clear(); - Eq = false; - } - else if (c == '=') - Eq = true; - else if (Eq) - Value.push_back(c); - else - Name.push_back(c); - if (p == Values.end()) - return; - } -} - -static void DefineEnumOpt(const string &LongName, const string &ShortName, - int Default, const string &Values, const string &Help, void *Value) -{ - *(int *) Value = Default; - - OptInfo Opt; - Opt.Value = Value; - Opt.LongName = LongName; - Opt.iDefault = Default; - Opt.Help = Help; - Opt.Type = OT_Enum; - ParseEnumValues(Values, Opt.EnumValues); - AddOpt(Opt); -} -#undef FLAG_OPT -#undef TOG_OPT -#undef INT_OPT -#undef UNS_OPT -#undef FLT_OPT -#undef STR_OPT -#undef ENUM_OPT -#define FLAG_OPT(LongName) bool opt_##LongName; bool optset_##LongName; -#define TOG_OPT(LongName, Default) bool opt_##LongName; bool optset_##LongName; -#define INT_OPT(LongName, Default, Min, Max) int opt_##LongName; bool optset_##LongName; -#define UNS_OPT(LongName, Default, Min, Max) unsigned opt_##LongName; bool optset_##LongName; -#define FLT_OPT(LongName, Default, Min, Max) double opt_##LongName; bool optset_##LongName; -#define STR_OPT(LongName, Default) string opt_##LongName; bool optset_##LongName; -#define ENUM_OPT(LongName, Values, Default) int opt_##LongName; bool optset_##LongName; -#include "myopts.h" - -static int EnumStrToInt(const OptInfo &Opt, const string &Value) -{ - const map &e = Opt.EnumValues; - string s; - for (map::const_iterator p = e.begin(); p != e.end(); ++p) - { - if (Value == p->first) - return p->second; - s += " " + p->first; - } - CmdLineErr("--%s %s not recognized, valid are: %s", - Opt.LongName.c_str(), Value.c_str(), s.c_str()); - ureturn(-1); -} - -static void SetOpt(OptInfo &Opt, const string &Value) -{ - *Opt.OptSet = true; - switch (Opt.Type) - { - case OT_Int: - { - *(int *) Opt.Value = atoi(Value.c_str()); - break; - } - case OT_Uns: - { - unsigned uValue = 0; - int n = sscanf(Value.c_str(), "%u", &uValue); - if (n != 1) - CmdLineErr("Invalid value '%s' for --%s", - Value.c_str(), Opt.LongName.c_str()); - *(unsigned *) Opt.Value = uValue; - break; - } - case OT_Float: - { - *(double *) Opt.Value = atof(Value.c_str()); - break; - } - case OT_Str: - { - *(string *) Opt.Value = Value; - break; - } - case OT_Enum: - { - *(int *) Opt.Value = EnumStrToInt(Opt, Value); - break; - } - default: - asserta(false); - } -} - -void LogOpts() -{ - for (set::const_iterator p = g_Opts.begin(); p != g_Opts.end(); ++p) - { - const OptInfo &Opt = *p; - Log("%s = ", Opt.LongName.c_str()); - switch (Opt.Type) - { - case OT_Flag: - Log("%s", (*(bool *) Opt.Value) ? "yes" : "no"); - break; - case OT_Tog: - Log("%s", (*(bool *) Opt.Value) ? "on" : "off"); - break; - case OT_Int: - Log("%d", *(int *) Opt.Value); - break; - case OT_Uns: - Log("%u", *(unsigned *) Opt.Value); - break; - case OT_Float: - { - double Value = *(double *) Opt.Value; - if (Value == FLT_MAX) - Log("*"); - else - Log("%g", Value); - break; - } - case OT_Str: - Log("%s", (*(string *) Opt.Value).c_str()); - break; - case OT_Enum: - Log("%d", *(int *) Opt.Value); - break; - default: - asserta(false); - } - Log("\n"); - } -} - -static void CompilerInfo() -{ -#ifdef _FILE_OFFSET_BITS - printf("_FILE_OFFSET_BITS=%d\n", _FILE_OFFSET_BITS); -#else - printf("_FILE_OFFSET_BITS not defined\n"); -#endif - -#define x(t) printf("sizeof(" #t ") = %d\n", (int) sizeof(t)); - x(int) - x(long) - x(float) - x(double) - x(void *) - x(off_t) -#undef x - exit(0); -} - -void Split(const string &Str, vector &Fields, char Sep) -{ - Fields.clear(); - const unsigned Length = (unsigned) Str.size(); - string s; - for (unsigned i = 0; i < Length; ++i) - { - char c = Str[i]; - if ((Sep == 0 && isspace(c)) || c == Sep) - { - if (!s.empty() || Sep != 0) - Fields.push_back(s); - s.clear(); - } - else - s.push_back(c); - } - if (!s.empty()) - Fields.push_back(s); -} - -static void GetArgsFromFile(const string &FileName, vector &Args) -{ - Args.clear(); - - FILE *f = OpenStdioFile(FileName); - string Line; - while (ReadLineStdioFile(f, Line)) - { - size_t n = Line.find('#'); - if (n != string::npos) - Line = Line.substr(0, n); - vector Fields; - Split(Line, Fields); - Args.insert(Args.end(), Fields.begin(), Fields.end()); - } - CloseStdioFile(f); -} - -void MyCmdLine(int argc, char **argv) -{ - g_Opts.clear(); g_Argv.clear(); - static unsigned RecurseDepth = 0; - ++RecurseDepth; - - DefineFlagOpt("compilerinfo", "Write info about compiler types and #defines to stdout.", - (void *) &opt_compilerinfo, &optset_compilerinfo); - DefineFlagOpt("quiet", "Turn off progress messages.", (void *) &opt_quiet, &optset_quiet); - DefineFlagOpt("version", "Show version and exit.", (void *) &opt_version, &optset_version); - DefineFlagOpt("logopts", "Log options.", (void *) &opt_logopts, &optset_logopts); - DefineFlagOpt("help", "Display command-line options.", (void *) &opt_help, &optset_help); - DefineStrOpt("log", "", "Log file name.", (void *) &opt_log, &optset_log); - -#undef FLAG_OPT -#undef TOG_OPT -#undef INT_OPT -#undef UNS_OPT -#undef FLT_OPT -#undef STR_OPT -#undef ENUM_OPT -#define FLAG_OPT(LongName) DefineFlagOpt(#LongName, "help", (void *) &opt_##LongName, &optset_##LongName); -#define TOG_OPT(LongName, Default) DefineTogOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName); -#define INT_OPT(LongName, Default, Min, Max) DefineIntOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName); -#define UNS_OPT(LongName, Default, Min, Max) DefineUnsOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName); -#define FLT_OPT(LongName, Default, Min, Max) DefineFloatOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName); -#define STR_OPT(LongName, Default) DefineStrOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName); -#define ENUM_OPT(LongName, Values, Default) DefineEnumOpt(#LongName, Values, Default, "help", (void *) &opt_##LongName, &optset_##LongName); -#include "myopts.h" - - if (RecurseDepth == 0) - g_Argv.clear(); - - for (int i = 0; i < argc; ++i) - g_Argv.push_back(string(argv[i])); - - - int i = 1; - for (;;) - { - if (i >= argc) - break; - const string &Arg = g_Argv[i]; - - if (Arg.empty()) - continue; - else if (Arg == "file:" && i + 1 < argc) - { - const string &FileName = g_Argv[i+1]; - vector Args; - GetArgsFromFile(FileName, Args); - for (vector::const_iterator p = Args.begin(); - p != Args.end(); ++p) - { - g_Argv.push_back(*p); - ++argc; - } - i += 2; - continue; - } - else if (Arg.size() > 1 && Arg[0] == '-') - { - string LongName = (Arg.size() > 2 && Arg[1] == '-' ? Arg.substr(2) : Arg.substr(1)); - OptInfo Opt = *GetOptInfo(LongName, true); - *Opt.OptSet = true; - if (Opt.Type == OT_Flag) - { - g_Opts.erase(Opt); - *(bool *) Opt.Value = true; - g_Opts.insert(Opt); - ++i; - continue; - } - else if (Opt.Type == OT_Tog) - { - g_Opts.erase(Opt); - if (string("no") + Opt.LongName == LongName) - *(bool *) Opt.Value = false; - else - { - asserta(Opt.LongName == LongName); - *(bool *) Opt.Value = true; - } - g_Opts.insert(Opt); - ++i; - continue; - } - - ++i; - if (i >= argc) - CmdLineErr("Missing value for option --%s", LongName.c_str()); - - string Value = g_Argv[i]; - SetOpt(Opt, Value); - - ++i; - continue; - } - else - CmdLineErr("Expected -option_name or --option_name, got '%s'", Arg.c_str()); - } - - --RecurseDepth; - if (RecurseDepth > 0) - return; - - if (opt_help) - Help(); - - if (opt_compilerinfo) - CompilerInfo(); - - SetLogFileName(opt_log); - - if (opt_log != "") - { - for (int i = 0; i < argc; ++i) - Log("%s%s", i == 0 ? "" : " ", g_Argv[i].c_str()); - Log("\n"); - time_t Now = time(0); - struct tm *t = localtime(&Now); - const char *s = asctime(t); - Log("Started %s", s); // there is a newline in s - Log("Version " MY_VERSION ".%s\n", SVN_VERSION); - Log("\n"); - } - - if (opt_logopts) - LogOpts(); -} - -double Pct(double x, double y) -{ - if (y == 0.0f) - return 0.0f; - return (x*100.0f)/y; -} - -void GetCmdLine(string &s) -{ - s.clear(); - for (unsigned i = 0; i < SIZE(g_Argv); ++i) - { - if (i > 0) - s += " "; - s += g_Argv[i]; - } -} - -char *mystrsave(const char *s) -{ - unsigned n = unsigned(strlen(s)); - char *t = myalloc(char, n+1); - memcpy(t, s, n+1); - return t; -} - -void Logu(unsigned u, unsigned w, unsigned prefixspaces) -{ - for (unsigned i = 0; i < prefixspaces; ++i) - Log(" "); - if (u == UINT_MAX) - Log("%*.*s", w, w, "*"); - else - Log("%*u", w, u); -} - -void Logf(float x, unsigned w, unsigned prefixspaces) -{ - for (unsigned i = 0; i < prefixspaces; ++i) - Log(" "); - if (x == FLT_MAX) - Log("%*.*s", w, w, "*"); - else - Log("%*.2f", w, x); -} - -static uint32 g_SLCG_state = 1; - -// Numerical values used by Microsoft C, according to wikipedia: -// http://en.wikipedia.org/wiki/Linear_congruential_generator -static uint32 g_SLCG_a = 214013; -static uint32 g_SLCG_c = 2531011; - -// Simple Linear Congruential Generator -// Bad properties; used just to initialize the better generator. -static uint32 SLCG_rand() -{ - g_SLCG_state = g_SLCG_state*g_SLCG_a + g_SLCG_c; - return g_SLCG_state; -} - -static void SLCG_srand(uint32 Seed) -{ - g_SLCG_state = Seed; - for (int i = 0; i < 10; ++i) - SLCG_rand(); -} - -/*** - A multiply-with-carry random number generator, see: - http://en.wikipedia.org/wiki/Multiply-with-carry - - The particular multipliers used here were found on - the web where they are attributed to George Marsaglia. - ***/ - -static bool g_InitRandDone = false; -static uint32 g_X[5]; - -uint32 RandInt32() -{ - InitRand(); - - uint64 Sum = 2111111111*(uint64) g_X[3] + 1492*(uint64) g_X[2] + - 1776*(uint64) g_X[1] + 5115*(uint64) g_X[0] + g_X[4]; - g_X[3] = g_X[2]; - g_X[2] = g_X[1]; - g_X[1] = g_X[0]; - g_X[4] = (uint32) (Sum >> 32); - g_X[0] = (uint32) Sum; - return g_X[0]; -} - -unsigned randu32() -{ - return (unsigned) RandInt32(); -} - -void InitRand() -{ - if (g_InitRandDone) - return; - // Do this first to avoid recursion - g_InitRandDone = true; - - unsigned Seed = (optset_randseed ? opt_randseed : (unsigned) (time(0)*getpid())); - Log("RandSeed=%u\n", Seed); - SLCG_srand(Seed); - - for (unsigned i = 0; i < 5; i++) - g_X[i] = SLCG_rand(); - - for (unsigned i = 0; i < 100; i++) - RandInt32(); -} - -// MUST COME AT END BECAUSE OF #undef -#if RCE_MALLOC -#undef mymalloc -#undef myfree -#undef myfree2 -void *mymalloc(unsigned bytes, const char *FileName, int Line) -{ - void *rce_malloc(unsigned bytes, const char *FileName, int Line); - return rce_malloc(bytes, FileName, Line); -} - -void myfree(void *p, const char *FileName, int Line) -{ - void rce_free(void *p, const char *FileName, int Line); - rce_free(p, FileName, Line); -} - -void myfree2(void *p, unsigned bytes, const char *FileName, int Line) -{ - void rce_free(void *p, const char *FileName, int Line); - rce_free(p, FileName, Line); -} - -#else // RCE_MALLOC -void *mymalloc(unsigned bytes) -{ - ++g_NewCalls; - if (g_InitialMemUseBytes == 0) - g_InitialMemUseBytes = GetMemUseBytes(); - - g_TotalAllocBytes += bytes; - g_NetBytes += bytes; - if (g_NetBytes > g_MaxNetBytes) - { - if (g_NetBytes > g_MaxNetBytes + 10000000) - GetMemUseBytes();//to force update of peak - g_MaxNetBytes = g_NetBytes; - } - void *p = malloc(bytes); - //void *p = _malloc_dbg(bytes, _NORMAL_BLOCK, __FILE__, __LINE__); - if (0 == p) - { - double b = GetMemUseBytes(); - fprintf(stderr, "\nOut of memory mymalloc(%u), curr %.3g bytes", - (unsigned) bytes, b); - void LogAllocs(); - LogAllocs(); -#if DEBUG && defined(_MSC_VER) - asserta(_CrtCheckMemory()); -#endif - Die("Out of memory, mymalloc(%u), curr %.3g bytes\n", - (unsigned) bytes, b); - } - return p; -} - -void myfree(void *p) -{ - if (p == 0) - return; - free(p); - //_free_dbg(p, _NORMAL_BLOCK); -} - -void myfree2(void *p, unsigned bytes) -{ - ++g_FreeCalls; - g_TotalFreeBytes += bytes; - g_NetBytes -= bytes; - - if (p == 0) - return; - free(p); -} -#endif diff --git a/myutils.h b/myutils.h deleted file mode 100644 index b63ad3c..0000000 --- a/myutils.h +++ /dev/null @@ -1,274 +0,0 @@ -#ifndef myutils_h -#define myutils_h - -#define RCE_MALLOC 0 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef _MSC_VER -#include -#endif - -using namespace std; - -#ifdef _MSC_VER -#include -#pragma warning(disable: 4996) // deprecated functions -#define _CRT_SECURE_NO_DEPRECATE 1 -#endif - -#if defined(_DEBUG) && !defined(DEBUG) -#define DEBUG 1 -#endif - -#if defined(DEBUG) && !defined(_DEBUG) -#define _DEBUG 1 -#endif - -#ifndef NDEBUG -#define DEBUG 1 -#define _DEBUG 1 -#endif - -typedef unsigned char byte; -typedef unsigned short uint16; -typedef unsigned uint32; -typedef int int32; -typedef double float32; -typedef signed char int8; -typedef unsigned char uint8; - -#ifdef _MSC_VER - -typedef __int64 int64; -typedef unsigned __int64 uint64; - -#define INT64_PRINTF "lld" -#define UINT64_PRINTF "llu" - -#define SIZE_T_PRINTF "u" -#define OFF64_T_PRINTF "lld" - -#define INT64_PRINTFX "llx" -#define UINT64_PRINTFX "llx" - -#define SIZE_T_PRINTFX "x" -#define OFF64_T_PRINTFX "llx" - -#elif defined(__x86_64__) - -typedef long int64; -typedef unsigned long uint64; - -#define INT64_PRINTF "ld" -#define UINT64_PRINTF "lu" - -#define SIZE_T_PRINTF "lu" -#define OFF64_T_PRINTF "ld" - -#define INT64_PRINTFX "lx" -#define UINT64_PRINTFX "lx" - -#define SIZE_T_PRINTFX "lx" -#define OFF64_T_PRINTFX "lx" - -#else - -typedef long long int64; -typedef unsigned long long uint64; - -#define INT64_PRINTF "lld" -#define UINT64_PRINTF "llu" - -#define SIZE_T_PRINTF "u" -#define OFF64_T_PRINTF "lld" - -#define INT64_PRINTFX "llx" -#define UINT64_PRINTFX "llx" - -#define SIZE_T_PRINTFX "x" -#define OFF64_T_PRINTFX "llx" -#endif - -#define d64 INT64_PRINTF -#define u64 UINT64_PRINTF -#define x64 UINT64_PRINTFX - -// const uint64 UINT64_MAX = (~((uint64) 0)); - -void myassertfail(const char *Exp, const char *File, unsigned Line); -#undef assert -#ifdef NDEBUG -#define assert(exp) ((void)0) -#define myassert(exp) ((void)0) -#else -#define assert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) ) -#define myassert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) ) -#endif -#define asserta(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) ) - -#define ureturn(x) return (x) - -#define NotUsed(v) ((void *) &v) - -// pom=plus or minus, tof=true or false -static inline char pom(bool Plus) { return Plus ? '+' : '-'; } -static inline char tof(bool x) { return x ? 'T' : 'F'; } -static inline char yon(bool x) { return x ? 'Y' : 'N'; } -unsigned GetElapsedSecs(); - -#if RCE_MALLOC - -void *rce_malloc(unsigned bytes, const char *FileName, int Line); -void rce_free(void *p, const char *FileName, int LineNr); -void rce_chkmem(); - -void rce_dumpmem_(const char *FileName, int LineNr); -#define rce_dumpmem() rce_dumpmem_(__FILE__, __LINE__) - -void rce_assertvalidptr_(void *p, const char *FileName, int LineNr); -#define rce_assertvalidptr(p) rce_assertvalidptr_(p, __FILE__, __LINE__) - -void rce_dumpptr_(void *p, const char *FileName, int LineNr); -#define rce_dumpptr(p) rce_dumpptr_(p, __FILE__, __LINE__) - -#define mymalloc(n) rce_malloc((n), __FILE__, __LINE__) -#define myfree(p) rce_free(p, __FILE__, __LINE__) -#define myfree2(p,n) rce_free(p, __FILE__, __LINE__) -#define myalloc(t, n) (t *) rce_malloc((n)*sizeof(t), __FILE__, __LINE__) - -#else // RCE_MALLOC -void *mymalloc(unsigned bytes); -void myfree2(void *p, unsigned Bytes); -void myfree(void *p); -#define rce_chkmem() /* empty */ -#define myalloc(t, n) (t *) mymalloc((n)*sizeof(t)) -#endif // RCE_MALLOC - -#define SIZE(c) unsigned((c).size()) - -bool myisatty(int fd); - -#ifdef _MSC_VER -#define off_t __int64 -#endif - -FILE *OpenStdioFile(const string &FileName); -FILE *CreateStdioFile(const string &FileName); -bool CanSetStdioFilePos(FILE *f); -void CloseStdioFile(FILE *f); -void SetStdioFilePos(FILE *f, off_t Pos); -void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes); -void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes); -void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes); -void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes); -bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes); -bool ReadLineStdioFile(FILE *f, string &Line); -byte *ReadAllStdioFile(FILE *f, off_t &FileSize); -byte *ReadAllStdioFile(const string &FileName, off_t &FileSize); -void AppendStdioFileToFile(FILE *fFrom, FILE *fTo); -void FlushStdioFile(FILE *f); -bool StdioFileExists(const string &FileName); -off_t GetStdioFilePos(FILE *f); -off_t GetStdioFileSize(FILE *f); -void LogStdioFileState(FILE *f); -void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo); -void DeleteStdioFile(const string &FileName); - -void myvstrprintf(string &Str, const char *szFormat, va_list ArgList); -void myvstrprintf(string &Str, const char *szFormat, ...); - -void SetLogFileName(const string &FileName); -void Log(const char *szFormat, ...); - -void Die(const char *szFormat, ...); -void Warning(const char *szFormat, ...); - -void ProgressStep(unsigned i, unsigned N, const char *Format, ...); -void Progress(const char *szFormat, ...); -void Progress(const string &Str); -void ProgressLog(const char *szFormat, ...); -void ProgressExit(); - -char *mystrsave(const char *s); - -double GetPeakMemUseBytes(); - -// Are two floats equal to within epsilon? -const double epsilon = 0.01; -inline bool feq(double x, double y, double epsilon) - { - if (fabs(x) > 10000) - epsilon = fabs(x)/10000; - if (fabs(x - y) > epsilon) - return false; - return true; - } - -inline bool feq(double x, double y) - { - if (x < -1e6 && y < -1e6) - return true; - double e = epsilon; - if (fabs(x) > 10000) - e = fabs(x)/10000; - if (fabs(x - y) > e) - return false; - return true; - } - -#define asserteq(x, y) assert(feq(x, y)) -#define assertaeq(x, y) asserta(feq(x, y)) - -#define zero(a, n) memset(a, 0, n*sizeof(a[0])) - -void InitRand(); -unsigned randu32(); -void Split(const string &Str, vector &Fields, char Sep = 0); -double Pct(double x, double y); -double GetMemUseBytes(); -const char *MemBytesToStr(double Bytes); -const char *IntToStr(unsigned i); -const char *FloatToStr(double d); -const char *SecsToStr(double Secs); -void Logu(unsigned u, unsigned w, unsigned prefixspaces = 2); -void Logf(float x, unsigned w, unsigned prefixspaces = 2); -const char *SecsToHHMMSS(int Secs); - -void MyCmdLine(int argc, char **argv); -void CmdLineErr(const char *Format, ...); -void Help(); -void GetCmdLine(string &s); - -#define FLAG_OPT(LongName) extern bool opt_##LongName; extern bool optset_##LongName; -#define TOG_OPT(LongName, Default) extern bool opt_##LongName; extern bool optset_##LongName; -#define INT_OPT(LongName, Default, Min, Max) extern int opt_##LongName; extern bool optset_##LongName; -#define UNS_OPT(LongName, Default, Min, Max) extern unsigned opt_##LongName; extern bool optset_##LongName; -#define FLT_OPT(LongName, Default, Min, Max) extern double opt_##LongName; extern bool optset_##LongName; -#define STR_OPT(LongName, Default) extern string opt_##LongName; extern bool optset_##LongName; -#define ENUM_OPT(LongName, Default, Values) extern int opt_##LongName; extern bool optset_##LongName; -#include "myopts.h" -#undef FLAG_OPT -#undef TOG_OPT -#undef INT_OPT -#undef UNS_OPT -#undef FLT_OPT -#undef STR_OPT -#undef ENUM_OPT - -extern const char *SVN_VERSION; -extern const char *SVN_MODS; -extern bool opt_quiet; -extern bool opt_version; -extern FILE *g_fLog; - -#endif // myutils_h diff --git a/orf.h b/orf.h deleted file mode 100644 index 90b29d1..0000000 --- a/orf.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef orf_h -#define orf_h - -#include "alpha.h" - -struct ORFData - { - const byte *NucSeq; - const byte *AminoSeq; - int Frame; - unsigned NucL; - unsigned AminoL; - unsigned NucLo; - unsigned NucHi; - ORFData *Next; - - unsigned GetNucPosFirstBase() const; - unsigned GetAAPos(unsigned NucPos) const; - unsigned GetCodex(unsigned NucPos) const; - unsigned GetNucLo(unsigned AALo, unsigned AAHi) const; - unsigned GetNucHi(unsigned AALo, unsigned AAHi) const; - unsigned GetAALo(unsigned NucLo, unsigned NucHi) const; - unsigned GetAAHi(unsigned NucLo, unsigned NucHi) const; - unsigned GetNucPosFirstBaseInCodon(unsigned AAPos) const; - unsigned GetNucPosLastBaseInCodon(unsigned AAPos) const; - unsigned RoundToCodonLo(unsigned NucPos) const; - unsigned RoundToCodonHi(unsigned NucPos) const; - void LogMe() const; - void LogMe2() const; - }; - -const byte ORFEND = '.'; - -void GetORFs(const byte *NucSeq, unsigned NucL, vector &ORFs, - unsigned ORFStyle, int FindFrame, int Sign); - -#endif // orf_h diff --git a/out.h b/out.h deleted file mode 100644 index 4ca50c7..0000000 --- a/out.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef out_h -#define out_h - -#include "seq.h" -#include "hsp.h" -#include "orf.h" -#include "path.h" -#include - -struct AlnData - { -/*** -SA.Seq and SB.Seq align. -Reverse strand stuff for nucleotides is handled like this: - SA.RevComp must be false. - If SB.RevComp is true, then SA.Seq is r.c.'d relative to the sequence in - the input file (query or db). If so, coordinates in HSP refer to SB.Seq - so are also r.c.'d relative to the original sequence. -***/ - SeqData SA; - SeqData SB; - HSPData HSP; - const char *Path; - char IdDesc[256]; - - float FractId; - float RawScore; - float BitScore; - float Evalue; - - void LogMe() const - { - Log("AD: "); - HSP.LogMe(); - Log(" %s,%s\n", SA.Label, SB.Label); - } - }; - -bool OnDerepHit(const SeqData &SA, const SeqData &SB); - -bool OnLocalUngappedHit(const SeqData &SA, const SeqData &SB, - const HSPData &HSP, float &Evalue, float &FractId); - -bool OnLocalGappedHit(const SeqData &SA, const SeqData &SB, - const HSPData &HSP, const PathData &PD, float &Evalue, float &FractId); - -bool OnGlobalHit(const SeqData &SA, const SeqData &SB, const PathData &PD, - float &FractId); - -void OnReject(const SeqData &SA, const SeqData &SB, double FractId, - const char *Path); - -void OnNotMatched(const char *Label, unsigned L); -void OnNewCluster(unsigned ClusterIndex, const char *Label, unsigned L); -void OnNewLibCluster(unsigned ClusterIndex, const char *Label, unsigned L); -void OnLibCluster(unsigned ClusterIndex, unsigned Size, double AvgId, - const char *Label); -void OnNewCluster(unsigned ClusterIndex, unsigned Size, double AvgId, - const char *Label); -void OnChainCov(const SeqData &NucleoSD, const SeqData &TargetSD, - float Score, float ChainCov); - -void SetUserFieldIndexes(const string &s); - -void BlastOut(FILE *f, const AlnData &AD); -void Blast6Out(FILE *f, const AlnData &AD); -void FastaPairOut(FILE *f, const AlnData &AD); -void UserOut(FILE *f, const AlnData &AD); - -void BlastOutORF(FILE *f, const AlnData &AD); - -void OpenOutputFiles(); -void CloseOutputFiles(); -void SetLibSeedCount(unsigned DBSeqCount); -const char *UserFieldIndexToStr(unsigned i); - -extern float **g_SubstMx; - -static char g_IdChar = '|'; -static char g_DiffChar = ' '; - -static inline char GetSymN(byte Letter1, byte Letter2) - { - Letter1 = toupper(Letter1); - Letter2 = toupper(Letter2); - if (Letter1 == Letter2) - return g_IdChar; - return g_DiffChar; - } - -static inline char GetSymA(byte Letter1, byte Letter2) - { - Letter1 = toupper(Letter1); - Letter2 = toupper(Letter2); - if (Letter1 == Letter2) - return '|'; - - float Score = g_SubstMx[Letter1][Letter2]; - if (Score >= 2.0f) - return ':'; - if (Score > 0.0f) - return '.'; - return ' '; - } - -static inline char GetSym(byte Letter1, byte Letter2, bool Nucleo) - { - if (Nucleo) - return GetSymN(Letter1, Letter2); - else - return GetSymA(Letter1, Letter2); - } - -static unsigned GetNDig(unsigned n) - { - if (n < 10) - return 1; - if (n < 100) - return 2; - if (n < 1000) - return 3; - if (n < 10000) - return 4; - if (n < 100000) - return 5; - if (n < 1000000) - return 6; - return 10; - } - -extern unsigned *g_UserFieldIndexes; -extern unsigned g_UserFieldCount; - -#endif // out_h diff --git a/path.cpp b/path.cpp deleted file mode 100644 index 9340344..0000000 --- a/path.cpp +++ /dev/null @@ -1,151 +0,0 @@ -#include "myutils.h" -#include "path.h" -#include "timing.h" - -#define TRACE 0 - -const unsigned PathMagic = 0x9A783A16; - -struct PathBuffer - { - unsigned Magic; - char *Buffer; - unsigned Size; - bool InUse; - }; - -static PathBuffer **g_PathBuffers; -static unsigned g_PathBufferSize; - -static char *AllocBuffer(unsigned Size) - { - if (Size == 0) - return 0; - -// Is a free buffer that is big enough? - for (unsigned i = 0; i < g_PathBufferSize; ++i) - { - PathBuffer *PB = g_PathBuffers[i]; - asserta(PB->Magic == PathMagic); - if (!PB->InUse) - { - if (PB->Size >= Size) - { - PB->InUse = true; - return PB->Buffer; - } - if (PB->Buffer == 0) - { - unsigned Size2 = Size + 1024; - PB->Buffer = MYALLOC(char, Size2, Path); - PB->Size = Size2; - PB->InUse = true; - return PB->Buffer; - } - } - } - -// No available buffer, must expand g_PathBuffers[] - unsigned NewPathBufferSize = g_PathBufferSize + 1024; - PathBuffer **NewPathBuffers = MYALLOC(PathBuffer *, NewPathBufferSize, Path); - - for (unsigned i = 0; i < g_PathBufferSize; ++i) - NewPathBuffers[i] = g_PathBuffers[i]; - - for (unsigned i = g_PathBufferSize; i < NewPathBufferSize; ++i) - { - PathBuffer *PB = MYALLOC(PathBuffer, 1, Path); - PB->Magic = PathMagic; - PB->Buffer = 0; - PB->Size = 0; - PB->InUse = false; - NewPathBuffers[i] = PB; - } - - PathBuffer *PB = NewPathBuffers[g_PathBufferSize]; - - MYFREE(g_PathBuffers, g_PathBufferSize, Path); - g_PathBuffers = NewPathBuffers; - g_PathBufferSize = NewPathBufferSize; - - asserta(!PB->InUse && PB->Buffer == 0); - - unsigned Size2 = Size + 1024; - PB->Buffer = MYALLOC(char, Size2, Path); - PB->Size = Size2; - PB->InUse = true; - return PB->Buffer; - } - -static void FreeBuffer(char *Buffer) - { - if (Buffer == 0) - return; - - for (unsigned i = 0; i < g_PathBufferSize; ++i) - { - PathBuffer *PB = g_PathBuffers[i]; - if (PB->Buffer == Buffer) - { - asserta(PB->InUse); - PB->InUse = false; - return; - } - } - - Die("FreeBuffer, not found"); - } - -void PathData::Alloc(unsigned MaxLen) - { - if (MaxLen < Bytes) - return; - - StartTimer(PathAlloc); - if (Bytes > 0) - { - FreeBuffer(Front); - } - - Bytes = MaxLen + 1; - Front = AllocBuffer(Bytes); - Back = Front + Bytes - 1; - Start = 0; - EndTimer(PathAlloc); - } - -void PathData::Free() - { - FreeBuffer(Front); - Front = 0; - Start = 0; - Back = 0; - } - -void PathData::Copy(const PathData &rhs) - { - Alloc(rhs.Bytes); - strcpy(Front, rhs.Front); - Start = Front + (rhs.Start - rhs.Front); - } - -void PathData::FromStr(const char *PathStr) - { - asserta(PathStr != 0); - unsigned NeededBytes = (unsigned) strlen(PathStr) + 1; - Alloc(NeededBytes); - strcpy(Front, PathStr); - Start = Front; - } - -void LogPathStats() - { - Log("\n"); - unsigned Bytes = 0; - for (unsigned i = 0; i < g_PathBufferSize; ++i) - { - const PathBuffer *PB = g_PathBuffers[i]; - Bytes += PB->Size; - } - Log("%u paths allocated, total memory %u bytes\n", g_PathBufferSize, Bytes); - } diff --git a/path.h b/path.h deleted file mode 100644 index f63be7e..0000000 --- a/path.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef path_h -#define path_h - -struct PathData - { -private: - PathData(PathData &); - PathData &operator=(PathData &); - -public: - char *Start; - char *Front; - char *Back; - unsigned Bytes; - -public: - PathData() - { - Clear(true); - } - ~PathData() - { - Free(); - } - void Free(); - void Alloc(unsigned MaxLen); - void Clear(bool ctor = false) - { - Start = 0; - if (ctor) - { - Front = 0; - Back = 0; - Bytes = 0; - } - else - Free(); - } - void Copy(const PathData &rhs); - void FromStr(const char *PathStr); - void Reverse() - { - asserta(Start != 0); - unsigned L = (unsigned) strlen(Start); - for (unsigned k = 0; k < L/2; ++k) - { - char c = Start[k]; - Start[k] = Start[L-k-1]; - Start[L-k-1] = c; - } - } - void SetEmpty() - { - Start = 0; - } - - bool IsEmpty() const - { - return Start == 0; - } - }; - -#endif // path_h diff --git a/searchchime.cpp b/searchchime.cpp deleted file mode 100644 index c00a9c4..0000000 --- a/searchchime.cpp +++ /dev/null @@ -1,304 +0,0 @@ -#include "myutils.h" -#include "ultra.h" -#include "chime.h" -#include "uc.h" -#include "dp.h" -#include -#include - -#define TRACE 0 - -extern FILE *g_fUChime; - -void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ, - vector &Parents); - -void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD, - const string &PathQA, const string &PathQB, ChimeHit2 &Hit); - -double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo); - -static void GetSmoothedIdVec(const SeqData &QSD, const SeqData &PSD, const string &Path, - vector &IdVec, unsigned d) - { - IdVec.clear(); - const unsigned ColCount = SIZE(Path); - - const byte *Q = QSD.Seq; - const byte *P = PSD.Seq; - - const unsigned QL = QSD.L; - const unsigned PL = PSD.L; - - if (QL <= d) - { - IdVec.resize(QSD.L, 0); - return; - } - - unsigned QPos = 0; - unsigned PPos = 0; - - vector SameVec; - SameVec.reserve(QL); - for (unsigned Col = 0; Col < ColCount; ++Col) - { - char c = Path[Col]; - - bool Same = false; - if (c == 'M') - { - byte q = Q[QPos]; - byte p = P[PPos]; - Same = (toupper(q) == toupper(p)); - } - - if (c == 'M' || c == 'D') - { - ++QPos; - SameVec.push_back(Same); - } - - if (c == 'M' || c == 'I') - ++PPos; - } - - asserta(SIZE(SameVec) == QL); - - unsigned n = 0; - for (unsigned QPos = 0; QPos < d; ++QPos) - { - if (SameVec[QPos]) - ++n; - IdVec.push_back(n); - } - - for (unsigned QPos = d; QPos < QL; ++QPos) - { - if (SameVec[QPos]) - ++n; - IdVec.push_back(n); - if (SameVec[QPos-d]) - --n; - } - asserta(SIZE(IdVec) == QL); - -#if TRACE - { - Log("\n"); - Log("GetSmoothedIdVec\n"); - unsigned QPos = 0; - unsigned PPos = 0; - Log("Q P Same Id\n"); - Log("- - ---- -------\n"); - for (unsigned Col = 0; Col < ColCount; ++Col) - { - char c = Path[Col]; - - bool Same = false; - if (c == 'M') - { - byte q = Q[QPos]; - byte p = P[PPos]; - Same = (toupper(q) == toupper(p)); - Log("%c %c %4c %7d\n", q, p, tof(Same), IdVec[QPos]); - } - - if (c == 'M' || c == 'D') - ++QPos; - if (c == 'M' || c == 'I') - ++PPos; - } - } -#endif - } - -bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, - const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF, - float MinFractId, ChimeHit2 &Hit) - { - Hit.Clear(); - Hit.QLabel = QSD.Label; - - if (opt_verbose) - { - Log("\n"); - Log("SearchChime()\n"); - Log("Query>%s\n", QSD.Label); - } - - vector Parents; - GetCandidateParents(U, QSD, QAb, Parents); - - unsigned ParentCount = SIZE(Parents); - if (ParentCount <= 1) - { - if (opt_verbose) - Log("%u candidate parents, done.\n", ParentCount); - return false; - } - - if (opt_fastalign) - HF.SetA(QSD); - HSPFinder *ptrHF = (opt_fastalign ? &HF : 0); - - unsigned ChunkLength; - vector ChunkLos; - GetChunkInfo(QSD.L, ChunkLength, ChunkLos); - const unsigned ChunkCount = SIZE(ChunkLos); - - vector ChunkIndexToBestId(ChunkCount, 0); - vector ChunkIndexToBestParentIndex(ChunkCount, UINT_MAX); - - vector PSDs; - vector Paths; - double TopPctId = 0.0; - unsigned TopParentIndex = UINT_MAX; - unsigned QL = QSD.L; - vector MaxIdVec(QL, 0); - for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex) - { - unsigned ParentSeqIndex = Parents[ParentIndex]; - - SeqData PSD; - //PSD.Label = U.GetSeedLabel(ParentSeqIndex); - //PSD.Seq = U.GetSeedSeq(ParentSeqIndex); - //PSD.L = U.GetSeedLength(ParentSeqIndex); - //PSD.Index = ParentSeqIndex; - U.GetSeqData(ParentSeqIndex, PSD); - PSDs.push_back(PSD); - - if (opt_fastalign) - HF.SetB(PSD); - - PathData PD; - - float HSPId; - bool Found = GlobalAlign(QSD, PSD, AP, AH, *ptrHF, MinFractId, HSPId, PD); - if (!Found) - { - Paths.push_back(""); - continue; - } - - double PctId = 100.0*GetFractIdGivenPath(QSD.Seq, PSD.Seq, PD.Start, true); - if (opt_selfid && PctId == 100.0) - { - Paths.push_back(""); - continue; - } - - if (PctId > TopPctId) - { - TopParentIndex = ParentIndex; - TopPctId = PctId; - if (TopPctId >= 100.0 - opt_mindiv) - { - if (opt_verbose) - { - Log(" %.1f%% >%s\n", TopPctId, PSD.Label); - Log(" Top hit exceeds ctl threshold, done.\n"); - return false; - } - } - } - - string Path = PD.Start; - Paths.push_back(Path); - - vector IdVec; - GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow); - - for (unsigned QPos = 0; QPos < QL; ++QPos) - if (IdVec[QPos] > MaxIdVec[QPos]) - MaxIdVec[QPos] = IdVec[QPos]; - } - - vector BestParents; - for (unsigned k = 0; k < opt_maxp; ++k) - { - unsigned BestParent = UINT_MAX; - unsigned BestCov = 0; - for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex) - { - const SeqData &PSD = PSDs[ParentIndex]; - const string &Path = Paths[ParentIndex]; - if (Path == "") - continue; - - vector IdVec; - GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow); - - unsigned Cov = 0; - for (unsigned QPos = 0; QPos < QL; ++QPos) - if (IdVec[QPos] == MaxIdVec[QPos]) - ++Cov; - - if (Cov > BestCov) - { - BestParent = ParentIndex; - BestCov = Cov; - } - } - - if (BestParent == UINT_MAX) - break; - - BestParents.push_back(BestParent); - vector IdVec; - - const SeqData &PSD = PSDs[BestParent]; - const string &Path = Paths[BestParent]; - GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow); - for (unsigned QPos = 0; QPos < QL; ++QPos) - if (IdVec[QPos] == MaxIdVec[QPos]) - MaxIdVec[QPos] = UINT_MAX; - } - - unsigned BestParentCount = SIZE(BestParents); - - if (opt_verbose) - { - Log("%u/%u best parents\n", BestParentCount, ParentCount); - for (unsigned k = 0; k < BestParentCount; ++k) - { - unsigned i = BestParents[k]; - Log(" %s\n", PSDs[i].Label); - } - } - - bool Found = false; - for (unsigned k1 = 0; k1 < BestParentCount; ++k1) - { - unsigned i1 = BestParents[k1]; - asserta(i1 < ParentCount); - - const SeqData &PSD1 = PSDs[i1]; - const string &Path1 = Paths[i1]; - - for (unsigned k2 = k1 + 1; k2 < BestParentCount; ++k2) - { - unsigned i2 = BestParents[k2]; - asserta(i2 < ParentCount); - asserta(i2 != i1); - - const SeqData &PSD2 = PSDs[i2]; - const string &Path2 = Paths[i2]; - - ChimeHit2 Hit2; - AlignChime(QSD, PSD1, PSD2, Path1, Path2, Hit2); - Hit2.PctIdQT = TopPctId; - - if (Hit2.Accept()) - Found = true; - - if (Hit2.Score > Hit.Score) - Hit = Hit2; - - if (opt_verbose) - Hit2.LogMe(); - } - } - - return Found; - } diff --git a/seq.h b/seq.h deleted file mode 100644 index 9014641..0000000 --- a/seq.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef seq_h -#define seq_h - -struct ORFData; - -struct SeqData - { - const char *Label; - const byte *Seq; - unsigned L; - unsigned Index; - -// RevComp means that SeqData.Seq is reverse-complemented relative -// to the sequence in the input file (query or db). Coordinates in -// a hit (e.g., AlnData) will be relative to SeqData.Seq, so both -// the sequence and the coordinates should be r.c.'d for output. - bool RevComp; - bool Nucleo; - const ORFData *ORFParent; - - SeqData() - { - Clear(); - } - - void Clear() - { - Label = 0; - Seq = 0; - L = 0; - Index = UINT_MAX; - RevComp = false; - Nucleo = false; - ORFParent = 0; - } - }; - -#endif // seq_h diff --git a/seqdb.cpp b/seqdb.cpp deleted file mode 100644 index 03de189..0000000 --- a/seqdb.cpp +++ /dev/null @@ -1,289 +0,0 @@ -#include "myutils.h" -#include "seqdb.h" -#include "alpha.h" -#include "timing.h" -#include "sfasta.h" -#include "seq.h" - -void SeqToFasta(FILE *f, const char *Label, const byte *Seq, unsigned L) - { - const unsigned ROWLEN = 80; - if (Label != 0) - fprintf(f, ">%s\n", Label); - unsigned BlockCount = (L + ROWLEN - 1)/ROWLEN; - for (unsigned BlockIndex = 0; BlockIndex < BlockCount; ++BlockIndex) - { - unsigned From = BlockIndex*ROWLEN; - unsigned To = From + ROWLEN; - if (To >= L) - To = L; - for (unsigned Pos = From; Pos < To; ++Pos) - fputc(Seq[Pos], f); - fputc('\n', f); - } - } - -SeqDB::~SeqDB() - { - Clear(); - } - -SeqDB::SeqDB() - { - Clear(true); - } - -void SeqDB::Clear(bool ctor) - { - if (!ctor) - { - for (unsigned i = 0; i < m_SeqCount; ++i) - { - unsigned n = strlen(m_Labels[i]); - MYFREE(m_Labels[i], n, SeqDB); - MYFREE(m_Seqs[i], m_SeqLengths[i], SeqDB); - } - MYFREE(m_Labels, m_Size, SeqDB); - MYFREE(m_Seqs, m_Size, SeqDB); - MYFREE(m_SeqLengths, m_Size, SeqDB); - } - - m_FileName.clear(); - m_SeqCount = 0; - m_Size = 0; - - m_Labels = 0; - m_Seqs = 0; - m_SeqLengths = 0; - - m_Aligned = false; - m_IsNucleo = false; - m_IsNucleoSet = false; - } - -void SeqDB::InitEmpty(bool Nucleo) - { - Clear(); - m_IsNucleo = Nucleo; - m_IsNucleoSet = true; - } - -void SeqDB::FromFasta(const string &FileName, bool AllowGaps) - { - Clear(); - m_FileName = FileName; - SFasta SF; - - SF.Open(FileName); - SF.m_AllowGaps = AllowGaps; - - ProgressStep(0, 1000, "Reading %s", FileName.c_str()); - for (;;) - { - unsigned QueryPctDoneX10 = SF.GetPctDoneX10(); - ProgressStep(QueryPctDoneX10, 1000, "Reading %s", FileName.c_str()); - const byte *Seq = SF.GetNextSeq(); - if (Seq == 0) - break; - - const char *Label = SF.GetLabel(); - unsigned L = SF.GetSeqLength(); - AddSeq(Label, Seq, L); - } - ProgressStep(999, 1000, "Reading %s", FileName.c_str()); - - SetIsNucleo(); - - Progress("%s sequences\n", IntToStr(GetSeqCount())); - } - -void SeqDB::ToFasta(const string &FileName) const - { - FILE *f = CreateStdioFile(FileName); - for (unsigned SeqIndex = 0; SeqIndex < GetSeqCount(); ++SeqIndex) - ToFasta(f, SeqIndex); - CloseStdioFile(f); - } - -void SeqDB::SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel) const - { - if (WithLabel) - fprintf(f, ">%s\n", GetLabel(SeqIndex)); - - const unsigned ROWLEN = 80; - - unsigned L = GetSeqLength(SeqIndex); - const byte *Seq = GetSeq(SeqIndex); - unsigned BlockCount = (L + ROWLEN - 1)/ROWLEN; - for (unsigned BlockIndex = 0; BlockIndex < BlockCount; ++BlockIndex) - { - unsigned From = BlockIndex*ROWLEN; - unsigned To = From + ROWLEN; - if (To >= L) - To = L; - for (unsigned Pos = From; Pos < To; ++Pos) - fputc(Seq[Pos], f); - fputc('\n', f); - } - } - -void SeqDB::ToFasta(FILE *f, unsigned SeqIndex) const - { - asserta(SeqIndex < m_SeqCount); - fprintf(f, ">%s\n", GetLabel(SeqIndex)); - SeqToFasta(f, SeqIndex); - } - -unsigned SeqDB::GetMaxLabelLength() const - { - const unsigned SeqCount = GetSeqCount(); - unsigned MaxL = 0; - for (unsigned Index = 0; Index < SeqCount; ++Index) - { - unsigned L = (unsigned) strlen(m_Labels[Index]); - if (L > MaxL) - MaxL = L; - } - return MaxL; - } - -unsigned SeqDB::GetMaxSeqLength() const - { - const unsigned SeqCount = GetSeqCount(); - unsigned MaxL = 0; - for (unsigned Index = 0; Index < SeqCount; ++Index) - { - unsigned L = m_SeqLengths[Index]; - if (L > MaxL) - MaxL = L; - } - return MaxL; - } - -void SeqDB::LogMe() const - { - Log("\n"); - const unsigned SeqCount = GetSeqCount(); - Log("SeqDB %u seqs, aligned=%c\n", SeqCount, tof(m_Aligned)); - if (SeqCount == 0) - return; - - Log("Index Label Length Seq\n"); - Log("----- ---------------- ------ ---\n"); - for (unsigned Index = 0; Index < SeqCount; ++Index) - { - Log("%5u", Index); - Log(" %16.16s", m_Labels[Index]); - unsigned L = m_SeqLengths[Index]; - Log(" %6u", L); - Log(" %*.*s", L, L, m_Seqs[Index]); - Log("\n"); - } - } - -void SeqDB::GetSeqData(unsigned Id, SeqData &Buffer) const - { - asserta(Id < m_SeqCount); - Buffer.Seq = m_Seqs[Id]; - Buffer.Label = m_Labels[Id]; - Buffer.L = m_SeqLengths[Id]; - Buffer.Index = Id; - Buffer.ORFParent = 0; - Buffer.RevComp = false; - Buffer.Nucleo = IsNucleo(); - } - -void SeqDB::SetIsNucleo() - { - const unsigned SeqCount = GetSeqCount(); - unsigned N = 0; - for (unsigned i = 0; i < 100; ++i) - { - unsigned SeqIndex = unsigned(rand()%SeqCount); - const byte *Seq = GetSeq(SeqIndex); - unsigned L = GetSeqLength(SeqIndex); - const unsigned Pos = unsigned(rand()%L); - byte c = Seq[Pos]; - - if (g_IsNucleoChar[c]) - ++N; - } - m_IsNucleo = (N > 80); - m_IsNucleoSet = true; - } - -unsigned SeqDB::GetTotalLength() const - { - const unsigned SeqCount = GetSeqCount(); - unsigned TotalLength = 0; - for (unsigned Id = 0; Id < SeqCount; ++Id) - TotalLength += GetSeqLength(Id); - return TotalLength; - } - -unsigned SeqDB::AddSeq(const char *Label, const byte *Seq, unsigned L) - { - StartTimer(AddSeq); - if (m_SeqCount >= m_Size) - { - unsigned NewSize = unsigned(m_Size*1.5) + 1024; - char **NewLabels = MYALLOC(char *, NewSize, SeqDB); - byte **NewSeqs = MYALLOC(byte *, NewSize, SeqDB); - unsigned *NewSeqLengths = MYALLOC(unsigned, NewSize, SeqDB); - - for (unsigned i = 0; i < m_SeqCount; ++i) - { - NewLabels[i] = m_Labels[i]; - NewSeqs[i] = m_Seqs[i]; - NewSeqLengths[i] = m_SeqLengths[i]; - } - - MYFREE(m_Labels, m_SeqCount, SeqDB); - MYFREE(m_Seqs, m_SeqCount, SeqDB); - MYFREE(m_SeqLengths, m_SeqCount, SeqDB); - - m_Labels = NewLabels; - m_Seqs = NewSeqs; - m_SeqLengths = NewSeqLengths; - m_Size = NewSize; - } - - unsigned Index = m_SeqCount++; - m_Seqs[Index] = MYALLOC(byte, L, SeqDB); - memcpy(m_Seqs[Index], Seq, L); - - unsigned n = strlen(Label) + 1; - m_Labels[Index] = MYALLOC(char, n, SeqDB); - memcpy(m_Labels[Index], Label, n); - - if (Index == 0) - m_Aligned = true; - else - m_Aligned = (m_Aligned && L == m_SeqLengths[0]); - - m_SeqLengths[Index] = L; - - EndTimer(AddSeq); - return Index; - } - -unsigned SeqDB::GetIndex(const char *Label) const - { - for (unsigned i = 0; i < m_SeqCount; ++i) - if (strcmp(Label, m_Labels[i]) == 0) - return i; - Die("SeqDB::GetIndex(%s), not found", Label); - return UINT_MAX; - } - -void SeqDB::MakeLabelToIndex(map &LabelToIndex) - { - LabelToIndex.clear(); - for (unsigned i = 0; i < m_SeqCount; ++i) - { - const string &Label = string(GetLabel(i)); - if (LabelToIndex.find(Label) != LabelToIndex.end()) - Die("Duplicate label: %s", Label.c_str()); - LabelToIndex[Label] = i; - } - } diff --git a/seqdb.h b/seqdb.h deleted file mode 100644 index e4af984..0000000 --- a/seqdb.h +++ /dev/null @@ -1,108 +0,0 @@ -#ifndef seqdb_h -#define seqdb_h - -#include -#include - -struct SeqData; - -using namespace std; - -struct SeqDB - { -private: - SeqDB(const SeqDB &rhs); - SeqDB &operator=(const SeqDB &rhs); - -public: - string m_FileName; - char **m_Labels; - byte **m_Seqs; - unsigned *m_SeqLengths; - unsigned m_SeqCount; - unsigned m_Size; - - bool m_Aligned; - bool m_IsNucleo; - bool m_IsNucleoSet; - -public: - SeqDB(); - ~SeqDB(); - void Clear(bool ctor = false); - void InitEmpty(bool Nucleo); - - unsigned AddSeq(const char *Label, const byte *Seq, unsigned L); - - byte *GetSeq(unsigned SeqIndex) const - { - asserta(SeqIndex < m_SeqCount); - return m_Seqs[SeqIndex]; - } - - const char *GetLabel(unsigned SeqIndex) const - { - asserta(SeqIndex < m_SeqCount); - return m_Labels[SeqIndex]; - } - - unsigned GetSeqLength(unsigned SeqIndex) const - { - asserta(SeqIndex < m_SeqCount); - return m_SeqLengths[SeqIndex]; - } - - unsigned GetSeqCount() const - { - return m_SeqCount; - } - - unsigned GetPairCount() const - { - unsigned SeqCount = GetSeqCount(); - return (SeqCount*(SeqCount - 1))/2; - } - - unsigned GetPairIndex(unsigned SeqIndex1, unsigned SeqIndex2) const - { - if (SeqIndex1 > SeqIndex2) - return (SeqIndex1*(SeqIndex1 - 1))/2 + SeqIndex2; - return (SeqIndex2*(SeqIndex2 - 1))/2 + SeqIndex1; - } - - unsigned GetColCount() const - { - if (!m_Aligned) - Die("SeqDB::GetColCount, not aligned"); - if (m_SeqCount == 0) - Die("SeqDB::GetColCount, empty"); - return m_SeqLengths[0]; - } - - bool IsNucleo() const - { - asserta(m_IsNucleoSet); - return m_IsNucleo; - } - - void GetSeqData(unsigned Id, SeqData &Buffer) const; - - unsigned GetMaxLabelLength() const; - unsigned GetMaxSeqLength() const; - void SetIsNucleo(); - unsigned GetIndex(const char *Label) const; - void MakeLabelToIndex(map &LabelToIndex); - - void LogMe() const; - void FromFasta(const string &FileName, bool AllowGaps = false); - - void ToFasta(const string &FileName) const; - void ToFasta(FILE *f, unsigned SeqIndex) const; - void SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel = false) const; - - unsigned GetTotalLength() const; - }; - -bool isgap(byte c); - -#endif diff --git a/setnucmx.cpp b/setnucmx.cpp deleted file mode 100644 index 030ff5a..0000000 --- a/setnucmx.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "myutils.h" -#include "mx.h" - -Mx g_SubstMxf; -float **g_SubstMx; - -static const char Alphabet[] = "ACGTU"; - -void SetNucSubstMx(double Match, double Mismatch) - { - static bool Done = false; - if (Done) - return; - Done = true; - - if (Match <= 0.0) - Die("Match score should be +ve"); - if (Mismatch >= 0.0) - Die("Mismatch score should be -ve"); - - unsigned N = unsigned(strlen(Alphabet)); - - g_SubstMxf.Alloc("NUCMX", 256, 256); - strcpy(g_SubstMxf.m_Alpha, "ACGT"); - g_SubstMxf.Init(0); - g_SubstMx = g_SubstMxf.GetData(); - for (unsigned i = 0; i < N; ++i) - { - for (unsigned j = 0; j < N; ++j) - { - float v = float(i == j ? Match : Mismatch); - - byte ui = (byte) toupper(Alphabet[i]); - byte uj = (byte) toupper(Alphabet[j]); - byte li = (byte) tolower(ui); - byte lj = (byte) tolower(uj); - ui = (byte) toupper(ui); - uj = (byte) toupper(uj); - - g_SubstMx[ui][uj] = v; - g_SubstMx[uj][ui] = v; - - g_SubstMx[ui][lj] = v; - g_SubstMx[uj][li] = v; - - g_SubstMx[li][uj] = v; - g_SubstMx[lj][ui] = v; - - g_SubstMx[li][lj] = v; - g_SubstMx[lj][li] = v; - } - } - - for (unsigned j = 0; j < N; ++j) - { - float v = 0.0f; - - byte ui = (byte) 'N'; - byte uj = (byte) toupper(Alphabet[j]); - byte li = (byte) 'n'; - byte lj = (byte) tolower(uj); - ui = (byte) toupper(ui); - uj = (byte) toupper(uj); - - g_SubstMx[ui][uj] = v; - g_SubstMx[uj][ui] = v; - - g_SubstMx[ui][lj] = v; - g_SubstMx[uj][li] = v; - - g_SubstMx[li][uj] = v; - g_SubstMx[lj][ui] = v; - - g_SubstMx[li][lj] = v; - g_SubstMx[lj][li] = v; - } - } diff --git a/sfasta.cpp b/sfasta.cpp deleted file mode 100644 index 918d4f8..0000000 --- a/sfasta.cpp +++ /dev/null @@ -1,467 +0,0 @@ -#include "sfasta.h" -#include "orf.h" -#include "alpha.h" -#include "timing.h" - -static inline bool isgap(byte c) - { - return c == '-' || c == '.'; - } - -const unsigned BufferSize = 16*1024*1024; - -static unsigned GetMaxPoly(const byte *Seq, unsigned L) - { - byte CurrChar = Seq[0]; - unsigned Start = 0; - unsigned MaxLen = 1; - for (unsigned i = 1; i < L; ++i) - { - char c = Seq[i]; - if (c != CurrChar || i+1 == L) - { - unsigned Len = i - Start; - if (Len > MaxLen) - MaxLen = Len; - CurrChar = c; - Start = i; - } - } - return MaxLen; - } - -SFasta::SFasta() - { - m_FileName = ""; - m_File = 0; - m_Buffer = 0; - m_BufferSize = 0; - m_BufferOffset = 0; - m_BufferBytes = 0; - m_FilePos = 0; - m_FileSize = 0; - m_Label = 0; - m_SeqLength = 0; - m_TooShortCount = 0; - m_TooLongCount = 0; - m_ShortestLength = 0; - m_LongestLength = 0; - m_IsNucleo = false; - m_IsNucleoSet = false; - } - -SFasta::~SFasta() - { - Clear(); - } - -void SFasta::Clear() - { - MYFREE(m_Buffer, m_BufferSize, SFasta); - if (m_File != 0) - CloseStdioFile(m_File); - - m_FileName = ""; - m_File = 0; - m_Buffer = 0; - m_BufferSize = 0; - m_BufferOffset = 0; - m_BufferBytes = 0; - m_FilePos = 0; - m_FileSize = 0; - m_Label = 0; - m_SeqLength = 0; - m_SeqIndex = UINT_MAX; - m_AllowGaps = false; - m_IsNucleo = false; - m_IsNucleoSet = false; - m_TooShortCount = 0; - m_TooLongCount = 0; - m_ShortestLength = 0; - m_LongestLength = 0; - m_TooPolyCount = 0; - } - -void SFasta::LogMe() const - { - Log("\n"); - Log("SFasta::LogMe()\n"); - Log("FileName=%s\n", m_FileName.c_str()); - Log("FileSize=%u\n", (unsigned) m_FileSize); - Log("FilePos=%u\n", (unsigned) m_FilePos); - Log("BufferSize=%u\n", m_BufferSize); - Log("BufferPos=%u\n", m_BufferOffset); - Log("BufferBytes=%u\n", m_BufferBytes); - if (m_Label == 0) - Log("Label=NULL\n"); - else - Log("Label=%s\n", m_Label); - Log("SeqLength=%u\n", m_SeqLength); - } - -const byte *SFasta::GetNextSeq() - { - for (;;) - { - const byte *Seq = GetNextSeqLo(); - if (Seq == 0) - { - if (m_TooShortCount > 0) - Warning("%u short sequences (--minlen %u, shortest %u) discarded from %s", - m_TooShortCount, opt_minlen, m_ShortestLength, m_FileName.c_str()); - if (m_TooLongCount > 0) - Warning("%u long sequences (--maxlen %u, longest %u) discarded from %s", - m_TooLongCount, opt_maxlen, m_LongestLength, m_FileName.c_str()); - if (m_TooPolyCount > 0) - Warning("%u sequences with long homopolymers discarded (--maxpoly %u)", - m_TooPolyCount, opt_maxpoly); - return 0; - } - if (m_SeqLength < opt_minlen) - { - ++m_TooShortCount; - if (m_ShortestLength == 0 || m_SeqLength < m_ShortestLength) - m_ShortestLength = m_SeqLength; - continue; - } - if (m_SeqLength > opt_maxlen && opt_maxlen != 0) - { - if (m_LongestLength == 0 || m_SeqLength > m_LongestLength) - m_LongestLength = m_SeqLength; - ++m_TooLongCount; - continue; - } - return Seq; - } - } - -const byte *SFasta::GetNextSeqLo() - { -// End of cache? - if (m_BufferOffset == m_BufferBytes) - { - // End of file? - if (m_FilePos == m_FileSize) - return 0; - FillCache(); - } - - StartTimer(SF_GetNextSeq); - asserta(m_Buffer[m_BufferOffset] == '>'); - m_Label = (char *) (m_Buffer + m_BufferOffset + 1); - -//// Scan to end-of-line. -//// Use dubious library function strchr() in the hope -//// that it uses fast machine code. -// byte *ptr = (byte *) strchr(m_Label, '\n'); -// asserta(ptr != 0); -// *ptr = 0; - - byte *ptr = 0; - for (unsigned i = m_BufferOffset; i < m_BufferSize; ++i) - { - char c = m_Buffer[i]; - if (c == '\n' || c == '\r') - { - ptr = m_Buffer + i; - break; - } - } - asserta(ptr != 0); - - if (opt_trunclabels) - { - for (char *p = m_Label; *p; ++p) - if (isspace(*p)) - { - *p = 0; - break; - } - } - else - { - for (char *p = m_Label; *p; ++p) - { - if (*p == '\t') - *p = ' '; - else if (*p == '\r' || *p == '\n') - { - *p = 0; - char NextChar = *(p+1); - if (NextChar == '\r' || NextChar == '\n') - ++p; - break; - } - } - } - -// ptr points to end-of-line. -// Move to start of sequence data. - byte *Seq = ++ptr; - -// Delete white space in-place - byte *To = ptr; - m_BufferOffset = (unsigned) (ptr - m_Buffer); - while (m_BufferOffset < m_BufferBytes) - { - byte c = m_Buffer[m_BufferOffset]; - if (c == '>') - { - char prevc = '\n'; - if (m_BufferOffset > 0) - prevc = m_Buffer[m_BufferOffset-1]; - if (prevc == '\n' || prevc == '\r') - break; - } - ++m_BufferOffset; - if (isalpha(c) || (isgap(c) && m_AllowGaps)) - *To++ = c; - else if (c == '\n' || c == '\r') - continue; - else - { - const char *Label = (m_Label == 0 ? "" : m_Label); - static bool WarningDone = false; - if (!WarningDone) - { - if (isgap(c)) - Warning("Ignoring gaps in FASTA file '%s'", - m_FileName.c_str()); - else if (isprint(c)) - Warning("Invalid FASTA file '%s', non-letter '%c' in sequence >%s", - m_FileName.c_str(), c, Label); - else - Warning("Invalid FASTA file '%s', non-printing byte (hex %02x) in sequence >%s", - m_FileName.c_str(), c, Label); - WarningDone = true; - } - continue; - } - } - m_SeqLength = unsigned(To - Seq); - - if (m_SeqIndex == UINT_MAX) - m_SeqIndex = 0; - else - ++m_SeqIndex; - - EndTimer(SF_GetNextSeq); - return Seq; - } - -void SFasta::Open(const string &FileName) - { - Clear(); - m_FileName = FileName; - m_File = OpenStdioFile(FileName); - m_BufferSize = BufferSize; - //m_Buffer = myalloc(m_BufferSize); - m_Buffer = MYALLOC(byte, m_BufferSize, SFasta); - m_FileSize = GetStdioFileSize(m_File); - } - -void SFasta::Rewind() - { - m_BufferOffset = 0; - m_BufferBytes = 0; - m_FilePos = 0; - } - -bool SFasta::SetIsNucleo() - { - if (m_FilePos != 0) - Die("SFasta::IsNucleo, not at BOF"); - - unsigned LetterCount = 0; - unsigned NucleoLetterCount = 0; - for (;;) - { - const byte *Seq = GetNextSeq(); - if (Seq == 0) - break; - unsigned L = GetSeqLength(); - for (unsigned i = 0; i < L; ++i) - if (g_IsNucleoChar[Seq[i]]) - ++NucleoLetterCount; - LetterCount += L; - if (LetterCount > 256) - break; - } - Rewind(); - if (LetterCount == 0) - { - m_IsNucleoSet = true; - m_IsNucleo = true; - return true; - } - -// Nucleo if more than 90% nucleo letters AGCTUN - m_IsNucleo = double(NucleoLetterCount)/LetterCount > 0.9; - m_IsNucleoSet = true; - return m_IsNucleo; - } - -void SFasta::FillCache() - { - StartTimer(SF_FillCache); - asserta(m_FilePos < m_FileSize); - -// off_t may be larger type than unsigned, e.g. 64- vs. 32-bit. - off_t otBytesToRead = m_FileSize - m_FilePos; - - bool FinalBuffer = true; - if (otBytesToRead > (off_t) m_BufferSize) - { - FinalBuffer = false; - otBytesToRead = m_BufferSize; - } - - unsigned BytesToRead = unsigned(otBytesToRead); - asserta(BytesToRead > 0); - asserta(BytesToRead <= m_BufferSize); - - SetStdioFilePos(m_File, m_FilePos); - ReadStdioFile(m_File, m_Buffer, BytesToRead); - if (m_Buffer[0] != '>') - { - if (m_FilePos == 0) - Die("Input is not FASTA file"); - else - Die("SFasta::FillCache() failed, expected '>'"); - } - - m_BufferOffset = 0; - -// If last buffer in file, done - if (FinalBuffer) - { - m_BufferBytes = BytesToRead; - m_FilePos += BytesToRead; - EndTimer(SF_FillCache); - return; - } - -// If not last buffer, truncate any partial sequence -// at end of buffer. Search backwards to find last '>'. - byte *ptr = m_Buffer + BytesToRead - 1; - while (ptr > m_Buffer) - { - if (ptr[0] == '>' && (ptr[-1] == '\n' || ptr[-1] == '\r')) - break; - --ptr; - } - - if (ptr == m_Buffer) - { - LogMe(); - if (*ptr != '>') - { - // No '>' found. - // This might techincally be legal FASTA if the entire - // buffer is white space, but strange if not the last buffer - // in the file, so quit anyway. - Die("Failed to find '>' (pos=%u, bytes=%u)", - (unsigned) m_FilePos, BytesToRead); - } - else - { - // Entire buffer is one sequence which may be truncated. - Die("Sequence too long (pos=%u, bytes=%u)", - (unsigned) m_FilePos, BytesToRead); - } - } - - asserta(*ptr == '>'); - - m_BufferBytes = unsigned(ptr - m_Buffer); - m_FilePos += m_BufferBytes; - - EndTimer(SF_FillCache); - } - -unsigned SFasta::GetPctDoneX10() const - { - if (m_FilePos == 0 || m_FileSize == 0) - return 0; - - assert(m_FilePos >= (off_t) m_BufferBytes); - off_t BufferStart = m_FilePos - m_BufferBytes; - off_t BufferPos = BufferStart + m_BufferOffset; - - unsigned iPctX10 = unsigned(10.0*double(BufferPos)*100.0/double(m_FileSize)); - if (iPctX10 == 0) - return 1; - if (iPctX10 >= 999) - return 998; - return iPctX10; - } - -double SFasta::GetPctDone() const - { - if (m_FilePos == 0 || m_FileSize == 0) - return 0; - - assert(m_FilePos >= (off_t) m_BufferBytes); - off_t BufferStart = m_FilePos - m_BufferBytes; - off_t BufferPos = BufferStart + m_BufferOffset; - - return double(BufferPos)*100.0/double(m_FileSize); - } - -bool SFasta::GetNextSD(SeqData &SD) - { - SD.Seq = GetNextSeq(); - if (SD.Seq == 0) - return false; - - SD.Label = GetLabel(); - SD.L = GetSeqLength(); - SD.Index = GetSeqIndex(); - SD.ORFParent = 0; - SD.Nucleo = GetIsNucleo(); - SD.RevComp = false; - - return true; - } - -#if TEST -void TestSFasta() - { - SFasta SF; - SF.Open(opt_input); - - if (opt_verbose) - { - Log(" Index Length Label\n"); - Log("------- ------- -----\n"); - } - - unsigned Index = 0; - unsigned SeqCount = 0; - double LetterCount = 0.0; - ProgressStep(0, 1000, "Reading"); - for (;;) - { - const byte *Seq = SF.GetNextSeq(); - if (Seq == 0) - break; - ProgressStep(SF.GetPctDoneX10(), 1000, "Reading"); - const char *Label = SF.GetLabel(); - unsigned L = SF.GetSeqLength(); - ++SeqCount; - LetterCount += L; - - if (opt_verbose) - { - Log(">%7u %7u '%s'\n", Index, L, Label); - Log("+%7.7s %7.7s \"%*.*s\"\n", "", "", L, L, Seq); - } - - ++Index; - } - ProgressStep(999, 1000, "Reading"); - - Progress("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount)); - Log("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount)); - } -#endif // TEST diff --git a/sfasta.h b/sfasta.h deleted file mode 100644 index ed2f2ff..0000000 --- a/sfasta.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef sfasta_h -#define sfasta_h - -#include "myutils.h" -#include "seq.h" - -typedef void (*ON_START_XSEQ)(const SeqData &SD); -typedef void (*ON_END_XSEQ)(const SeqData &SD); - -// Sequential reader for FASTA file format. -// Serves sequences in file order to save memory. -// Caches biggish chunks to compromise memory vs. speed. -class SFasta - { -public: - string m_FileName; - FILE *m_File; - bool m_AllowGaps; - - off_t m_FileSize; - -// Position to start next read - off_t m_FilePos; - -// Cached data. - byte *m_Buffer; - -// Bytes allocated to m_Buffer - unsigned m_BufferSize; - -// Current position in buffer, normally points to '>' - unsigned m_BufferOffset; - -// File data in buffer <= m_BufferSize - unsigned m_BufferBytes; - -// Current label -// Points into m_Buffer, not a separate buffer. - char *m_Label; - -// Current sequence length - unsigned m_SeqLength; - -// Current seq index - unsigned m_SeqIndex; - - unsigned m_ShortestLength; - unsigned m_LongestLength; - unsigned m_TooShortCount; - unsigned m_TooLongCount; - unsigned m_TooPolyCount; - -private: - bool m_IsNucleoSet; - bool m_IsNucleo; - -public: - SFasta(); - ~SFasta(); - - void Clear(); - void Open(const string &FileName); - void Rewind(); - bool SetIsNucleo(); - bool GetIsNucleo() const { asserta(m_IsNucleoSet); return m_IsNucleo; }; - -// Get next sequence. -// Returns zero on end-of-file - const byte *GetNextSeq(); - -// Get next sequence as SeqData object, return false on end-of-file. - bool GetNextSD(SeqData &SD); - -// Length of most recent sequence returned by GetNextSeq(). - unsigned GetSeqLength() const { return m_SeqLength; } - -// Label of most recent sequence returned by GetNextSeq(). - const char *GetLabel() const { return m_Label; } - -// Index of most recent sequence returned by GetNextSeq(). - unsigned GetSeqIndex() const { return m_SeqIndex; } - - unsigned GetPctDoneX10() const; - double GetPctDone() const; - - void LogMe() const; - -private: - void FillCache(); - const byte *GetNextSeqLo(); - }; - -#endif // sfasta_h diff --git a/svnmods.h b/svnmods.h deleted file mode 100644 index c68513e..0000000 --- a/svnmods.h +++ /dev/null @@ -1,15 +0,0 @@ -"Path: .\n" -"URL: file:///public/svn/usearch\n" -"Repository Root: file:///public/svn/usearch\n" -"Repository UUID: 58640331-1837-4c17-bc3e-636dc59aced1\n" -"Revision: 34\n" -"Node Kind: directory\n" -"Schedule: normal\n" -"Last Changed Author: bob\n" -"Last Changed Rev: 34\n" -"Last Changed Date: 2011-05-01 08:29:04 -0700 (Sun, 01 May 2011)\n" -"\n" -"? mk\n" -"! svnmods.h\n" -"M ungappedblastid.cpp\n" -"M chaindisjointhits.cpp\n" diff --git a/svnversion.h b/svnversion.h deleted file mode 100644 index 2a64d50..0000000 --- a/svnversion.h +++ /dev/null @@ -1 +0,0 @@ -"40" diff --git a/timers.h b/timers.h deleted file mode 100644 index 81cf7d1..0000000 --- a/timers.h +++ /dev/null @@ -1,173 +0,0 @@ -T(MxBase_Alloc) -T(MxBase_FreeData) -T(MxBase_AllocData) -T(SortSeqIndexes) -T(Alloc_Vectors) -T(MainLoop_NotNW) -T(WriteOutput) -T(NWB) -T(ReadAllStdioFile) -T(Windex_Init) -T(Windex_SetSeqIndex) -T(SeqToWords) -T(SeqToWordsStep) -T(SeqToShortWords) -T(SeqToShortWordsA) -T(SeqToShortWordsB) -T(GetFractIdB) -T(Windex_UniqueWordsAlloc) -T(Windex_UniqueWords) -T(GetPctId) -T(Windex_Reset) -T(GetSig) -T(NWEditDist) -T(EditDist_Myers) -T(EditDist_BlockTarget) -T(NWBand) -T(WordCounting) -T(NWAff) -T(NWAffBand) -T(NWSimple) -T(NWSimpleB) -T(BandWrap) -T(IncIdCounts) -T(GetBestDiagB) -T(GetBestDiagB1) -T(GetBestDiagB2) -T(ClusterInit) -T(ClusterPrep) -T(HotSort1) -T(HotSort2) -T(SortA) -T(SortB) -T(CountSort) -T(AddWords) -T(ClusterWindex) -T(MainInit) -T(Output) -T(WindexTail) -T(WindexExit) -T(Sort) -T(U_AllocSeqLength) -T(U_AllocSeedCount) -T(U_AddSeed) -T(AddSeq) -T(U_SetWordCounts) -T(U_SetWordCountsHash) -T(U_SetWordScores) -T(U_SetHotHits) -T(U_SetHotHitsHash) -T(U_SetHotHitsScores) -T(U_Search) -T(U_SearchExact) -T(WF_SeqToWords) -T(WF_SeqToWordsA) -T(WF_SeqToWordsB) -T(WF_AllocLA) -T(WF_AllocLB) -T(WF_AllocDiags) -T(WF_SetA) -T(WF_SetA_Nb) -T(WF_SetAZero) -T(WF_SetA2) -T(WF_SetB) -T(WF_GetCommonWordCount) -T(WF_GetBestDiag) -T(GetFractIdGivenPath) -T(WX_GetUniqueWords) -T(CompressPath) -T(GetHSPs1) -T(GetHSPs2) -T(AlignHSPs) -T(WF_ResolveHSPs) -T(WX_SetExcludes) -T(ViterbiFast) -T(ViterbiFastBand) -T(ViterbiFastBand0) -T(ViterbiFastBand1) -T(ViterbiFastBand2) -T(ViterbiFastBand3) -T(ViterbiFastBand4) -T(TraceBackBit) -T(TraceBackBitSW) -T(SF_GetNextSeq) -T(SF_FillCache) -T(OnGlobalAccept) -T(UngappedBlast) -T(UngappedBlastId) -T(UngappedBlast2Hit) -T(LogHSPs) -T(BlastOutput) -T(BlastLeft) -T(BlastRight) -T(Blast1) -T(Blast2) -T(Blast3) -T(Blast4) -T(GetBestSeg) -T(SWLinearDP) -T(SWLinearTB) -T(SWLinearDP2) -T(SWLinearTB2) -T(Chain) -T(XlatSeq) -T(XlatSeqToLetters) -T(XDropFwdSimple) -T(XDropFwdFast) -T(XDropFwdFastTB) -T(XDropBwd) -T(SWSimple) -T(PathAlloc) -T(SubPath) -T(SWUngapped) -T(SWFast) -T(SWFastNTB) -T(SWAT_CacheQuery) -T(SWAT_AlignTarget) -T(SWAT_CacheQueryNW) -T(SWAT_AlignTargetNW) -T(SeqDB_FromFasta) -T(LocalUngappedHitToAD) -T(LocalGappedHitToAD) -T(GlobalHitToAD) -T(ResolveOverlaps) -T(GetORFs) -T(ChainCov_AddHit) -T(ChainCov_EndQuery) -T(ChainCov_DoTarget) -T(BuildNb) -T(MakeIntSubstMx) -T(UngappedExtendLeft) -T(UngappedExtendRight) -T(AlignSP) -T(AlignHSP) - -// Background -T(Bg_SearchLoop) -T(Bg_MainInit) -T(Bg_MainTerm) -T(Bg_Other) -T(Bg_1) -T(Bg_2) -T(Bg_3) -T(Bg_4) -T(Bg_5) -T(Bg_6) -T(Bg_7) -T(Bg_8) -T(Bg_9) -T(Bg_XFrame2) -T(Bg_Usearch1) -T(Bg_Usearch2) -T(Bg_Usearch3) -T(Bg_Usearch4) -T(Bg_Hot) - -// For Timer2 -T(Search_2) -T(Search_Loop_2) -T(Search_InnerLoop_2) -T(OnHit_2) -T(UngappedBlast_2) -T(MainInit_2) -T(MainTerm_2) diff --git a/timing.h b/timing.h deleted file mode 100644 index 0a80aee..0000000 --- a/timing.h +++ /dev/null @@ -1,238 +0,0 @@ -#define TIMING 0 -#ifndef timing_h -#define timing_h - -#define BG_TIMING 0 - -#if !TIMING -#undef BG_TIMING -#define BG_TIMING 0 -#endif - -#if UCHIMES -#undef TIMING -#define TIMING 0 -#endif - -#if TIMING - -enum TIMER - { - TIMER_None, -#define T(x) TIMER_##x, -#include "timers.h" -#undef T - }; - -const unsigned TimerCount = - 1 // TIMER_None -#define T(x) +1 -#include "timers.h" -#undef T - ; - -enum COUNTER - { -#define C(x) COUNTER_##x, -#include "counters.h" -#undef C - }; - -enum ALLOCER - { -#define A(x) ALLOCER_##x, -#include "allocs.h" -#undef A - }; - -const unsigned CounterCount = -#define C(x) +1 -#include "counters.h" -#undef C - ; - -const unsigned AllocerCount = -#define A(x) +1 -#include "allocs.h" -#undef A - ; - -#ifdef _MSC_VER - -typedef unsigned __int64 TICKS; - -#pragma warning(disable:4035) -inline TICKS GetClockTicks() - { - _asm - { - _emit 0x0f - _emit 0x31 - } - } - -#else // ifdef _MSC_VER - -typedef uint64_t TICKS; -__inline__ uint64_t GetClockTicks() - { - uint32_t lo, hi; - /* We cannot use "=A", since this would use %rax on x86_64 */ - __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); - return (uint64_t)hi << 32 | lo; - } - -#endif // ifdef _MSC_VER - -//void AddTicks(const string &Name, TICKS Ticks1, TICKS Ticks2); -//void AddBytes(const string &Name, double Bytes); -//#define SubBytes(Name, Bytes) AddBytes(Name, -double(Bytes)) - -const char *TimerToStr(TIMER t); - -extern TICKS g_BeginTicks[TimerCount]; -extern double g_TotalTicks[TimerCount]; -extern double g_TotalCounts[TimerCount]; -extern double g_Counters[CounterCount]; -extern unsigned g_AllocNewCount[AllocerCount]; -extern unsigned g_AllocFreeCount[AllocerCount]; -extern double g_AllocNewBytes[AllocerCount]; -extern double g_AllocFreeBytes[AllocerCount]; -extern double g_AllocNetBytes[AllocerCount]; -extern double g_AllocPeakBytes[AllocerCount]; -extern bool g_Timer2[TimerCount]; -extern TIMER g_CurrTimer; -#if BG_TIMING -extern TIMER g_BackgroundTimer; -#endif - -#define MYALLOC(Type, N, Name) (Type *) MyAlloc_((N)*sizeof(Type), ALLOCER_##Name, __FILE__, __LINE__) -#define MYFREE(Array, N, Name) MyFree_(Array, N*sizeof(Array[0]), ALLOCER_##Name, __FILE__, __LINE__) - -inline void *MyAlloc_(unsigned Bytes, unsigned a, const char *FileName, int Line) - { - ++g_AllocNewCount[a]; - g_AllocNewBytes[a] += Bytes; - g_AllocNetBytes[a] += Bytes; - if (g_AllocNetBytes[a] > g_AllocPeakBytes[a]) - g_AllocPeakBytes[a] = g_AllocNetBytes[a]; - return mymalloc(Bytes); - } - -inline void MyFree_(void *p, unsigned Bytes, unsigned a, const char *FileName, int Line) - { - ++g_AllocFreeCount[a]; - g_AllocFreeBytes[a] += Bytes; - g_AllocNetBytes[a] -= Bytes; - myfree2(p, Bytes); - } - -#if BG_TIMING -inline void SetBackgroundTimer_(TIMER Timer) - { - TICKS Now = GetClockTicks(); - if (g_BeginTicks[g_BackgroundTimer] != 0) - { - ++g_TotalCounts[g_BackgroundTimer]; - g_TotalTicks[g_BackgroundTimer] += double(Now - g_BeginTicks[g_BackgroundTimer]); - } - g_BackgroundTimer = Timer; - g_BeginTicks[Timer] = Now; - } -#else -#define SetBackgroundTimer_(Timer) /* empty */ -#endif - -inline void StartTimer_(TIMER Timer) - { - if (g_CurrTimer != TIMER_None) - Die("StartTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer)); - - TICKS Now = GetClockTicks(); -#if BG_TIMING - if (g_BeginTicks[g_BackgroundTimer] != 0) - { - ++g_TotalCounts[g_BackgroundTimer]; - g_TotalTicks[g_BackgroundTimer] += double(Now - g_BeginTicks[g_BackgroundTimer]); - } -#endif - g_BeginTicks[Timer] = Now; - g_CurrTimer = Timer; - } - -inline void PauseTimer_(TIMER Timer) - { - if (Timer != g_CurrTimer) - Die("PauseTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer)); - - TICKS Now = GetClockTicks(); - g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]); - g_BeginTicks[Timer] = Now; - g_CurrTimer = TIMER_None; - } - -inline void EndTimer_(TIMER Timer) - { - if (Timer != g_CurrTimer) - Die("EndTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer)); - - TICKS Now = GetClockTicks(); -#if BG_TIMING - g_BeginTicks[g_BackgroundTimer] = Now; -#endif - g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]); - ++g_TotalCounts[Timer]; - g_CurrTimer = TIMER_None; - } - -inline void StartTimer2_(TIMER Timer) - { - g_Timer2[Timer] = true; - g_BeginTicks[Timer] = GetClockTicks(); - } - -inline void EndTimer2_(TIMER Timer) - { - g_TotalTicks[Timer] += double(GetClockTicks() - g_BeginTicks[Timer]); - ++g_TotalCounts[Timer]; - } - -#define AddCounter(x, N) g_Counters[COUNTER_##x] += N -#define IncCounter(x) ++(g_Counters[COUNTER_##x]) -#define StartTimer(x) StartTimer_(TIMER_##x) -#define PauseTimer(x) PauseTimer_(TIMER_##x) -#define EndTimer(x) EndTimer_(TIMER_##x) -#define StartTimer2(x) StartTimer2_(TIMER_##x) -#define EndTimer2(x) EndTimer2_(TIMER_##x) - -#if BG_TIMING -#define SetBackgroundTimer(x) SetBackgroundTimer_(TIMER_##x) -#else -#define SetBackgroundTimer(x) /* empty */ -#endif - -#else // if TIMING - -#define AddCounter(x, N) /* empty */ -#define IncCounter(x) /* empty */ -#define StartTimer(x) /* empty */ -#define PauseTimer(x) /* empty */ -#define EndTimer(x) /* empty */ -#define StartTimer2(x) /* empty */ -#define PauseTimer2(x) /* empty */ -#define EndTimer2(x) /* empty */ -#define SetBackgroundTimer(x) /* empty */ -#define MYALLOC(Type, N, Name) myalloc(Type, N) -#define MYFREE(Array, N, Name) myfree(Array) - -#endif // if TIMING - -void LogMemStats(); -void LogTickStats(); -void LogStats(); -void LogAllocs(); - -#define AddBytes(x, n) /* empty */ -#define SubBytes(x, n) /* empty */ - -#endif // if timing_h diff --git a/tracebackbit.cpp b/tracebackbit.cpp deleted file mode 100644 index 94159cd..0000000 --- a/tracebackbit.cpp +++ /dev/null @@ -1,180 +0,0 @@ -#include "dp.h" - -#define TRACE 0 - -Mx g_Mx_TBBit; -byte **g_TBBit; -float *g_DPRow1; -float *g_DPRow2; -static float *g_DPBuffer1; -static float *g_DPBuffer2; - -static unsigned g_CacheLB; - -void AllocBit(unsigned LA, unsigned LB) - { - g_Mx_TBBit.Alloc("TBBit", LA+1, LB+1); - g_TBBit = g_Mx_TBBit.GetData(); - if (LB > g_CacheLB) - { - MYFREE(g_DPBuffer1, g_CacheLB, AllocBit); - MYFREE(g_DPBuffer2, g_CacheLB, AllocBit); - - g_CacheLB = LB + 128; - - // Allow use of [-1] - //g_DPBuffer1 = myalloc(g_CacheLB+3); - //g_DPBuffer2 = myalloc(g_CacheLB+3); - g_DPBuffer1 = MYALLOC(float, g_CacheLB+3, AllocBit); - g_DPBuffer2 = MYALLOC(float, g_CacheLB+3, AllocBit); - g_DPRow1 = g_DPBuffer1 + 1; - g_DPRow2 = g_DPBuffer2 + 1; - } - } - -void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD) - { - PD.Alloc(LA+LB); - - StartTimer(TraceBackBit); - char *PathPtr = PD.Back; - *PathPtr = 0; - - byte **TB = g_TBBit; - -#if TRACE - Log("\n"); - Log("TraceBackBit\n"); -#endif - - size_t i = LA; - size_t j = LB; - for (;;) - { -#if TRACE - Log("i=%3d j=%3d state=%c\n", (int) i, (int) j, State); -#endif - if (i == 0 && j == 0) - break; - - --PathPtr; - *PathPtr = State; - - byte t; - switch (State) - { - case 'M': - asserta(i > 0 && j > 0); - t = TB[i-1][j-1]; - if (t & TRACEBITS_DM) - State = 'D'; - else if (t & TRACEBITS_IM) - State = 'I'; - else - State = 'M'; - --i; - --j; - break; - case 'D': - asserta(i > 0); - t = TB[i-1][j]; - if (t & TRACEBITS_MD) - State = 'M'; - else - State = 'D'; - --i; - break; - - case 'I': - asserta(j > 0); - t = TB[i][j-1]; - if (t & TRACEBITS_MI) - State = 'M'; - else - State = 'I'; - --j; - break; - - default: - Die("TraceBackBit, invalid state %c", State); - } - } - PD.Start = PathPtr; - EndTimer(TraceBackBit); - } - -void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj, - unsigned &Leni, unsigned &Lenj, PathData &PD) - { - PD.Alloc(LA+LB); - - StartTimer(TraceBackBitSW); - char *PathPtr = PD.Back; - *PathPtr = 0; - - byte **TB = g_TBBit; - -#if TRACE - Log("\n"); - Log("TraceBackBitSW\n"); -#endif - - unsigned i = Besti; - unsigned j = Bestj; - char State = 'M'; - for (;;) - { -#if TRACE - Log("i=%3d j=%3d state=%c\n", (int) i, (int) j, State); -#endif - --PathPtr; - *PathPtr = State; - - byte t; - switch (State) - { - case 'M': - asserta(i > 0 && j > 0); - t = TB[i-1][j-1]; - if (t & TRACEBITS_DM) - State = 'D'; - else if (t & TRACEBITS_IM) - State = 'I'; - else if (t & TRACEBITS_SM) - { - Leni = Besti - i + 1; - Lenj = Bestj - j + 1; - PD.Start = PathPtr; - EndTimer(TraceBackBitSW); - return; - } - else - State = 'M'; - --i; - --j; - break; - case 'D': - asserta(i > 0); - t = TB[i-1][j]; - if (t & TRACEBITS_MD) - State = 'M'; - else - State = 'D'; - --i; - break; - - case 'I': - asserta(j > 0); - t = TB[i][j-1]; - if (t & TRACEBITS_MI) - State = 'M'; - else - State = 'I'; - --j; - break; - - default: - Die("TraceBackBitSW, invalid state %c", State); - } - } - } diff --git a/uc.h b/uc.h deleted file mode 100644 index 3f6018a..0000000 --- a/uc.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef uc_h -#define uc_h - -#include "seqdb.h" -#include "seq.h" -#include "path.h" - -struct AlnData; - -int uchime_main(int, char**); - -class UCFile - { -public: - FILE *m_File; - byte *m_Data; - vector m_RecTypes; - vector m_PctIds; - vector m_Labels; - vector m_SeedLabels; - vector m_SeedIndexes; - vector m_CompressedPaths; - vector m_SeqLengths; - vector m_SortOrder; - vector m_Strands; - vector m_Los; - vector m_SeedLos; - -public: - UCFile(); - void Clear(bool ctor = false); - void Close(); - void FromFile(const string &FileName); - void FromClstr(const string &FileName); - void ToFile(const string &FileName); - unsigned GetRecordCount() const; - void LogMe() const; - void ToClstr(const string &FileName); - void ToFasta(const string &FileName, const SeqDB &Input, bool Reformat); - void Create(const string &FileName); - void Sort(); - void Flush() const; - - void WriteNotMatched(unsigned L, const char *Label) const; - void WriteLibSeed(unsigned SeedIndex, unsigned L, const char *Label) const; - void WriteNewSeed(unsigned SeedIndex, unsigned L, const char *Label) const; - void WriteHit(const SeqData &SA, const SeqData &SB, double FractId, - const PathData &PD) const; - void WriteReject(const SeqData &SA, const SeqData &SB, double FractId, - const char *Path) const; - void WriteHit(unsigned SeedIndex, unsigned L, double PctId, - const char *CompressedPath, char Strand, unsigned Lo, unsigned SeedLo, - const char *Label, const char *SeedLabel) const; - void WriteHit(const AlnData &AD); - void WriteLibCluster(unsigned SeedIndex, unsigned Size, double AvgId, - const char *Label) const; - void WriteNewCluster(unsigned SeedIndex, unsigned Size, double AvgId, - const char *Label) const; - void WriteSeqX(FILE *f, const byte *Seq, unsigned L, const char *CompressedPath) const; - }; - -#endif // uc_h diff --git a/uchime_main.cpp b/uchime_main.cpp deleted file mode 100644 index 77266c0..0000000 --- a/uchime_main.cpp +++ /dev/null @@ -1,212 +0,0 @@ -#include "myutils.h" -#include "chime.h" -#include "seqdb.h" -#include "dp.h" -#include "ultra.h" -#include "hspfinder.h" -#include -#include - -bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, - const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF, - float MinFractId, ChimeHit2 &Hit); - -FILE *g_fUChime; -FILE *g_fUChimeAlns; -const vector *g_SortVecFloat; -bool g_UchimeDeNovo = false; - -void Usage() - { - printf("\n"); - printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION); - printf("http://www.drive5.com/uchime\n"); - printf("\n"); - printf("This software is donated to the public domain\n"); - printf("\n"); - - printf( -#include "help.h" - ); - } - -void SetBLOSUM62() - { - Die("SetBLOSUM62 not implemented"); - } - -void ReadSubstMx(const string &/*FileName*/, Mx &/*Mxf*/) - { - Die("ReadSubstMx not implemented"); - } - -void LogAllocs() - { - /*empty*/ - } - -static bool CmpDescVecFloat(unsigned i, unsigned j) - { - return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j]; - } - -void Range(vector &v, unsigned N) - { - v.clear(); - v.reserve(N); - for (unsigned i = 0; i < N; ++i) - v.push_back(i); - } - -void SortDescending(const vector &Values, vector &Order) - { - StartTimer(Sort); - const unsigned N = SIZE(Values); - Range(Order, N); - g_SortVecFloat = &Values; - sort(Order.begin(), Order.end(), CmpDescVecFloat); - EndTimer(Sort); - } - -float GetAbFromLabel(const string &Label) - { - vector Fields; - Split(Label, Fields, '/'); - const unsigned N = SIZE(Fields); - for (unsigned i = 0; i < N; ++i) - { - const string &Field = Fields[i]; - if (Field.substr(0, 3) == "ab=") - { - string a = Field.substr(3, string::npos); - return (float) atof(a.c_str()); - } - } - if (g_UchimeDeNovo) - Die("Missing abundance /ab=xx/ in label >%s", Label.c_str()); - return 0.0; - } - -int uchime_main(int argc, char *argv[]) - { - - MyCmdLine(argc, argv); - - if (argc < 2) - { - Usage(); - return 0; - } - - if (opt_version) - { - printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION); - return 0; - } - - printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION); - printf("by Robert C. Edgar\n"); - printf("http://drive5.com/uchime\n"); - printf("This code is donated to the public domain.\n"); - printf("\n"); - if (!optset_w) - opt_w = 8; - - float MinFractId = 0.95f; - if (optset_id) - MinFractId = (float) opt_id; - - Log("%8.2f minh\n", opt_minh); - Log("%8.2f xn\n", opt_xn); - Log("%8.2f dn\n", opt_dn); - Log("%8.2f xa\n", opt_xa); - Log("%8.2f mindiv\n", opt_mindiv); - Log("%8u maxp\n", opt_maxp); - - if (opt_input == "" && opt_uchime != "") - opt_input = opt_uchime; - - if (opt_input == "") - Die("Missing --input"); - - g_UchimeDeNovo = (opt_db == ""); - - if (opt_uchimeout != "") - g_fUChime = CreateStdioFile(opt_uchimeout); - - if (opt_uchimealns != "") - g_fUChimeAlns = CreateStdioFile(opt_uchimealns); - - SeqDB Input; - SeqDB DB; - - Input.FromFasta(opt_input); - if (!Input.IsNucleo()) - Die("Input contains amino acid sequences"); - - const unsigned QuerySeqCount = Input.GetSeqCount(); - vector Order; - for (unsigned i = 0; i < QuerySeqCount; ++i) - Order.push_back(i); - - if (g_UchimeDeNovo) - { - vector Abs; - for (unsigned i = 0; i < QuerySeqCount; ++i) - { - const char *Label = Input.GetLabel(i); - float Ab = GetAbFromLabel(Label); - Abs.push_back(Ab); - } - SortDescending(Abs, Order); - DB.m_IsNucleoSet = true; - DB.m_IsNucleo = true; - } - else - { - DB.FromFasta(opt_db); - if (!DB.IsNucleo()) - Die("Database contains amino acid sequences"); - } - - vector Hits; - unsigned HitCount = 0; - for (unsigned i = 0; i < QuerySeqCount; ++i) - { - unsigned QuerySeqIndex = Order[i]; - - SeqData QSD; - Input.GetSeqData(QuerySeqIndex, QSD); - - float QAb = -1.0; - if (g_UchimeDeNovo) - QAb = GetAbFromLabel(QSD.Label); - - ChimeHit2 Hit; - AlnParams &AP = *(AlnParams *) 0; - AlnHeuristics &AH = *(AlnHeuristics *) 0; - HSPFinder &HF = *(HSPFinder *) 0; - bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit); - if (Found) - ++HitCount; - else - { - if (g_UchimeDeNovo) - DB.AddSeq(QSD.Label, QSD.Seq, QSD.L); - } - - WriteChimeHit(g_fUChime, Hit); - - ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1)); - } - - Log("\n"); - Log("%s: %u/%u chimeras found (%.1f%%)\n", - opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount)); - - CloseStdioFile(g_fUChime); - CloseStdioFile(g_fUChimeAlns); - - ProgressExit(); - return 0; - } diff --git a/ultra.h b/ultra.h deleted file mode 100644 index e0a432f..0000000 --- a/ultra.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef ultra_h -#define ultra_h - -#include "seqdb.h" -#define Ultra SeqDB -#define GetSeedLabel GetLabel - -#endif // ultra_h diff --git a/usort.cpp b/usort.cpp deleted file mode 100644 index 7afbf42..0000000 --- a/usort.cpp +++ /dev/null @@ -1,86 +0,0 @@ -//#if UCHIMES - -#include "myutils.h" -#include "seqdb.h" -#include "seq.h" -#include "alpha.h" - -void SortDescending(const vector &Values, vector &Order); - -static byte *g_QueryHasWord; -static unsigned g_WordCount; - -unsigned GetWord(const byte *Seq) - { - unsigned Word = 0; - const byte *Front = Seq; - for (unsigned i = 0; i < opt_w; ++i) - { - unsigned Letter = g_CharToLetterNucleo[*Front++]; - Word = (Word*4) + Letter; - } - return Word; - } - -static void SetQuery(const SeqData &Query) - { - if (g_QueryHasWord == 0) - { - g_WordCount = 4; - for (unsigned i = 1; i < opt_w; ++i) - g_WordCount *= 4; - - g_QueryHasWord = myalloc(byte, g_WordCount); - } - - memset(g_QueryHasWord, 0, g_WordCount); - - if (Query.L <= opt_w) - return; - - const unsigned L = Query.L - opt_w + 1; - const byte *Seq = Query.Seq; - for (unsigned i = 0; i < L; ++i) - { - unsigned Word = GetWord(Seq++); - g_QueryHasWord[Word] = 1; - } - } - -static unsigned GetUniqueWordsInCommon(const SeqData &Target) - { - if (Target.L <= opt_w) - return 0; - - unsigned Count = 0; - const unsigned L = Target.L - opt_w + 1; - const byte *Seq = Target.Seq; - for (unsigned i = 0; i < L; ++i) - { - unsigned Word = GetWord(Seq++); - if (g_QueryHasWord[Word]) - ++Count; - } - return Count; - } - -void USort(const SeqData &Query, const SeqDB &DB, vector &WordCounts, - vector &Order) - { - WordCounts.clear(); - Order.clear(); - - SetQuery(Query); - - const unsigned SeqCount = DB.GetSeqCount(); - for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex) - { - SeqData Target; - DB.GetSeqData(SeqIndex, Target); - float WordCount = (float) GetUniqueWordsInCommon(Target); - WordCounts.push_back(WordCount); - } - SortDescending(WordCounts, Order); - } - -//#endif // UCHIMES diff --git a/viterbifast.cpp b/viterbifast.cpp deleted file mode 100644 index 2b20174..0000000 --- a/viterbifast.cpp +++ /dev/null @@ -1,378 +0,0 @@ -#include "dp.h" -#include "out.h" -#include "evalue.h" - -#define CMP_SIMPLE 0 - -#if SAVE_FAST -static Mx g_MxDPM; -static Mx g_MxDPD; -static Mx g_MxDPI; - -static Mx g_MxTBM; -static Mx g_MxTBD; -static Mx g_MxTBI; - -static float **g_DPM; -static float **g_DPD; -static float **g_DPI; - -static char **g_TBM; -static char **g_TBD; -static char **g_TBI; - -#if CMP_SIMPLE -static Mx *g_DPMSimpleMx; -static Mx *g_DPDSimpleMx; -static Mx *g_DPISimpleMx; -static float **g_DPMSimple; -static float **g_DPDSimple; -static float **g_DPISimple; - -#define cmpm(i, j, x) { if (!feq(x, g_DPMSimple[i][j])) \ - { \ - Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \ - __FILE__, __LINE__, x, i, j, g_DPMSimple[i][j]); \ - } \ - } - -#define cmpd(i, j, x) { if (!feq(x, g_DPDSimple[i][j])) \ - { \ - Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \ - __FILE__, __LINE__, x, i, j, g_DPDSimple[i][j]); \ - } \ - } - -#define cmpi(i, j, x) { if (!feq(x, g_DPISimple[i][j])) \ - { \ - Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \ - __FILE__, __LINE__, x, i, j, g_DPISimple[i][j]); \ - } \ - } - -#else - -#define cmpm(i, j, x) /* empty */ -#define cmpd(i, j, x) /* empty */ -#define cmpi(i, j, x) /* empty */ - -#endif - -static void AllocSave(unsigned LA, unsigned LB) - { -#if CMP_SIMPLE - GetSimpleDPMxs(&g_DPMSimpleMx, &g_DPDSimpleMx, &g_DPISimpleMx); - g_DPMSimple = g_DPMSimpleMx->GetData(); - g_DPDSimple = g_DPDSimpleMx->GetData(); - g_DPISimple = g_DPISimpleMx->GetData(); -#endif - g_MxDPM.Alloc("FastM", LA+1, LB+1); - g_MxDPD.Alloc("FastD", LA+1, LB+1); - g_MxDPI.Alloc("FastI", LA+1, LB+1); - - g_MxTBM.Alloc("FastTBM", LA+1, LB+1); - g_MxTBD.Alloc("FastTBD", LA+1, LB+1); - g_MxTBI.Alloc("FastTBI", LA+1, LB+1); - - g_DPM = g_MxDPM.GetData(); - g_DPD = g_MxDPD.GetData(); - g_DPI = g_MxDPI.GetData(); - - g_TBM = g_MxTBM.GetData(); - g_TBD = g_MxTBD.GetData(); - g_TBI = g_MxTBI.GetData(); - } - -static void SAVE_DPM(unsigned i, unsigned j, float x) - { - g_DPM[i][j] = x; -#if CMP_SIMPLE - if (i > 0 && j > 0) - asserta(feq(x, g_DPMSimple[i][j])); -#endif - } - -static void SAVE_DPD(unsigned i, unsigned j, float x) - { - g_DPD[i][j] = x; -#if CMP_SIMPLE - if (i > 0 && j > 0) - asserta(feq(x, g_DPDSimple[i][j])); -#endif - } - -static void SAVE_DPI(unsigned i, unsigned j, float x) - { - g_DPI[i][j] = x; -#if CMP_SIMPLE - if (i > 0 && j > 0) - asserta(feq(x, g_DPISimple[i][j])); -#endif - } - -static void SAVE_TBM(unsigned i, unsigned j, char x) - { - g_TBM[i][j] = x; - } - -static void SAVE_TBD(unsigned i, unsigned j, char x) - { - g_TBD[i][j] = x; - } - -static void SAVE_TBI(unsigned i, unsigned j, char x) - { - g_TBI[i][j] = x; - } - -void GetFastMxs(Mx **M, Mx **D, Mx **I) - { - *M = &g_MxDPM; - *D = &g_MxDPD; - *I = &g_MxDPI; - } - -#else // SAVE_FAST - -#define SAVE_DPM(i, j, x) /* empty */ -#define SAVE_DPD(i, j, x) /* empty */ -#define SAVE_DPI(i, j, x) /* empty */ - -#define SAVE_TBM(i, j, x) /* empty */ -#define SAVE_TBD(i, j, x) /* empty */ -#define SAVE_TBI(i, j, x) /* empty */ - -#define AllocSave(LA, LB) /* empty */ - -#define cmpm(i, j, x) /* empty */ -#define cmpd(i, j, x) /* empty */ -#define cmpi(i, j, x) /* empty */ - -#endif // SAVE_FAST - -float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB, - const AlnParams &AP, PathData &PD) - { - if (LA*LB > 100*1000*1000) - Die("ViterbiFast, too long LA=%u, LB=%u", LA, LB); - - AllocBit(LA, LB); - AllocSave(LA, LB); - - StartTimer(ViterbiFast); - - const float * const *Mx = AP.SubstMx; - float OpenA = AP.LOpenA; - float ExtA = AP.LExtA; - - byte **TB = g_TBBit; - float *Mrow = g_DPRow1; - float *Drow = g_DPRow2; - -// Use Mrow[-1], so... - Mrow[-1] = MINUS_INFINITY; - for (unsigned j = 0; j <= LB; ++j) - { - Mrow[j] = MINUS_INFINITY; - SAVE_DPM(0, j, MINUS_INFINITY); - SAVE_TBM(0, j, '?'); - - Drow[j] = MINUS_INFINITY; - SAVE_DPD(0, j, MINUS_INFINITY); - SAVE_TBD(0, j, '?'); - } - -// Main loop - float M0 = float (0); - SAVE_DPM(0, 0, 0); - for (unsigned i = 0; i < LA; ++i) - { - byte a = A[i]; - const float *MxRow = Mx[a]; - float OpenB = AP.LOpenB; - float ExtB = AP.LExtB; - float I0 = MINUS_INFINITY; - - SAVE_TBM(i, 0, '?'); - - SAVE_DPI(i, 0, MINUS_INFINITY); - SAVE_DPI(i, 1, MINUS_INFINITY); - - SAVE_TBI(i, 0, '?'); - SAVE_TBI(i, 1, '?'); - - byte *TBrow = TB[i]; - for (unsigned j = 0; j < LB; ++j) - { - byte b = B[j]; - byte TraceBits = 0; - float SavedM0 = M0; - - // MATCH - { - // M0 = DPM[i][j] - // I0 = DPI[i][j] - // Drow[j] = DPD[i][j] - cmpm(i, j, M0); - cmpd(i, j, Drow[j]); - cmpi(i, j, I0); - - float xM = M0; - SAVE_TBM(i+1, j+1, 'M'); - if (Drow[j] > xM) - { - xM = Drow[j]; - TraceBits = TRACEBITS_DM; - SAVE_TBM(i+1, j+1, 'D'); - } - if (I0 > xM) - { - xM = I0; - TraceBits = TRACEBITS_IM; - SAVE_TBM(i+1, j+1, 'I'); - } - M0 = Mrow[j]; - cmpm(i, j+1, M0); - - Mrow[j] = xM + MxRow[b]; - // Mrow[j] = DPM[i+1][j+1]) - SAVE_DPM(i+1, j+1, Mrow[j]); - } - - // DELETE - { - // SavedM0 = DPM[i][j] - // Drow[j] = DPD[i][j] - cmpm(i, j, SavedM0); - cmpd(i, j, Drow[j]); - - float md = SavedM0 + OpenB; - Drow[j] += ExtB; - SAVE_TBD(i+1, j, 'D'); - if (md >= Drow[j]) - { - Drow[j] = md; - TraceBits |= TRACEBITS_MD; - SAVE_TBD(i+1, j, 'M'); - } - // Drow[j] = DPD[i+1][j] - SAVE_DPD(i+1, j, Drow[j]); - } - - // INSERT - { - // SavedM0 = DPM[i][j] - // I0 = DPI[i][j] - cmpm(i, j, SavedM0); - cmpi(i, j, I0); - - float mi = SavedM0 + OpenA; - I0 += ExtA; - SAVE_TBI(i, j+1, 'I'); - if (mi >= I0) - { - I0 = mi; - TraceBits |= TRACEBITS_MI; - SAVE_TBI(i, j+1, 'M'); - } - // I0 = DPI[i][j+1] - SAVE_DPI(i, j+1, I0); - } - - OpenB = AP.OpenB; - ExtB = AP.ExtB; - - TBrow[j] = TraceBits; - } - - // Special case for end of Drow[] - { - // M0 = DPM[i][LB] - // Drow[LB] = DPD[i][LB] - - TBrow[LB] = 0; - float md = M0 + AP.ROpenB; - Drow[LB] += AP.RExtB; - SAVE_TBD(i+1, LB, 'D'); - if (md >= Drow[LB]) - { - Drow[LB] = md; - TBrow[LB] = TRACEBITS_MD; - SAVE_TBD(i+1, LB, 'M'); - } - // Drow[LB] = DPD[i+1][LB] - SAVE_DPD(i+1, LB, Drow[LB]); - } - - SAVE_DPM(i+1, 0, MINUS_INFINITY); - M0 = MINUS_INFINITY; - - OpenA = AP.OpenA; - ExtA = AP.ExtA; - } - - SAVE_TBM(LA, 0, '?'); - -// Special case for last row of DPI - byte *TBrow = TB[LA]; - float I1 = MINUS_INFINITY; - - SAVE_DPI(LA, 0, MINUS_INFINITY); - SAVE_TBI(LA, 0, '?'); - - SAVE_DPI(LA, 1, MINUS_INFINITY); - SAVE_TBI(LA, 1, '?'); - - for (unsigned j = 1; j < LB; ++j) - { - // Mrow[j-1] = DPM[LA][j] - // I1 = DPI[LA][j] - - TBrow[j] = 0; - float mi = Mrow[int(j)-1] + AP.ROpenA; - I1 += AP.RExtA; - SAVE_TBI(LA, j+1, 'I'); - if (mi > I1) - { - I1 = mi; - TBrow[j] = TRACEBITS_MI; - SAVE_TBI(LA, j+1, 'M'); - } - SAVE_DPI(LA, j+1, I1); - } - - float FinalM = Mrow[LB-1]; - float FinalD = Drow[LB]; - float FinalI = I1; -// FinalM = DPM[LA][LB] -// FinalD = DPD[LA][LB] -// FinalI = DPI[LA][LB] - - float Score = FinalM; - byte State = 'M'; - if (FinalD > Score) - { - Score = FinalD; - State = 'D'; - } - if (FinalI > Score) - { - Score = FinalI; - State = 'I'; - } - - EndTimer(ViterbiFast); - TraceBackBit(LA, LB, State, PD); - -#if SAVE_FAST - g_MxDPM.LogMe(); - g_MxDPD.LogMe(); - g_MxDPI.LogMe(); - - g_MxTBM.LogMe(); - g_MxTBD.LogMe(); - g_MxTBI.LogMe(); -#endif - - return Score; - } diff --git a/windex.h b/windex.h deleted file mode 100644 index 0b324ca..0000000 --- a/windex.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef windex_h -#define windex_h - -class SFasta; -struct SeqDB; - -typedef uint32 word_t; -typedef uint16 wordcount_t; -typedef uint32 arrsize_t; -typedef uint16 seqcountperword_t; -typedef uint32 seqindex_t; -typedef uint16 commonwordcount_t; - -const uint32 WindexFileHdr_Magic1 = 0x312DE41; -const uint32 WindexFileHdr_Magic2 = 0x312DE42; -const uint32 WindexFileHdr_Magic3 = 0x312DE43; -const uint32 WindexFileHdr_Magic4 = 0x312DE44; - -struct WindexFileHdr - { - uint32 Magic1; - uint32 IsNucleo; - uint32 WordLength; - uint32 Magic2; - }; - -class Windex - { -public: - bool m_Nucleo; - bool m_RedAlpha; - unsigned m_WordLength; - unsigned m_AlphaSize; - unsigned m_WordCount; - unsigned m_Hi; - unsigned m_CapacityInc; - arrsize_t *m_Capacities; - arrsize_t *m_Sizes; - float *m_WordScores; - seqindex_t **m_SeedIndexes; - byte *m_UniqueCounts; - unsigned m_CharToLetter[256]; - -public: - Windex(); - void ToFile(const string &FileName) const; - void FromFile(const string &FileName); - void FromSFasta(SFasta &SF); - void FromSeqDB(const SeqDB &DB); - void Clear(bool ctor = false); - void AddWords(unsigned SeqIndex, const word_t *Words, unsigned N); - void Init(bool Nucleo, unsigned WordLength); - void Init2(bool Nucleo, unsigned TableSize); - void InitRed(unsigned WordLength); - void InitWordScores(const float *const *SubstMx); - void Reset(); - void LogMe() const; - unsigned LogMemSize() const; - void LogWordStats(unsigned TopWords = 10) const; - const char *WordToStr(word_t Word) const; - word_t SeqToWord(const byte *Seq) const; - unsigned SeqToWords(const byte *Seq, unsigned L, word_t *Words) const; - unsigned SeqToWordsStep(unsigned Step, const byte *Seq, unsigned L, word_t *Words) const; - unsigned WordsToCounts(const word_t *Words, unsigned N, - word_t *UniqueWords, seqcountperword_t *Counts) const; - unsigned GetUniqueWords(const word_t *Words, unsigned N, - word_t *UniqueWords) const; - void LogSizeHisto() const; - }; - -#endif // windex_h diff --git a/writechhit.cpp b/writechhit.cpp deleted file mode 100644 index ea67061..0000000 --- a/writechhit.cpp +++ /dev/null @@ -1,329 +0,0 @@ -#include "myutils.h" -#include "chime.h" - -void WriteChimeFileHdr(FILE *f) - { - if (f == 0) - return; - - fprintf(f, - "\tQuery" // 1 - "\tA" // 2 - "\tB" // 3 - "\tIdQM" // 4 - "\tIdQA" // 5 - "\tIdQB" // 6 - "\tIdAB" // 7 - "\tIdQT" // 8 - "\tLY" // 9 - "\tLN" // 10 - "\tLA" // 11 - "\tRY" // 12 - "\tRN" // 13 - "\tRA" // 14 - "\tDiv" // 15 - "\tY" // 16 - "\n" - ); - } - -void WriteChimeHit(FILE *f, const ChimeHit2 &Hit) - { - if (f == 0) - return; - - if (Hit.Div <= 0.0) - { - fprintf(f, "0.0000"); // 0 - - fprintf(f, - "\t%s", Hit.QLabel.c_str()); // 1 - - fprintf(f, - "\t*" // 2 - "\t*" // 3 - "\t*" // 4 - "\t*" // 5 - "\t*" // 6 - "\t*" // 7 - "\t*" // 8 - "\t*" // 9 - "\t*" // 10 - "\t*" // 11 - "\t*" // 12 - "\t*" // 13 - "\t*" // 14 - "\t*" // 15 - "\tN" // 16 - "\n" - ); - return; - } - - fprintf(f, "%.4f", Hit.Score); // 0 - - fputc('\t', f); - fputs(Hit.QLabel.c_str(), f); // 1 - - fputc('\t', f); - fputs(Hit.ALabel.c_str(), f); // 2 - - fputc('\t', f); - fputs(Hit.BLabel.c_str(), f); // 3 - - fprintf(f, "\t%.1f", Hit.PctIdQM); // 4 - fprintf(f, "\t%.1f", Hit.PctIdQA); // 5 - fprintf(f, "\t%.1f", Hit.PctIdQB); // 6 - fprintf(f, "\t%.1f", Hit.PctIdAB); // 7 - fprintf(f, "\t%.1f", Hit.PctIdQT); // 8 - - fprintf(f, "\t%u", Hit.CS_LY); // 9 - fprintf(f, "\t%u", Hit.CS_LN); // 10 - fprintf(f, "\t%u", Hit.CS_LA); // 11 - - fprintf(f, "\t%u", Hit.CS_RY); // 12 - fprintf(f, "\t%u", Hit.CS_RN); // 13 - fprintf(f, "\t%u", Hit.CS_RA); // 14 - - fprintf(f, "\t%.2f", Hit.Div); // 15 - - fprintf(f, "\t%c", yon(Hit.Accept())); // 16 - fputc('\n', f); - } - -unsigned GetUngappedLength(const byte *Seq, unsigned L) - { - unsigned UL = 0; - for (unsigned i = 0; i < L; ++i) - if (!isgap(Seq[i])) - ++UL; - return UL; - } - -void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit) - { - if (f == 0) - return; - - if (Hit.Div <= 0.0) - return; - - const string &Q3 = Hit.Q3; - const string &A3 = Hit.A3; - const string &B3 = Hit.B3; - - const byte *Q3Seq = (const byte *) Q3.c_str(); - const byte *A3Seq = (const byte *) A3.c_str(); - const byte *B3Seq = (const byte *) B3.c_str(); - -// Aligned - unsigned ColCount = SIZE(Q3); - asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount); - - unsigned LQ = GetUngappedLength(Q3Seq, ColCount); - unsigned LA = GetUngappedLength(A3Seq, ColCount); - unsigned LB = GetUngappedLength(B3Seq, ColCount); - - fprintf(f, "\n"); - fprintf(f, "------------------------------------------------------------------------\n"); - fprintf(f, "Query (%5u nt) %s\n", LQ, Hit.QLabel.c_str()); - fprintf(f, "ParentA (%5u nt) %s\n", LA, Hit.ALabel.c_str()); - fprintf(f, "ParentB (%5u nt) %s\n", LB, Hit.BLabel.c_str()); - -// Strip terminal gaps in query - unsigned FromCol = UINT_MAX; - unsigned ToCol = UINT_MAX; - for (unsigned Col = 0; Col < ColCount; ++Col) - { - if (!isgap(Q3Seq[Col])) - { - if (FromCol == UINT_MAX) - FromCol = Col; - ToCol = Col; - } - } - - unsigned QPos = 0; - unsigned APos = 0; - unsigned BPos = 0; - for (unsigned Col = 0; Col < FromCol; ++Col) - { - if (!isgap(A3Seq[Col])) - ++APos; - if (!isgap(B3Seq[Col])) - ++BPos; - } - - unsigned Range = ToCol - FromCol + 1; - unsigned RowCount = (Range + 79)/80; - unsigned RowFromCol = FromCol; - for (unsigned RowIndex = 0; RowIndex < RowCount; ++RowIndex) - { - fprintf(f, "\n"); - unsigned RowToCol = RowFromCol + 79; - if (RowToCol > ToCol) - RowToCol = ToCol; - - // A row - fprintf(f, "A %5u ", APos + 1); - for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - if (a != q) - a = tolower(a); - fprintf(f, "%c", a); - if (!isgap(a)) - ++APos; - } - fprintf(f, " %u\n", APos); - - // Q row - fprintf(f, "Q %5u ", QPos + 1); - for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col) - { - char q = Q3Seq[Col]; - fprintf(f, "%c", q); - if (!isgap(q)) - ++QPos; - } - fprintf(f, " %u\n", QPos); - - // B row - fprintf(f, "B %5u ", BPos + 1); - for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col) - { - char q = Q3Seq[Col]; - char b = B3Seq[Col]; - if (b != q) - b = tolower(b); - fprintf(f, "%c", b); - if (!isgap(b)) - ++BPos; - } - fprintf(f, " %u\n", BPos); - - // Diffs - fprintf(f, "Diffs "); - for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - char c = ' '; - if (isgap(q) || isgap(a) || isgap(b)) - c = ' '; - else if (Col < Hit.ColXLo) - { - if (q == a && q == b) - c = ' '; - else if (q == a && q != b) - c = 'A'; - else if (q == b && q != a) - c = 'b'; - else if (a == b && q != a) - c = 'N'; - else - c = '?'; - } - else if (Col > Hit.ColXHi) - { - if (q == a && q == b) - c = ' '; - else if (q == b && q != a) - c = 'B'; - else if (q == a && q != b) - c = 'a'; - else if (a == b && q != a) - c = 'N'; - else - c = '?'; - } - - fprintf(f, "%c", c); - } - fprintf(f, "\n"); - - // SNPs - fprintf(f, "Votes "); - for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col) - { - char q = Q3Seq[Col]; - char a = A3Seq[Col]; - char b = B3Seq[Col]; - - bool PrevGap = Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])); - bool NextGap = Col+1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])); - - char c = ' '; - if (isgap(q) || isgap(a) || isgap(b) || PrevGap || NextGap) - c = ' '; - else if (Col < Hit.ColXLo) - { - if (q == a && q == b) - c = ' '; - else if (q == a && q != b) - c = '+'; - else if (q == b && q != a) - c = '!'; - else - c = '0'; - } - else if (Col > Hit.ColXHi) - { - if (q == a && q == b) - c = ' '; - else if (q == b && q != a) - c = '+'; - else if (q == a && q != b) - c = '!'; - else - c = '0'; - } - - fprintf(f, "%c", c); - } - fprintf(f, "\n"); - - // LR row - fprintf(f, "Model "); - for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col) - { - if (Col < Hit.ColXLo) - fprintf(f, "A"); - else if (Col >= Hit.ColXLo && Col <= Hit.ColXHi) - fprintf(f, "x"); - else - fprintf(f, "B"); - } - - fprintf(f, "\n"); - - RowFromCol += 80; - } - fprintf(f, "\n"); - - double PctIdBestP = max(Hit.PctIdQA, Hit.PctIdQB); - double Div = (Hit.PctIdQM - PctIdBestP)*100.0/PctIdBestP; - - unsigned LTot = Hit.CS_LY + Hit.CS_LN + Hit.CS_LA; - unsigned RTot = Hit.CS_RY + Hit.CS_RN + Hit.CS_RA; - - double PctL = Pct(Hit.CS_LY, LTot); - double PctR = Pct(Hit.CS_RY, RTot); - - fprintf(f, - "Ids. QA %.1f%%, QB %.1f%%, AB %.1f%%, QModel %.1f%%, Div. %+.1f%%\n", - Hit.PctIdQA, - Hit.PctIdQB, - Hit.PctIdAB, - Hit.PctIdQM, - Div); - - fprintf(f, - "Diffs Left %u: N %u, A %u, Y %u (%.1f%%); Right %u: N %u, A %u, Y %u (%.1f%%), Score %.4f\n", - LTot, Hit.CS_LN, Hit.CS_LA, Hit.CS_LY, PctL, - RTot, Hit.CS_RN, Hit.CS_RA, Hit.CS_RY, PctR, - Hit.Score); - }