git.donarmstrong.com Git - mothur.git/commitdiff
added chimera.uchime
author westcott <westcott>
Mon, 16 May 2011 19:22:44 +0000 (19:22 +0000)
committer westcott <westcott>
Mon, 16 May 2011 19:22:44 +0000 (19:22 +0000)
61 files changed:
Mothur.xcodeproj/project.pbxproj
addtargets2.cpp [new file with mode: 0644]
alignchime.cpp [new file with mode: 0644]
alignchimel.cpp [new file with mode: 0644]
allocs.h [new file with mode: 0644]
alnheuristics.h [new file with mode: 0644]
alnparams.cpp [new file with mode: 0644]
alnparams.h [new file with mode: 0644]
alpha.cpp [new file with mode: 0644]
alpha.h [new file with mode: 0644]
alpha2.cpp [new file with mode: 0644]
blastdb.cpp
chainer.h [new file with mode: 0644]
chime.h [new file with mode: 0644]
chimerauchimecommand.cpp
chimerauchimecommand.h [new file with mode: 0644]
collectsharedcommand.cpp
commandfactory.cpp
diagbox.h [new file with mode: 0644]
dp.h [new file with mode: 0644]
evalue.h [new file with mode: 0644]
fractid.cpp [new file with mode: 0644]
getparents.cpp [new file with mode: 0644]
globalalign2.cpp [new file with mode: 0644]
help.h [new file with mode: 0644]
hsp.h [new file with mode: 0644]
hspfinder.h [new file with mode: 0644]
make3way.cpp [new file with mode: 0644]
mothurout.cpp
mothurout.h
mx.cpp [new file with mode: 0644]
mx.h [new file with mode: 0644]
myopts.h [new file with mode: 0644]
myutils.cpp [new file with mode: 0755]
myutils.h [new file with mode: 0644]
orf.h [new file with mode: 0644]
out.h [new file with mode: 0644]
path.cpp [new file with mode: 0644]
path.h [new file with mode: 0644]
searchchime.cpp [new file with mode: 0644]
seq.h [new file with mode: 0644]
seqdb.cpp [new file with mode: 0644]
seqdb.h [new file with mode: 0644]
seqsummarycommand.cpp
setnucmx.cpp [new file with mode: 0644]
sfasta.cpp [new file with mode: 0644]
sfasta.h [new file with mode: 0644]
summarysharedcommand.cpp
svnmods.h [new file with mode: 0644]
svnversion.h [new file with mode: 0644]
timers.h [new file with mode: 0644]
timing.h [new file with mode: 0644]
tracebackbit.cpp [new file with mode: 0644]
uc.h [new file with mode: 0644]
uchime_main.cpp [new file with mode: 0644]
ultra.h [new file with mode: 0644]
usort.cpp [new file with mode: 0644]
validcalculator.cpp
viterbifast.cpp [new file with mode: 0644]
windex.h [new file with mode: 0644]
writechhit.cpp [new file with mode: 0644]

index 49ad9cceb6280f8cf38ce8a451185fdb5bda38f0..e80c7330c4b9b7b9dccacaf6d6a9bfc481a437f2 100644 (file)
                A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
                A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; };
                A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
+               A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3655137DAB8300332B0C /* addtargets2.cpp */; };
+               A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3656137DAB8300332B0C /* alignchime.cpp */; };
+               A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3657137DAB8300332B0C /* alignchimel.cpp */; };
+               A74D368A137DAB8400332B0C /* alnparams.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365A137DAB8300332B0C /* alnparams.cpp */; };
+               A74D368B137DAB8400332B0C /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365C137DAB8300332B0C /* alpha.cpp */; };
+               A74D368C137DAB8400332B0C /* alpha2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365E137DAB8300332B0C /* alpha2.cpp */; };
+               A74D368D137DAB8400332B0C /* fractid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3664137DAB8300332B0C /* fractid.cpp */; };
+               A74D368E137DAB8400332B0C /* getparents.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3665137DAB8300332B0C /* getparents.cpp */; };
+               A74D368F137DAB8400332B0C /* globalalign2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3666137DAB8300332B0C /* globalalign2.cpp */; };
+               A74D3690137DAB8400332B0C /* make3way.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366A137DAB8300332B0C /* make3way.cpp */; };
+               A74D3691137DAB8400332B0C /* mx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366B137DAB8300332B0C /* mx.cpp */; };
+               A74D3692137DAB8400332B0C /* myutils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366E137DAB8300332B0C /* myutils.cpp */; };
+               A74D3693137DAB8400332B0C /* path.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3672137DAB8300332B0C /* path.cpp */; };
+               A74D3694137DAB8400332B0C /* searchchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3674137DAB8300332B0C /* searchchime.cpp */; };
+               A74D3695137DAB8400332B0C /* seqdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3676137DAB8300332B0C /* seqdb.cpp */; };
+               A74D3696137DAB8400332B0C /* setnucmx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3678137DAB8300332B0C /* setnucmx.cpp */; };
+               A74D3697137DAB8400332B0C /* sfasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3679137DAB8300332B0C /* sfasta.cpp */; };
+               A74D3698137DAB8400332B0C /* tracebackbit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D367F137DAB8300332B0C /* tracebackbit.cpp */; };
+               A74D3699137DAB8400332B0C /* uchime_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3681137DAB8300332B0C /* uchime_main.cpp */; };
+               A74D369A137DAB8400332B0C /* usort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3683137DAB8300332B0C /* usort.cpp */; };
+               A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3684137DAB8300332B0C /* viterbifast.cpp */; };
+               A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3686137DAB8300332B0C /* writechhit.cpp */; };
+               A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
                A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
                A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
                A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
                A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergegroupscommand.cpp; sourceTree = "<group>"; };
                A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
                A727864312E9E28C00F86ABA /* removerarecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removerarecommand.cpp; sourceTree = "<group>"; };
+               A74D3655137DAB8300332B0C /* addtargets2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = addtargets2.cpp; sourceTree = "<group>"; };
+               A74D3656137DAB8300332B0C /* alignchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchime.cpp; sourceTree = "<group>"; };
+               A74D3657137DAB8300332B0C /* alignchimel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchimel.cpp; sourceTree = "<group>"; };
+               A74D3658137DAB8300332B0C /* allocs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = allocs.h; sourceTree = "<group>"; };
+               A74D3659137DAB8300332B0C /* alnheuristics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alnheuristics.h; sourceTree = "<group>"; };
+               A74D365A137DAB8300332B0C /* alnparams.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alnparams.cpp; sourceTree = "<group>"; };
+               A74D365B137DAB8300332B0C /* alnparams.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alnparams.h; sourceTree = "<group>"; };
+               A74D365C137DAB8300332B0C /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = "<group>"; };
+               A74D365D137DAB8300332B0C /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = "<group>"; };
+               A74D365E137DAB8300332B0C /* alpha2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha2.cpp; sourceTree = "<group>"; };
+               A74D365F137DAB8300332B0C /* chainer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chainer.h; sourceTree = "<group>"; };
+               A74D3660137DAB8300332B0C /* chime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chime.h; sourceTree = "<group>"; };
+               A74D3661137DAB8300332B0C /* diagbox.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = diagbox.h; sourceTree = "<group>"; };
+               A74D3662137DAB8300332B0C /* dp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dp.h; sourceTree = "<group>"; };
+               A74D3663137DAB8300332B0C /* evalue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = evalue.h; sourceTree = "<group>"; };
+               A74D3664137DAB8300332B0C /* fractid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fractid.cpp; sourceTree = "<group>"; };
+               A74D3665137DAB8300332B0C /* getparents.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getparents.cpp; sourceTree = "<group>"; };
+               A74D3666137DAB8300332B0C /* globalalign2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = globalalign2.cpp; sourceTree = "<group>"; };
+               A74D3667137DAB8300332B0C /* help.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = help.h; sourceTree = "<group>"; };
+               A74D3668137DAB8300332B0C /* hsp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hsp.h; sourceTree = "<group>"; };
+               A74D3669137DAB8300332B0C /* hspfinder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hspfinder.h; sourceTree = "<group>"; };
+               A74D366A137DAB8300332B0C /* make3way.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = make3way.cpp; sourceTree = "<group>"; };
+               A74D366B137DAB8300332B0C /* mx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mx.cpp; sourceTree = "<group>"; };
+               A74D366C137DAB8300332B0C /* mx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mx.h; sourceTree = "<group>"; };
+               A74D366D137DAB8300332B0C /* myopts.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myopts.h; sourceTree = "<group>"; };
+               A74D366E137DAB8300332B0C /* myutils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myutils.cpp; sourceTree = "<group>"; };
+               A74D366F137DAB8300332B0C /* myutils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myutils.h; sourceTree = "<group>"; };
+               A74D3670137DAB8300332B0C /* orf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = orf.h; sourceTree = "<group>"; };
+               A74D3671137DAB8300332B0C /* out.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = out.h; sourceTree = "<group>"; };
+               A74D3672137DAB8300332B0C /* path.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = path.cpp; sourceTree = "<group>"; };
+               A74D3673137DAB8300332B0C /* path.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = path.h; sourceTree = "<group>"; };
+               A74D3674137DAB8300332B0C /* searchchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = searchchime.cpp; sourceTree = "<group>"; };
+               A74D3675137DAB8300332B0C /* seq.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seq.h; sourceTree = "<group>"; };
+               A74D3676137DAB8300332B0C /* seqdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqdb.cpp; sourceTree = "<group>"; };
+               A74D3677137DAB8300332B0C /* seqdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqdb.h; sourceTree = "<group>"; };
+               A74D3678137DAB8300332B0C /* setnucmx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = setnucmx.cpp; sourceTree = "<group>"; };
+               A74D3679137DAB8300332B0C /* sfasta.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sfasta.cpp; sourceTree = "<group>"; };
+               A74D367A137DAB8300332B0C /* sfasta.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sfasta.h; sourceTree = "<group>"; };
+               A74D367B137DAB8300332B0C /* svnmods.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = svnmods.h; sourceTree = "<group>"; };
+               A74D367C137DAB8300332B0C /* svnversion.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = svnversion.h; sourceTree = "<group>"; };
+               A74D367D137DAB8300332B0C /* timers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = timers.h; sourceTree = "<group>"; };
+               A74D367E137DAB8300332B0C /* timing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = timing.h; sourceTree = "<group>"; };
+               A74D367F137DAB8300332B0C /* tracebackbit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tracebackbit.cpp; sourceTree = "<group>"; };
+               A74D3680137DAB8300332B0C /* uc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uc.h; sourceTree = "<group>"; };
+               A74D3681137DAB8300332B0C /* uchime_main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = uchime_main.cpp; sourceTree = "<group>"; };
+               A74D3682137DAB8300332B0C /* ultra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ultra.h; sourceTree = "<group>"; };
+               A74D3683137DAB8300332B0C /* usort.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = usort.cpp; sourceTree = "<group>"; };
+               A74D3684137DAB8300332B0C /* viterbifast.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = viterbifast.cpp; sourceTree = "<group>"; };
+               A74D3685137DAB8300332B0C /* windex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = windex.h; sourceTree = "<group>"; };
+               A74D3686137DAB8300332B0C /* writechhit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = writechhit.cpp; sourceTree = "<group>"; };
+               A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
+               A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
                A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = "<group>"; };
                A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = homovacommand.cpp; sourceTree = "<group>"; };
                A778FE69134CA6CA00C0BA33 /* getcommandinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcommandinfocommand.h; sourceTree = "<group>"; };
                        name = Products;
                        sourceTree = "<group>";
                };
+               A74D3644137DA7CE00332B0C /* uchime */ = {
+                       isa = PBXGroup;
+                       children = (
+                               A74D3655137DAB8300332B0C /* addtargets2.cpp */,
+                               A74D3656137DAB8300332B0C /* alignchime.cpp */,
+                               A74D3657137DAB8300332B0C /* alignchimel.cpp */,
+                               A74D3658137DAB8300332B0C /* allocs.h */,
+                               A74D3659137DAB8300332B0C /* alnheuristics.h */,
+                               A74D365A137DAB8300332B0C /* alnparams.cpp */,
+                               A74D365B137DAB8300332B0C /* alnparams.h */,
+                               A74D365C137DAB8300332B0C /* alpha.cpp */,
+                               A74D365D137DAB8300332B0C /* alpha.h */,
+                               A74D365E137DAB8300332B0C /* alpha2.cpp */,
+                               A74D365F137DAB8300332B0C /* chainer.h */,
+                               A74D3660137DAB8300332B0C /* chime.h */,
+                               A74D3661137DAB8300332B0C /* diagbox.h */,
+                               A74D3662137DAB8300332B0C /* dp.h */,
+                               A74D3663137DAB8300332B0C /* evalue.h */,
+                               A74D3664137DAB8300332B0C /* fractid.cpp */,
+                               A74D3665137DAB8300332B0C /* getparents.cpp */,
+                               A74D3666137DAB8300332B0C /* globalalign2.cpp */,
+                               A74D3667137DAB8300332B0C /* help.h */,
+                               A74D3668137DAB8300332B0C /* hsp.h */,
+                               A74D3669137DAB8300332B0C /* hspfinder.h */,
+                               A74D366A137DAB8300332B0C /* make3way.cpp */,
+                               A74D366B137DAB8300332B0C /* mx.cpp */,
+                               A74D366C137DAB8300332B0C /* mx.h */,
+                               A74D366D137DAB8300332B0C /* myopts.h */,
+                               A74D366E137DAB8300332B0C /* myutils.cpp */,
+                               A74D366F137DAB8300332B0C /* myutils.h */,
+                               A74D3670137DAB8300332B0C /* orf.h */,
+                               A74D3671137DAB8300332B0C /* out.h */,
+                               A74D3672137DAB8300332B0C /* path.cpp */,
+                               A74D3673137DAB8300332B0C /* path.h */,
+                               A74D3674137DAB8300332B0C /* searchchime.cpp */,
+                               A74D3675137DAB8300332B0C /* seq.h */,
+                               A74D3676137DAB8300332B0C /* seqdb.cpp */,
+                               A74D3677137DAB8300332B0C /* seqdb.h */,
+                               A74D3678137DAB8300332B0C /* setnucmx.cpp */,
+                               A74D3679137DAB8300332B0C /* sfasta.cpp */,
+                               A74D367A137DAB8300332B0C /* sfasta.h */,
+                               A74D367B137DAB8300332B0C /* svnmods.h */,
+                               A74D367C137DAB8300332B0C /* svnversion.h */,
+                               A74D367D137DAB8300332B0C /* timers.h */,
+                               A74D367E137DAB8300332B0C /* timing.h */,
+                               A74D367F137DAB8300332B0C /* tracebackbit.cpp */,
+                               A74D3680137DAB8300332B0C /* uc.h */,
+                               A74D3681137DAB8300332B0C /* uchime_main.cpp */,
+                               A74D3682137DAB8300332B0C /* ultra.h */,
+                               A74D3683137DAB8300332B0C /* usort.cpp */,
+                               A74D3684137DAB8300332B0C /* viterbifast.cpp */,
+                               A74D3685137DAB8300332B0C /* windex.h */,
+                               A74D3686137DAB8300332B0C /* writechhit.cpp */,
+                       );
+                       name = uchime;
+                       sourceTree = "<group>";
+               };
                A7E9BA3812D3956100DA6239 /* commands */ = {
                        isa = PBXGroup;
                        children = (
                                A7E9B68212D37EC400DA6239 /* chimerapintailcommand.cpp */,
                                A7E9B68B12D37EC400DA6239 /* chimeraslayercommand.h */,
                                A7E9B68A12D37EC400DA6239 /* chimeraslayercommand.cpp */,
+                               A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */,
+                               A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */,
                                A7E9B68D12D37EC400DA6239 /* chopseqscommand.h */,
                                A7E9B68C12D37EC400DA6239 /* chopseqscommand.cpp */,
                                A7E9B69112D37EC400DA6239 /* classifyotucommand.h */,
                A7E9BA4512D3965600DA6239 /* chimera */ = {
                        isa = PBXGroup;
                        children = (
+                               A74D3644137DA7CE00332B0C /* uchime */,
                                A7E9B65C12D37EC300DA6239 /* bellerophon.cpp */,
                                A7E9B65D12D37EC300DA6239 /* bellerophon.h */,
                                A7E9B67412D37EC400DA6239 /* ccode.cpp */,
                                A7FE7C401330EA1000F7B327 /* getcurrentcommand.cpp in Sources */,
                                A7FE7E6D13311EA400F7B327 /* setcurrentcommand.cpp in Sources */,
                                A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */,
+                               A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */,
+                               A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */,
+                               A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */,
+                               A74D368A137DAB8400332B0C /* alnparams.cpp in Sources */,
+                               A74D368B137DAB8400332B0C /* alpha.cpp in Sources */,
+                               A74D368C137DAB8400332B0C /* alpha2.cpp in Sources */,
+                               A74D368D137DAB8400332B0C /* fractid.cpp in Sources */,
+                               A74D368E137DAB8400332B0C /* getparents.cpp in Sources */,
+                               A74D368F137DAB8400332B0C /* globalalign2.cpp in Sources */,
+                               A74D3690137DAB8400332B0C /* make3way.cpp in Sources */,
+                               A74D3691137DAB8400332B0C /* mx.cpp in Sources */,
+                               A74D3692137DAB8400332B0C /* myutils.cpp in Sources */,
+                               A74D3693137DAB8400332B0C /* path.cpp in Sources */,
+                               A74D3694137DAB8400332B0C /* searchchime.cpp in Sources */,
+                               A74D3695137DAB8400332B0C /* seqdb.cpp in Sources */,
+                               A74D3696137DAB8400332B0C /* setnucmx.cpp in Sources */,
+                               A74D3697137DAB8400332B0C /* sfasta.cpp in Sources */,
+                               A74D3698137DAB8400332B0C /* tracebackbit.cpp in Sources */,
+                               A74D3699137DAB8400332B0C /* uchime_main.cpp in Sources */,
+                               A74D369A137DAB8400332B0C /* usort.cpp in Sources */,
+                               A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */,
+                               A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */,
+                               A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                ARCHS = "$(ARCHS_STANDARD_64_BIT)";
                                DEPLOYMENT_LOCATION = NO;
                                GCC_C_LANGUAGE_STANDARD = gnu99;
+                               GCC_ENABLE_SSE3_EXTENSIONS = NO;
+                               GCC_ENABLE_SSE41_EXTENSIONS = NO;
+                               GCC_ENABLE_SSE42_EXTENSIONS = NO;
                                GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../release\\\"\"",
diff --git a/addtargets2.cpp b/addtargets2.cpp
new file mode 100644 (file)
index 0000000..4e0dbd1
--- /dev/null
@@ -0,0 +1,38 @@
+//#if  UCHIMES\r
+\r
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "ultra.h"\r
+#include <set>\r
+\r
+const float MAX_WORD_COUNT_DROP = 1;\r
+\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order);\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path);\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path);\r
+void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts,\r
+  vector<unsigned> &Order);\r
+\r
+void AddTargets(SeqDB &DB, const SeqData &Query, set<unsigned> &TargetIndexes)\r
+       {\r
+       const unsigned SeqCount = DB.GetSeqCount();\r
+       if (SeqCount == 0)\r
+               return;\r
+\r
+       vector<float> WordCounts;\r
+       vector<unsigned> Order;\r
+       USort(Query, DB, WordCounts, Order);\r
+       asserta(SIZE(Order) == SeqCount);\r
+       unsigned TopSeqIndex = Order[0];\r
+       float TopWordCount = WordCounts[TopSeqIndex];\r
+       for (unsigned i = 0; i < SeqCount; ++i)\r
+               {\r
+               unsigned SeqIndex = Order[i];\r
+               float WordCount = WordCounts[SeqIndex];\r
+               if (TopWordCount - WordCount > MAX_WORD_COUNT_DROP)\r
+                       return;\r
+               TargetIndexes.insert(SeqIndex);\r
+               }\r
+       }\r
+\r
+//#endif\r
diff --git a/alignchime.cpp b/alignchime.cpp
new file mode 100644 (file)
index 0000000..d7b05a8
--- /dev/null
@@ -0,0 +1,649 @@
+#include "myutils.h"\r
+#include "seq.h"\r
+#include "chime.h"\r
+#include "dp.h"\r
+\r
+#define TRACE          0\r
+#define TRACE_BS       0\r
+\r
+void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
+  const string &PathQA, const string &PathQB,\r
+  string &Q3, string &A3, string &B3);\r
+\r
+void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit);\r
+\r
+// Returns Y divided by a weighted sum of N and A:\r
+//     Y / (opt_xn*(N + opt_dn) + opt_xa*A)\r
+// Callers in this file pass the counts of columns voting for (Y), against (N)\r
+// and abstaining (A) on one side of a candidate crossover.\r
+double GetScore2(double Y, double N, double A)\r
+       {\r
+       const double Denominator = opt_xn*(N + opt_dn) + opt_xa*A;\r
+       return Y/Denominator;\r
+       }\r
+\r
+void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       Hit.Clear();\r
+       Hit.QLabel = QLabel;\r
+\r
+       const byte *Q3Seq = (const byte *) Q3.c_str();\r
+       const byte *A3Seq = (const byte *) A3.c_str();\r
+       const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+       const unsigned ColCount = SIZE(Q3);\r
+       asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+#if    TRACE\r
+       Log("Q %5u %*.*s\n", ColCount, ColCount, ColCount, Q3Seq);\r
+       Log("A %5u %*.*s\n", ColCount, ColCount, ColCount, A3Seq);\r
+       Log("B %5u %*.*s\n", ColCount, ColCount, ColCount, B3Seq);\r
+#endif\r
+\r
+// Discard terminal gaps\r
+       unsigned ColLo = UINT_MAX;\r
+       unsigned ColHi = UINT_MAX;\r
+       for (unsigned Col = 2; Col + 2 < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (isacgt(q) && isacgt(a) && isacgt(b))\r
+                       {\r
+                       if (ColLo == UINT_MAX)\r
+                               ColLo = Col;\r
+                       ColHi = Col;\r
+                       }\r
+               }\r
+\r
+       if (ColLo == UINT_MAX)\r
+               return;\r
+\r
+       unsigned QPos = 0;\r
+       unsigned APos = 0;\r
+       unsigned BPos = 0;\r
+       unsigned DiffCount = 0;\r
+\r
+       vector<unsigned> ColToQPos(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumCount(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumSameA(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumSameB(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumForA(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumForB(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumAbstain(ColLo, UINT_MAX);\r
+       vector<unsigned> AccumAgainst(ColLo, UINT_MAX);\r
+\r
+       unsigned SumSameA = 0;\r
+       unsigned SumSameB = 0;\r
+       unsigned SumSameAB = 0;\r
+       unsigned Sum = 0;\r
+       unsigned SumForA = 0;\r
+       unsigned SumForB = 0;\r
+       unsigned SumAbstain = 0;\r
+       unsigned SumAgainst = 0;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (isacgt(q) && isacgt(a) && isacgt(b))\r
+                       {\r
+                       if (q == a)\r
+                               ++SumSameA;\r
+                       if (q == b)\r
+                               ++SumSameB;\r
+                       if (a == b)\r
+                               ++SumSameAB;\r
+                       if (q == a && q != b)\r
+                               ++SumForA;\r
+                       if (q == b && q != a)\r
+                               ++SumForB;\r
+                       if (a == b && q != a)\r
+                               ++SumAgainst;\r
+                       if (q != a && q != b)\r
+                               ++SumAbstain;\r
+                       ++Sum;\r
+                       }\r
+\r
+               ColToQPos.push_back(QPos);\r
+               AccumSameA.push_back(SumSameA);\r
+               AccumSameB.push_back(SumSameB);\r
+               AccumCount.push_back(Sum);\r
+               AccumForA.push_back(SumForA);\r
+               AccumForB.push_back(SumForB);\r
+               AccumAbstain.push_back(SumAbstain);\r
+               AccumAgainst.push_back(SumAgainst);\r
+\r
+               if (q != '-')\r
+                       ++QPos;\r
+               if (a != '-')\r
+                       ++APos;\r
+               if (b != '-')\r
+                       ++BPos;\r
+               }\r
+\r
+       asserta(SIZE(ColToQPos) == ColHi+1);\r
+       asserta(SIZE(AccumSameA) == ColHi+1);\r
+       asserta(SIZE(AccumSameB) == ColHi+1);\r
+       asserta(SIZE(AccumAbstain) == ColHi+1);\r
+       asserta(SIZE(AccumAgainst) == ColHi+1);\r
+\r
+       double IdQA = double(SumSameA)/Sum;\r
+       double IdQB = double(SumSameB)/Sum;\r
+       double IdAB = double(SumSameAB)/Sum;\r
+       double MaxId = max(IdQA, IdQB);\r
+\r
+#if    TRACE\r
+       Log("IdQA=%.1f%% IdQB=%.1f%% IdAB=%.1f\n", IdQA*100.0, IdQB*100.0, IdAB*100.0);\r
+       Log("\n");\r
+       Log("    x  AQB   IdAL   IdBL   IdAR   IdBR   DivAB   DivBA    YAL    YBL    YAR    YBR    AbL    AbR  ScoreAB  ScoreAB    XLo    Xhi\n");\r
+       Log("-----  ---  -----  -----  -----  -----  ------  ------  -----  -----  -----  -----  -----  -----  -------  -------  -----  -----\n");\r
+#endif\r
+       unsigned BestXLo = UINT_MAX;\r
+       unsigned BestXHi = UINT_MAX;\r
+       double BestDiv = 0.0;\r
+       double BestIdQM = 0.0;\r
+       double BestScore = 0.0;\r
+\r
+// Find range of cols BestXLo..BestXHi that maximizes score\r
+       bool FirstA = false;\r
+\r
+// NOTE: Must be < ColHi not <= because use Col+1 below\r
+       for (unsigned Col = ColLo; Col < ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               unsigned SameAL = AccumSameA[Col];\r
+               unsigned SameBL = AccumSameB[Col];\r
+               unsigned SameAR = SumSameA - AccumSameA[Col];\r
+               unsigned SameBR = SumSameB - AccumSameB[Col];\r
+\r
+               double IdAB = double(SameAL + SameBR)/Sum;\r
+               double IdBA = double(SameBL + SameAR)/Sum;\r
+\r
+               unsigned ForAL = AccumForA[Col];\r
+               unsigned ForBL = AccumForB[Col];\r
+               unsigned ForAR = SumForA - AccumForA[Col+1];\r
+               unsigned ForBR = SumForB - AccumForB[Col+1];\r
+               unsigned AbL = AccumAbstain[Col];\r
+               unsigned AbR = SumAbstain - AccumAbstain[Col+1];\r
+\r
+               double ScoreAB = GetScore2(ForAL, ForBL, AbL)*GetScore2(ForBR, ForAR, AbR);\r
+               double ScoreBA = GetScore2(ForBL, ForAL, AbL)*GetScore2(ForAR, ForBR, AbR);\r
+       \r
+               double DivAB = IdAB/MaxId;\r
+               double DivBA = IdBA/MaxId;\r
+               double MaxDiv = max(DivAB, DivBA);\r
+\r
+               //if (MaxDiv > BestDiv)\r
+               //      {\r
+               //      BestDiv = MaxDiv;\r
+               //      BestXLo = Col;\r
+               //      BestXHi = Col;\r
+               //      FirstA = (DivAB > DivBA);\r
+               //      if (FirstA)\r
+               //              BestIdQM = IdAB;\r
+               //      else\r
+               //              BestIdQM = IdBA;\r
+               //      }\r
+               //else if (MaxDiv == BestDiv)\r
+               //      BestXHi = Col;\r
+\r
+               double MaxScore = max(ScoreAB, ScoreBA);\r
+               if (MaxScore > BestScore)\r
+                       {\r
+                       BestScore = MaxScore;\r
+                       BestXLo = Col;\r
+                       BestXHi = Col;\r
+                       FirstA = (ScoreAB > ScoreBA);\r
+                       if (FirstA)\r
+                               BestIdQM = IdAB;\r
+                       else\r
+                               BestIdQM = IdBA;\r
+                       if (MaxDiv > BestDiv)\r
+                               BestDiv = MaxDiv;\r
+                       }\r
+               else if (MaxScore == BestScore)\r
+                       {\r
+                       BestXHi = Col;\r
+                       if (MaxDiv > BestDiv)\r
+                               BestDiv = MaxDiv;\r
+                       }\r
+\r
+#if    TRACE\r
+               {\r
+               Log("%5u", Col);\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               Log("  %c%c%c", a, q, b);\r
+               Log("  %5u", SameAL);\r
+               Log("  %5u", SameBL);\r
+               Log("  %5u", SameAR);\r
+               Log("  %5u", SameBR);\r
+               Log("  %5.4f", DivAB);\r
+               Log("  %5.4f", DivBA);\r
+               Log("  %5u", ForAL);\r
+               Log("  %5u", ForBL);\r
+               Log("  %5u", ForAR);\r
+               Log("  %5u", ForBR);\r
+               Log("  %5u", AbL);\r
+               Log("  %5u", AbR);\r
+               Log("  %7.4f", ScoreAB);\r
+               Log("  %7.4f", ScoreBA);\r
+               if (BestXLo != UINT_MAX)\r
+                       Log("  %5u", BestXLo);\r
+               if (BestXHi != UINT_MAX)\r
+                       Log("  %5u", BestXHi);\r
+               Log("\n");\r
+               }\r
+#endif\r
+               }\r
+\r
+       if (BestXLo == UINT_MAX)\r
+               {\r
+#if    TRACE\r
+               Log("\n");\r
+               Log("No crossover found.\n");\r
+#endif\r
+               return;\r
+               }\r
+#if    TRACE\r
+       Log("BestX col %u - %u\n", BestXLo, BestXHi);\r
+#endif\r
+\r
+// Find maximum region of identity within BestXLo..BestXHi\r
+       unsigned ColXLo = (BestXLo + BestXHi)/2;\r
+       unsigned ColXHi = ColXLo;\r
+       unsigned SegLo = UINT_MAX;\r
+       unsigned SegHi = UINT_MAX;\r
+       for (unsigned Col = BestXLo; Col <= BestXHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (q == a && q == b)\r
+                       {\r
+                       if (SegLo == UINT_MAX)\r
+                               SegLo = Col;\r
+                       SegHi = Col;\r
+                       }\r
+               else\r
+                       {\r
+                       unsigned SegLength = SegHi - SegLo + 1;\r
+                       unsigned BestSegLength = ColXHi - ColXLo + 1;\r
+                       if (SegLength > BestSegLength)\r
+                               {\r
+                               ColXLo = SegLo;\r
+                               ColXHi = SegHi;\r
+                               }\r
+                       SegLo = UINT_MAX;\r
+                       SegHi = UINT_MAX;\r
+                       }\r
+               }\r
+       unsigned SegLength = SegHi - SegLo + 1;\r
+       unsigned BestSegLength = ColXHi - ColXLo + 1;\r
+       if (SegLength > BestSegLength)\r
+               {\r
+               ColXLo = SegLo;\r
+               ColXHi = SegHi;\r
+               }\r
+\r
+       QPos = 0;\r
+       for (unsigned x = 0; x < ColCount; ++x)\r
+               {\r
+               if (x == ColXLo)\r
+                       Hit.QXLo = QPos;\r
+               else if (x == ColXHi)\r
+                       {\r
+                       Hit.QXHi = QPos;\r
+                       break;\r
+                       }\r
+               char q = Q3Seq[x];\r
+               if (q != '-')\r
+                       ++QPos;\r
+               }\r
+\r
+       Hit.ColXLo = ColXLo;\r
+       Hit.ColXHi = ColXHi;\r
+\r
+       //if (FirstA)\r
+       //      {\r
+       //      Hit.LY = AccumForA[ColXLo];\r
+       //      Hit.LN = AccumForB[ColXLo];\r
+\r
+       //      Hit.RY = SumForB - AccumForB[ColXHi];\r
+       //      Hit.RN = SumForA - AccumForA[ColXHi];\r
+       //      }\r
+       //else\r
+       //      {\r
+       //      Hit.LY = AccumForB[ColXLo];\r
+       //      Hit.LN = AccumForA[ColXLo];\r
+       //      Hit.RY = SumForA - AccumForA[ColXHi];\r
+       //      Hit.RN = SumForB - AccumForB[ColXHi];\r
+       //      }\r
+\r
+       //Hit.LA = AccumAgainst[ColXLo];\r
+       //Hit.LD = AccumAbstain[ColXLo];\r
+\r
+       //Hit.RA = SumAgainst - AccumAgainst[ColXHi];\r
+       //Hit.RD = SumAbstain - AccumAbstain[ColXHi];\r
+\r
+       Hit.PctIdAB = IdAB*100.0;\r
+       Hit.PctIdQM = BestIdQM*100.0;\r
+\r
+       Hit.Div = (BestDiv - 1.0)*100.0;\r
+\r
+       //Hit.QSD = QSD;\r
+       Hit.Q3 = Q3;\r
+       Hit.QLabel = QLabel;\r
+       if (FirstA)\r
+               {\r
+               //Hit.ASD = ASD;\r
+               //Hit.BSD = BSD;\r
+               //Hit.PathQA = PathQA;\r
+               //Hit.PathQB = PathQB;\r
+               Hit.A3 = A3;\r
+               Hit.B3 = B3;\r
+               Hit.ALabel = ALabel;\r
+               Hit.BLabel = BLabel;\r
+               Hit.PctIdQA = IdQA*100.0;\r
+               Hit.PctIdQB = IdQB*100.0;\r
+               }\r
+       else\r
+               {\r
+               Hit.A3 = B3;\r
+               Hit.B3 = A3;\r
+               Hit.ALabel = BLabel;\r
+               Hit.BLabel = ALabel;\r
+               Hit.PctIdQA = IdQB*100.0;\r
+               Hit.PctIdQB = IdQA*100.0;\r
+               }\r
+\r
+// CS SNPs\r
+       Hit.CS_LY = 0;\r
+       Hit.CS_LN = 0;\r
+       Hit.CS_RY = 0;\r
+       Hit.CS_RN = 0;\r
+       Hit.CS_LA = 0;\r
+       Hit.CS_RA = 0;\r
+\r
+       //vector<float> Cons;\r
+       //for (unsigned Col = 0; Col < ColCount; ++Col)\r
+       //      {\r
+       //      char q = Q3Seq[Col];\r
+       //      char a = A3Seq[Col];\r
+       //      char b = B3Seq[Col];\r
+       //      if (q == a && q == b && a == b)\r
+       //              {\r
+       //              Cons.push_back(1.0f);\r
+       //              continue;\r
+       //              }\r
+\r
+       //      bool gapq = isgap(q);\r
+       //      bool gapa = isgap(a);\r
+       //      bool gapb = isgap(b);\r
+\r
+       //      if (!gapq && !gapa && !gapb)\r
+       //              {\r
+       //              if (q == a || q == b || a == b)\r
+       //                      Cons.push_back(0.75);\r
+       //              else\r
+       //                      Cons.push_back(0.5);\r
+       //              }\r
+       //      else\r
+       //              {\r
+       //              if (!gapa && (a == b || a == q))\r
+       //                      Cons.push_back(0.5f);\r
+       //              else if (!gapb && b == q)\r
+       //                      Cons.push_back(0.5f);\r
+       //              else\r
+       //                      Cons.push_back(0.0f);\r
+       //              }\r
+       //      }\r
+\r
+       //float fLY = 0.0f;\r
+       //float fLN = 0.0f;\r
+       //float fLA = 0.0f;\r
+       //float fRY = 0.0f;\r
+       //float fRN = 0.0f;\r
+       //float fRA = 0.0f;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               if (q == a && q == b && a == b)\r
+                       continue;\r
+\r
+               unsigned ngaps = 0;\r
+               if (isgap(q))\r
+                       ++ngaps;\r
+               if (isgap(a))\r
+                       ++ngaps;\r
+               if (isgap(b))\r
+                       ++ngaps;\r
+\r
+               if (opt_skipgaps)\r
+                       {\r
+                       if (ngaps == 3)\r
+                               continue;\r
+                       }\r
+               else\r
+                       {\r
+                       if (ngaps == 2)\r
+                               continue;\r
+                       }\r
+\r
+               if (!FirstA)\r
+                       swap(a, b);\r
+\r
+               //float AvgCons = (Cons[Col-2] + Cons[Col-1] + Cons[Col+1] + Cons[Col+2])/4;\r
+               //if (Col < ColXLo)\r
+               //      {\r
+               //      if (q == a && q != b)\r
+               //              fLY += AvgCons;\r
+               //      else if (q == b && q != a)\r
+               //              fLN += AvgCons;\r
+               //      else\r
+               //              fLA += AvgCons;\r
+               //      }\r
+               //else if (Col > ColXHi)\r
+               //      {\r
+               //      if (q == b && q != a)\r
+               //              fRY += AvgCons;\r
+               //      else if (q == a && q != b)\r
+               //              fRN += AvgCons;\r
+               //      else\r
+               //              fRA += AvgCons;\r
+               //      }\r
+\r
+               if (opt_skipgaps2)\r
+                       {\r
+                       if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+                               continue;\r
+                       if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+                               continue;\r
+                       }\r
+\r
+               //if (Col > 0 && isgap(Q3Seq[Col-1]))\r
+                       //continue;\r
+               //if (Col + 1 < ColCount && isgap(Q3Seq[Col+1]))\r
+               //      continue;\r
+\r
+               if (Col < ColXLo)\r
+                       {\r
+                       if (q == a && q != b)\r
+                               ++Hit.CS_LY;\r
+                       else if (q == b && q != a)\r
+                               ++Hit.CS_LN;\r
+                       else\r
+                               ++Hit.CS_LA;\r
+                       }\r
+               else if (Col > ColXHi)\r
+                       {\r
+                       if (q == b && q != a)\r
+                               ++Hit.CS_RY;\r
+                       else if (q == a && q != b)\r
+                               ++Hit.CS_RN;\r
+                       else\r
+                               ++Hit.CS_RA;\r
+                       }\r
+               }\r
+\r
+       double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
+       double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
+       Hit.Score = ScoreL*ScoreR;\r
+\r
+       extern bool g_UchimeDeNovo;\r
+\r
+       //if (0)//g_UchimeDeNovo)\r
+       //      {\r
+       //      double AbQ = GetAbFromLabel(QLabel.c_str());\r
+       //      double AbA = GetAbFromLabel(ALabel.c_str());\r
+       //      double AbB = GetAbFromLabel(BLabel.c_str());\r
+       //      if (AbQ > 0.0 && AbA > 0.0 && AbB > 0.0)\r
+       //              {\r
+       //              double MinAb = min(AbA, AbB);\r
+       //              double Ratio = MinAb/AbQ;\r
+       //              double t = Ratio - opt_abx;\r
+       //      //      double Factor = 2.0/(1.0 + exp(-t));\r
+       //              double Factor = min(Ratio, opt_abx)/opt_abx;\r
+       //              if (opt_verbose)\r
+       //                      Log("Score %.4f Ab factor %.4f >%s\n", Hit.Score, Factor, QLabel.c_str());\r
+       //              Hit.Score *= Factor;\r
+       //              }\r
+       //      }\r
+\r
+       extern FILE *g_fUChimeAlns;\r
+       if (g_fUChimeAlns != 0 && Hit.Div > 0.0)\r
+               {\r
+               void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit);\r
+               WriteChimeHitX(g_fUChimeAlns, Hit);\r
+               }\r
+       }\r
+\r
+void AlignChime3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       if (opt_ucl)\r
+               AlignChimeLocal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);\r
+       else\r
+               AlignChimeGlobal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);\r
+       }\r
+\r
+static void StripGaps(const byte *Seq, unsigned L, string &s)\r
+       {\r
+       s.clear();\r
+       for (unsigned i = 0; i < L; ++i)\r
+               {\r
+               char c = Seq[i];\r
+               if (!isgap(c))\r
+                       s.push_back(c);\r
+               }\r
+       }\r
+\r
+static void StripGapsAlloc(const SeqData &SDIn, SeqData &SDOut)\r
+       {\r
+       SDOut = SDIn;\r
+       byte *s = myalloc(byte, SDIn.L);\r
+       unsigned k = 0;\r
+       for (unsigned i = 0; i < SDIn.L; ++i)\r
+               {\r
+               char c = SDIn.Seq[i];\r
+               if (!isgap(c))\r
+                       s[k++] = toupper(c);\r
+               }\r
+       SDOut.Seq = s;\r
+       SDOut.L = k;\r
+       }\r
+\r
+void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
+  const string &PathQA, const string &PathQB, ChimeHit2 &Hit)\r
+       {\r
+       //if (opt_ucl)\r
+       //      {\r
+       //      AlignChimeLocal(QSD, ASD, BSD, PathQA, PathQB, Hit);\r
+       //      return;\r
+       //      }\r
+\r
+       string Q3;\r
+       string A3;\r
+       string B3;\r
+       Make3Way(QSD, ASD, BSD, PathQA, PathQB, Q3, A3, B3);\r
+\r
+       AlignChime3(Q3, A3, B3, QSD.Label, ASD.Label, BSD.Label, Hit);\r
+       }\r
+\r
+void AlignChime3SDRealign(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       SeqData QSD;\r
+       SeqData ASD;\r
+       SeqData BSD;\r
+       StripGapsAlloc(QSD3, QSD);\r
+       StripGapsAlloc(ASD3, ASD);\r
+       StripGapsAlloc(BSD3, BSD);\r
+\r
+       string PathQA;\r
+       string PathQB;\r
+       bool FoundQA = GlobalAlign(QSD, ASD, PathQA);\r
+       bool FoundQB = GlobalAlign(QSD, BSD, PathQB);\r
+       if (!FoundQA || !FoundQB)\r
+               {\r
+               Hit.Clear();\r
+               Hit.QLabel = QSD3.Label;\r
+               return;\r
+               }\r
+\r
+       AlignChime(QSD, ASD, BSD, PathQA, PathQB, Hit);\r
+\r
+       myfree((void *) QSD.Seq);\r
+       myfree((void *) ASD.Seq);\r
+       myfree((void *) BSD.Seq);\r
+       }\r
+\r
+void AlignChime3SD(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       if (opt_realign)\r
+               {\r
+               AlignChime3SDRealign(QSD3, ASD3, BSD3, Hit);\r
+               return;\r
+               }\r
+\r
+       string Q3;\r
+       string A3;\r
+       string B3;\r
+\r
+       const unsigned ColCount = QSD3.L;\r
+       asserta(ASD3.L == ColCount && BSD3.L == ColCount);\r
+\r
+       Q3.reserve(ColCount);\r
+       A3.reserve(ColCount);\r
+       B3.reserve(ColCount);\r
+\r
+       const byte *QS = QSD3.Seq;\r
+       const byte *AS = ASD3.Seq;\r
+       const byte *BS = BSD3.Seq;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               byte q = toupper(QS[Col]);\r
+               byte a = toupper(AS[Col]);\r
+               byte b = toupper(BS[Col]);\r
+\r
+               if (isgap(q) && isgap(a) && isgap(b))\r
+                       continue;\r
+\r
+               Q3.push_back(q);\r
+               A3.push_back(a);\r
+               B3.push_back(b);\r
+               }\r
+\r
+       AlignChime3(Q3, A3, B3, QSD3.Label, ASD3.Label, BSD3.Label, Hit);\r
+       }\r
diff --git a/alignchimel.cpp b/alignchimel.cpp
new file mode 100644 (file)
index 0000000..ae152af
--- /dev/null
@@ -0,0 +1,417 @@
+#include "myutils.h"\r
+#include "seq.h"\r
+#include "chime.h"\r
+\r
+#define        TRACE   0\r
+\r
+/***\r
+Let:\r
+       S[i] =  Score of col i: 0=no SNP, +1 = Y, -3 = N or A.\r
+\r
+       V[k] =  Best segment score from j, j+1 .. k for all possible j\r
+                       max(j) Sum i=j..k S[i]\r
+\r
+Recursion relation:\r
+       V[k] =  S[k] + max (V[k-1], 0)\r
+***/\r
+\r
+void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit);\r
+\r
+void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
+  const string &PathQA, const string &PathQB,\r
+  string &Q3, string &A3, string &B3);\r
+\r
+double GetScore2(double Y, double N, double A);\r
+\r
+void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       Hit.Clear();\r
+\r
+       const byte *Q3Seq = (const byte *) Q3.c_str();\r
+       const byte *A3Seq = (const byte *) A3.c_str();\r
+       const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+       const unsigned ColCount = SIZE(Q3);\r
+       asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+       vector<float> ColScoresA(ColCount, 0.0f);\r
+       vector<float> ColScoresB(ColCount, 0.0f);\r
+\r
+       float ScoreN = -(float) opt_xn;\r
+       unsigned QL = 0;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q))\r
+                       ++QL;\r
+\r
+               if (q == a && q == b && a == b)\r
+                       continue;\r
+\r
+               if (isgap(q) || isgap(a) || isgap(b))\r
+                       continue;\r
+\r
+               if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+                       continue;\r
+\r
+               if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+                       continue;\r
+\r
+               if (q == a && q != b)\r
+                       ColScoresA[Col] = 1;\r
+               else\r
+                       ColScoresA[Col] = ScoreN;\r
+\r
+               if (q == b && q != a)\r
+                       ColScoresB[Col] = 1;\r
+               else\r
+                       ColScoresB[Col] = ScoreN;\r
+               }\r
+\r
+       vector<float> LVA(ColCount, 0.0f);\r
+       vector<float> LVB(ColCount, 0.0f);\r
+\r
+       LVA[0] = ColScoresA[0];\r
+       LVB[0] = ColScoresB[0];\r
+       for (unsigned Col = 1; Col < ColCount; ++Col)\r
+               {\r
+               LVA[Col] = max(LVA[Col-1], 0.0f) + ColScoresA[Col];\r
+               LVB[Col] = max(LVB[Col-1], 0.0f) + ColScoresB[Col];\r
+               }\r
+\r
+       vector<float> RVA(ColCount, 0.0f);\r
+       vector<float> RVB(ColCount, 0.0f);\r
+\r
+       RVA[ColCount-1] = ColScoresA[ColCount-1];\r
+       RVB[ColCount-1] = ColScoresB[ColCount-1];\r
+       for (int Col = ColCount-2; Col >= 0; --Col)\r
+               {\r
+               RVA[Col] = max(RVA[Col+1], 0.0f) + ColScoresA[Col];\r
+               RVB[Col] = max(RVB[Col+1], 0.0f) + ColScoresB[Col];\r
+               }\r
+\r
+       bool FirstA = true;\r
+       float MaxSum = 0.0;\r
+       unsigned ColX = UINT_MAX;\r
+       for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+               {\r
+               float Sum = LVA[Col] + RVB[Col+1];\r
+               if (Sum > MaxSum)\r
+                       {\r
+                       FirstA = true;\r
+                       MaxSum = Sum;\r
+                       ColX = Col;\r
+                       }\r
+               }\r
+\r
+       for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+               {\r
+               float Sum = LVB[Col] + RVA[Col+1];\r
+               if (Sum > MaxSum)\r
+                       {\r
+                       FirstA = false;\r
+                       MaxSum = Sum;\r
+                       ColX = Col;\r
+                       }\r
+               }\r
+       if (ColX == UINT_MAX)\r
+               return;\r
+\r
+       unsigned ColLo = UINT_MAX;\r
+       unsigned ColHi = UINT_MAX;\r
+       if (FirstA)\r
+               {\r
+               float Sum = 0.0f;\r
+               for (int Col = ColX; Col >= 0; --Col)\r
+                       {\r
+                       Sum += ColScoresA[Col];\r
+                       if (Sum >= LVA[ColX])\r
+                               {\r
+                               ColLo = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= LVA[ColX]);\r
+               Sum = 0.0f;\r
+               for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+                       {\r
+                       Sum += ColScoresB[Col];\r
+                       if (Sum >= RVB[ColX])\r
+                               {\r
+                               ColHi = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= RVB[ColX]);\r
+               }\r
+       else\r
+               {\r
+               float Sum = 0.0f;\r
+               for (int Col = ColX; Col >= 0; --Col)\r
+                       {\r
+                       Sum += ColScoresB[Col];\r
+                       if (Sum >= LVB[ColX])\r
+                               {\r
+                               ColLo = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= LVB[ColX]);\r
+               Sum = 0.0f;\r
+               for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+                       {\r
+                       Sum += ColScoresA[Col];\r
+                       if (Sum >= RVA[ColX])\r
+                               {\r
+                               ColHi = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= RVA[ColX]);\r
+               }\r
+\r
+       unsigned ColXHi = ColX;\r
+       for (unsigned Col = ColX + 1; Col < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               \r
+               if (q == a && q == b && !isgap(q))\r
+                       ColXHi = Col;\r
+               else\r
+                       break;\r
+               }\r
+\r
+       unsigned ColXLo = ColX;\r
+       for (int Col = (int) ColX - 1; Col >= 0; --Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               \r
+               if (q == a && q == b && !isgap(q))\r
+                       ColXLo = Col;\r
+               else\r
+                       break;\r
+               }\r
+\r
+       unsigned IdQA = 0;\r
+       unsigned IdQB = 0;\r
+       unsigned IdAB = 0;\r
+       unsigned NQA = 0;\r
+       unsigned NQB = 0;\r
+       unsigned NAB = 0;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q) && !isgap(a))\r
+                       {\r
+                       ++NQA;\r
+                       if (q == a)\r
+                               ++IdQA;\r
+                       }\r
+\r
+               if (!isgap(q) && !isgap(b))\r
+                       {\r
+                       ++NQB;\r
+                       if (q == b)\r
+                               ++IdQB;\r
+                       }\r
+\r
+               if (!isgap(a) && !isgap(b))\r
+                       {\r
+                       ++NAB;\r
+                       if (a == b)\r
+                               ++IdAB;\r
+                       }\r
+               }\r
+\r
+       Hit.PctIdQA = Pct(IdQA, NQA);\r
+       Hit.PctIdQB = Pct(IdQB, NQB);\r
+       Hit.PctIdAB = Pct(IdAB, NAB);\r
+\r
+       unsigned LIdQA = 0;\r
+       unsigned LIdQB = 0;\r
+       for (unsigned Col = ColLo; Col < ColXLo; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q) && !isgap(a))\r
+                       {\r
+                       if (q == a)\r
+                               ++LIdQA;\r
+                       }\r
+\r
+               if (!isgap(q) && !isgap(b))\r
+                       {\r
+                       if (q == b)\r
+                               ++LIdQB;\r
+                       }\r
+               }\r
+\r
+       unsigned RIdQA = 0;\r
+       unsigned RIdQB = 0;\r
+       for (unsigned Col = ColXHi+1; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q) && !isgap(a))\r
+                       {\r
+                       if (q == a)\r
+                               ++RIdQA;\r
+                       }\r
+\r
+               if (!isgap(q) && !isgap(b))\r
+                       {\r
+                       if (q == b)\r
+                               ++RIdQB;\r
+                       }\r
+               }\r
+\r
+       unsigned IdDiffL = max(LIdQA, LIdQB) - min(LIdQA, LIdQB);\r
+       unsigned IdDiffR = max(RIdQA, RIdQB) - min(RIdQA, RIdQB);\r
+       unsigned MinIdDiff = min(IdDiffL, IdDiffR);\r
+       unsigned ColRange = ColHi - ColLo + 1;\r
+       if (opt_queryfract > 0.0f && float(ColRange)/float(QL) < opt_queryfract)\r
+               return;\r
+\r
+//     double Div = Pct(MinIdDiff, QSD.L);\r
+\r
+#if    TRACE\r
+       {\r
+       Log("  Col  A Q B   ScoreA   ScoreB      LVA      LVB      RVA      RVB\n");\r
+       Log("-----  - - -  -------  -------  -------  -------  -------  -------\n");\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               if (ColScoresA[Col] == 0.0 && ColScoresB[Col] == 0.0)\r
+                       continue;\r
+\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               Log("%5u  %c %c %c", Col, a, q, b);\r
+\r
+               if (ColScoresA[Col] == 0.0)\r
+                       Log("  %7.7s", "");\r
+               else\r
+                       Log("  %7.1f", ColScoresA[Col]);\r
+\r
+               if (ColScoresB[Col] == 0.0)\r
+                       Log("  %7.7s", "");\r
+               else\r
+                       Log("  %7.1f", ColScoresB[Col]);\r
+\r
+               Log("  %7.1f  %7.1f  %7.1f  %7.1f", LVA[Col], LVB[Col], RVA[Col], RVB[Col]);\r
+\r
+               Log("\n");\r
+               }\r
+       Log("\n");\r
+       Log("MaxSum %.1f, ColLo %u, ColXLo %u, ColX %u, ColXHi %u, ColHi %u, AF %c\n",\r
+         MaxSum, ColLo, ColXLo, ColX, ColXHi, ColHi, tof(FirstA));\r
+       Log("  LIdQA %u, LIdQB %u, RIdQA %u, RIdQB %u\n", LIdQA, LIdQB, RIdQA, RIdQB);\r
+       }\r
+#endif\r
+\r
+       string Q3L;\r
+       string A3L;\r
+       string B3L;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3[Col];\r
+               char a = A3[Col];\r
+               char b = B3[Col];\r
+\r
+               Q3L += q;\r
+               A3L += a;\r
+               B3L += b;\r
+               }\r
+\r
+       AlignChimeGlobal3(Q3L, A3L, B3L, QLabel, ALabel, BLabel, Hit);\r
+\r
+#if    0\r
+// CS SNPs\r
+       Hit.CS_LY = 0;\r
+       Hit.CS_LN = 0;\r
+       Hit.CS_RY = 0;\r
+       Hit.CS_RN = 0;\r
+       Hit.CS_LA = 0;\r
+       Hit.CS_RA = 0;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               if (q == a && q == b && a == b)\r
+                       continue;\r
+               if (isgap(q) || isgap(a) || isgap(b))\r
+                       continue;\r
+               if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+                       continue;\r
+               if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+                       continue;\r
+\r
+               if (!FirstA)\r
+                       swap(a, b);\r
+\r
+               if (Col < ColXLo)\r
+                       {\r
+                       if (q == a && q != b)\r
+                               ++Hit.CS_LY;\r
+                       else if (q == b && q != a)\r
+                               ++Hit.CS_LN;\r
+                       else\r
+                               ++Hit.CS_LA;\r
+                       }\r
+               else if (Col > ColXHi)\r
+                       {\r
+                       if (q == b && q != a)\r
+                               ++Hit.CS_RY;\r
+                       else if (q == a && q != b)\r
+                               ++Hit.CS_RN;\r
+                       else\r
+                               ++Hit.CS_RA;\r
+                       }\r
+               }\r
+\r
+       double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
+       double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
+       Hit.Score = ScoreL*ScoreR;\r
+\r
+       //Hit.QSD = QSD;\r
+       //if (FirstA)\r
+       //      {\r
+       //      Hit.ASD = ASD;\r
+       //      Hit.BSD = BSD;\r
+       //      Hit.PathQA = PathQA;\r
+       //      Hit.PathQB = PathQB;\r
+       //      }\r
+       //else\r
+       //      {\r
+       //      Hit.ASD = BSD;\r
+       //      Hit.BSD = ASD;\r
+       //      }\r
+\r
+       //Hit.ColLo = ColLo;\r
+       //Hit.ColXLo = ColXLo;\r
+       //Hit.ColXHi = ColXHi;\r
+       //Hit.ColHi = ColHi;\r
+       //Hit.Div = Div;\r
+\r
+//     Hit.LogMe();\r
+#endif\r
+       }\r
diff --git a/allocs.h b/allocs.h
new file mode 100644 (file)
index 0000000..157d03e
--- /dev/null
+++ b/allocs.h
@@ -0,0 +1,24 @@
+A(Alpha)\r
+A(Mx)\r
+A(ChainBrute)\r
+A(Chainer)\r
+A(Test)\r
+A(CompressPath)\r
+A(HSPFinder)\r
+A(Main)\r
+A(Clumps)\r
+A(Path)\r
+A(SeqDB)\r
+A(SFasta)\r
+A(SWUngapped)\r
+A(AllocBit)\r
+A(Ultra)\r
+A(UPGMA)\r
+A(Windex)\r
+A(XDropBwd)\r
+A(Xlat)\r
+A(MPath)\r
+A(ScoreCache)\r
+A(TargetHits)\r
+A(Out)\r
+A(Hashdex)\r
diff --git a/alnheuristics.h b/alnheuristics.h
new file mode 100644 (file)
index 0000000..9a8d283
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef alnheuristics_h\r
+#define alnheuristics_h\r
+\r
+struct AlnParams;\r
+\r
+// Heuristic settings used by the aligners: band radius, HSP finder word\r
+// length, seed threshold, X-drop values and minimum global HSP length.\r
+// NOTE(review): the exact meaning/units of each field are defined by the code\r
+// that consumes them, which is not part of this header -- confirm there.\r
+struct AlnHeuristics\r
+       {\r
+       unsigned BandRadius;\r
+       unsigned HSPFinderWordLength;\r
+       float SeedT;\r
+\r
+       float XDropG;                   //  GappedBlast default\r
+       float XDropU;                   //  UngappedBlast default\r
+       float XDropUG;                  //  UngappedBlast called by GappedBlast\r
+\r
+       unsigned MinGlobalHSPLength;\r
+\r
+// Construction / initialization; bodies are defined outside this header.\r
+       AlnHeuristics();\r
+       void InitFromCmdLine(const AlnParams &AP);\r
+       void InitGlobalFull();\r
+\r
+// True when both MinGlobalHSPLength and BandRadius are zero.\r
+       bool IsGlobalFull() const\r
+               {\r
+               return MinGlobalHSPLength == 0 && BandRadius == 0;\r
+               }\r
+\r
+       };\r
+\r
+#endif // alnheuristics_h\r
diff --git a/alnparams.cpp b/alnparams.cpp
new file mode 100644 (file)
index 0000000..d1b9036
--- /dev/null
@@ -0,0 +1,414 @@
+#include "myutils.h"\r
+#include <float.h>     // for FLT_MAX\r
+#include "mx.h"\r
+#include "alnparams.h"\r
+#include "hsp.h"\r
+\r
+#define TEST   0\r
+\r
+void SetBLOSUM62();
+void SetNucSubstMx(double Match, double Mismatch);\r
+void ReadSubstMx(const string &FileName, Mx<float> &Mxf);\r
+
+extern Mx<float> g_SubstMxf;
+extern float **g_SubstMx;
+\r
+void AlnParams::Clear()\r
+       {\r
+       SubstMxName = 0;\r
+       LocalOpen = OBVIOUSLY_WRONG_PENALTY;\r
+       LocalExt = OBVIOUSLY_WRONG_PENALTY;\r
+       OpenA = OBVIOUSLY_WRONG_PENALTY;\r
+       OpenB = OBVIOUSLY_WRONG_PENALTY;\r
+       ExtA = OBVIOUSLY_WRONG_PENALTY;\r
+       ExtB = OBVIOUSLY_WRONG_PENALTY;\r
+       LOpenA = OBVIOUSLY_WRONG_PENALTY;\r
+       LOpenB = OBVIOUSLY_WRONG_PENALTY;\r
+       ROpenA = OBVIOUSLY_WRONG_PENALTY;\r
+       ROpenB = OBVIOUSLY_WRONG_PENALTY;\r
+       LExtA = OBVIOUSLY_WRONG_PENALTY;\r
+       LExtB = OBVIOUSLY_WRONG_PENALTY;\r
+       RExtA = OBVIOUSLY_WRONG_PENALTY;\r
+       RExtB = OBVIOUSLY_WRONG_PENALTY;\r
+       Nucleo = false;\r
+       NucleoSet = false;\r
+       }\r
+\r
+bool AlnParams::Is2() const\r
+       {\r
+       float g = OpenA;\r
+       float e = ExtA;\r
+       if (OpenB != g || LOpenA != g || LOpenB != g || ROpenA != g || ROpenB != g)\r
+               return false;\r
+       if (ExtB != e || LExtA != e || LExtB != e || RExtA != e || RExtB != e)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+bool AlnParams::Is4() const\r
+       {\r
+       float g = OpenA;\r
+       float tg = LOpenA;\r
+       float e = ExtA;\r
+       float te = LExtA;\r
+       if (OpenB != g || LOpenA != tg || LOpenB != tg || ROpenA != tg || ROpenB != tg)\r
+               return false;\r
+       if (ExtB != e || LExtA != te || LExtB != te || RExtA != te || RExtB != te)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+// Name of the penalty scheme: "2" if Is2(), else "4" if Is4(), else "12".\r
+const char *AlnParams::GetType() const\r
+       {\r
+       if (Is2())\r
+               return "2";\r
+       return Is4() ? "4" : "12";\r
+       }\r
+\r
+void AlnParams::Init2(const float * const *Mx, float Open, float Ext)\r
+       {\r
+       SubstMx = Mx;\r
+       OpenA = OpenB = LOpenA = LOpenB = ROpenA = ROpenB = Open;\r
+       ExtA = ExtB = LExtA = LExtB = RExtA = RExtB = Ext;\r
+       }\r
+\r
+// Store the local-alignment gap open and extension scores as given\r
+// (no sign change is applied here).\r
+void AlnParams::SetLocal(float Open, float Ext)\r
+       {\r
+       LocalOpen = Open;\r
+       LocalExt = Ext;\r
+       }\r
+\r
+void AlnParams::Init4(const float * const *Mx, float Open, float Ext,\r
+  float TermOpen, float TermExt)\r
+       {\r
+       SubstMx = Mx;\r
+       OpenA = OpenB = Open;\r
+       LOpenA = LOpenB = ROpenA = ROpenB = TermOpen;\r
+       ExtA = ExtB = Ext;\r
+       LExtA = LExtB = RExtA = RExtB = TermExt;\r
+       }\r
+\r
+void AlnParams::Init(const AlnParams &AP, const HSPData &HSP,\r
+  unsigned LA, unsigned LB)\r
+       {\r
+       SubstMx = AP.SubstMx;\r
+       OpenA = AP.OpenA;\r
+       OpenB = AP.OpenB;\r
+       ExtA = AP.ExtA;\r
+       ExtB = AP.ExtB;\r
+\r
+       if (HSP.LeftA())\r
+               {\r
+               LOpenA = AP.LOpenA;\r
+               LExtA = AP.LExtA;\r
+               }\r
+       else\r
+               {\r
+               LOpenA = AP.OpenA;\r
+               LExtA = AP.ExtA;\r
+               }\r
+\r
+       if (HSP.LeftB())\r
+               {\r
+               LOpenB = AP.LOpenB;\r
+               LExtB = AP.LExtB;\r
+               }\r
+       else\r
+               {\r
+               LOpenB = AP.OpenB;\r
+               LExtB = AP.ExtB;\r
+               }\r
+\r
+       if (HSP.RightA(LA))\r
+               {\r
+               ROpenA = AP.ROpenA;\r
+               RExtA = AP.RExtA;\r
+               }\r
+       else\r
+               {\r
+               ROpenA = AP.OpenA;\r
+               RExtA = AP.ExtA;\r
+               }\r
+\r
+       if (HSP.RightB(LB))\r
+               {\r
+               ROpenB = AP.ROpenB;\r
+               RExtB = AP.RExtB;\r
+               }\r
+       else\r
+               {\r
+               ROpenB = AP.OpenB;\r
+               RExtB = AP.ExtB;\r
+               }\r
+       }\r
+\r
+void AlnParams::LogMe() const\r
+       {\r
+       Log("AlnParams(%s)", GetType());\r
+       if (Is2())\r
+               Log(" g=%.1f e=%.1f", -OpenA, -ExtA);\r
+       else if (Is4())\r
+               Log(" g=%.1f tg=%.1f e=%.1f te=%.1f", -OpenA, -ExtA, -LOpenA, -LExtA);\r
+       else\r
+               Log(\r
+" gA=%.1f gB=%.1f gAL=%.1f gBL=%.1f gAR=%.1f gBR=%.1f eA=%.1f eB=%.1f eAL=%.1f eBL=%.1f eAR=%.1f eBR=%.1f",\r
+                 OpenA, OpenB, LOpenA, LOpenB, ROpenA, ROpenB, ExtA, ExtB, LExtA, LExtB, RExtA, RExtB);\r
+       Log("\n");\r
+       }\r
+\r
+/***\r
+Open/Ext format string is one or more:\r
+       [<flag><flag>...]<value>\r
+\r
+Value is (positive) penalty or * (disabled).\r
+Flag is:\r
+       Q               Query.\r
+       T               Target sequence.\r
+       I               Internal gaps (default internal and terminal).\r
+       E               End gaps (default internal and terminal).\r
+       L               Left end.\r
+       R               Right end.\r
+***/\r
+\r
+// Parse a gap penalty string and store the resulting scores.\r
+//\r
+// s consists of one or more '/'-separated fields. Each field holds a value\r
+// (an unsigned decimal number, or '*') plus optional flag letters\r
+// Q, T, I, E, L, R, which may appear on either side of the value.\r
+// The value is stored negated (penalty -> score) into every output selected\r
+// by the flags:\r
+//     QI, QL, QR      query internal / left-end / right-end\r
+//     TI, TL, TR      target internal / left-end / right-end\r
+// Flag defaults: no Q/T means both; none of I/E/L/R means I, L and R;\r
+// E is shorthand for L and R and may not be combined with either.\r
+// '*' stores MINUS_INFINITY as the score.\r
+// Outputs not selected by any field are left unchanged; an empty string\r
+// changes nothing. Malformed input is reported through Die().\r
+static void ParseGapStr(const string &s,\r
+  float &QI, float &QL, float &QR,\r
+  float &TI, float &TL, float &TR)\r
+       {\r
+       if (s.empty())\r
+               return;\r
+\r
+       bool Q = false;\r
+       bool T = false;\r
+       bool I = false;\r
+       bool E = false;\r
+       bool L = false;\r
+       bool R = false;\r
+\r
+       const unsigned K = SIZE(s);\r
+// Scale of the next fractional digit; 0 until a decimal point is seen.\r
+// Kept as a float so that a long fraction cannot wrap an integer counter.\r
+       float Dec = 0.0f;\r
+// FLT_MAX marks "no value seen yet in this field".\r
+       float Value = FLT_MAX;\r
+// i == K visits the terminating NUL, which closes the last field.\r
+       for (unsigned i = 0; i <= K; ++i)\r
+               {\r
+               char c = s.c_str()[i];\r
+               if (c == 0 || c == '/')\r
+                       {\r
+                       if (Value == FLT_MAX)\r
+                               Die("Invalid gap penalty string, missing penalty '%s'", s.c_str());\r
+                       if (!Q && !T && !I && !E && !L && !R)\r
+                               {\r
+                               Q = true;\r
+                               T = true;\r
+                               L = true;\r
+                               R = true;\r
+                               I = true;\r
+                               }\r
+\r
+                       if (!E && !I && !L && !R)\r
+                               {\r
+                               I = true;\r
+                               L = true;\r
+                               R = true;\r
+                               }\r
+\r
+                       if (E)\r
+                               {\r
+                               if (L || R)\r
+                                       Die("Invalid gap penalty string (E and L or R) '%s'", s.c_str());\r
+                               L = true;\r
+                               R = true;\r
+                               }\r
+\r
+                       if (!Q && !T)\r
+                               {\r
+                               Q = true;\r
+                               T = true;\r
+                               }\r
+\r
+                       if (Q && L)\r
+                               QL = -Value;\r
+                       if (Q && R)\r
+                               QR = -Value;\r
+                       if (Q && I)\r
+                               QI = -Value;\r
+                       if (T && L)\r
+                               TL = -Value;\r
+                       if (T && R)\r
+                               TR = -Value;\r
+                       if (T && I)\r
+                               TI = -Value;\r
+\r
+               // Reset per-field state for the next field.\r
+                       Value = FLT_MAX;\r
+                       Dec = 0.0f;\r
+                       Q = false;\r
+                       T = false;\r
+                       I = false;\r
+                       E = false;\r
+                       L = false;\r
+                       R = false;\r
+                       }\r
+               else if (c == '*')\r
+                       {\r
+                       if (Value != FLT_MAX)\r
+                               Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
+                       Value = -MINUS_INFINITY;\r
+                       }\r
+       // Cast: passing a negative char to isdigit() is undefined behavior.\r
+               else if (isdigit((unsigned char) c))\r
+                       {\r
+                       if (Value == -MINUS_INFINITY)\r
+                               Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
+                       if (Value == FLT_MAX)\r
+                               Value = 0.0;\r
+                       if (Dec > 0)\r
+                               {\r
+                               Dec *= 10.0f;\r
+                               Value += float(c - '0')/Dec;\r
+                               }\r
+                       else\r
+                               Value = Value*10 + (c - '0');\r
+                       }\r
+               else if (c == '.')\r
+                       {\r
+                       if (Dec > 0)\r
+                               Die("Invalid gap penalty (two decimal points) '%s'", s.c_str());\r
+                       Dec = 1.0f;\r
+                       }\r
+               else\r
+                       {\r
+                       switch (c)\r
+                               {\r
+                       case 'Q':\r
+                               Q = true;\r
+                               break;\r
+                       case 'T':\r
+                               T = true;\r
+                               break;\r
+                       case 'I':\r
+                               I = true;\r
+                               break;\r
+                       case 'L':\r
+                               L = true;\r
+                               break;\r
+                       case 'R':\r
+                               R = true;\r
+                               break;\r
+                       case 'E':\r
+                               E = true;\r
+                               break;\r
+                       default:\r
+                               Die("Invalid char '%c' in gap penalty string '%s'", c, s.c_str());\r
+                               }\r
+                       }\r
+               }\r
+       }\r
+\r
+// Apply gap penalty strings to the global gap scores via ParseGapStr.\r
+// The A fields are passed in ParseGapStr's Q (query) slots and the B fields\r
+// in its T (target) slots; OpenStr updates the open scores, ExtStr the\r
+// extension scores.\r
+void AlnParams::SetPenalties(const string &OpenStr, const string &ExtStr)\r
+       {\r
+       ParseGapStr(OpenStr, OpenA, LOpenA, ROpenA, OpenB, LOpenB, ROpenB);\r
+       ParseGapStr(ExtStr, ExtA, LExtA, RExtA, ExtB, LExtB, RExtB);\r
+       }\r
+\r
+void AlnParams::SetMxFromCmdLine(bool IsNucleo)\r
+       {\r
+       if (IsNucleo)\r
+               SetNucSubstMx(opt_match, opt_mismatch);
+       else\r
+               {\r
+               if (opt_matrix == "")\r
+                       {\r
+                       SubstMxName = "BLOSUM62";\r
+                       SetBLOSUM62();
+                       }
+               else\r
+                       {\r
+                       ReadSubstMx(opt_matrix, g_SubstMxf);\r
+                       g_SubstMx = g_SubstMxf.GetData();\r
+                       g_SubstMxf.LogMe();\r
+                       SubstMxName = opt_matrix.c_str();\r
+                       }\r
+               }\r
+       SubstMx = g_SubstMx;\r
+       asserta(SubstMx != 0);\r
+       }\r
+\r
+void AlnParams::InitFromCmdLine(bool IsNucleo)\r
+       {\r
+       Clear();\r
+       Nucleo = IsNucleo;\r
+       NucleoSet = true;\r
+\r
+       SetMxFromCmdLine(IsNucleo);\r
+\r
+// Local\r
+       if (optset_lopen || optset_lext)\r
+               {\r
+               if (!optset_lopen || !optset_lext)\r
+                       Die("Must set both --lopen and --lext");\r
+               if (opt_lopen < 0.0 || opt_lext < 0.0)\r
+                       Die("Invalid --lopen/--lext, gap penalties must be >= 0");\r
+               SetLocal(float(-opt_lopen), float(-opt_lext));\r
+               }\r
+       else\r
+               {\r
+       // Same penalties, if-statement to note could differ.\r
+               if (IsNucleo)\r
+                       SetLocal(-10.0f, -1.0f);\r
+               else\r
+                       SetLocal(-10.0f, -1.0f);\r
+               }\r
+\r
+// Global\r
+       if (IsNucleo)\r
+               Init4(g_SubstMx, -10.0, -1.0, -0.5, -0.5);
+       else\r
+               Init4(g_SubstMx, -17.0, -1.0, -0.5, -0.5);
+       SetPenalties(opt_gapopen, opt_gapext);\r
+       }\r
+\r
+// Returns the stored local-alignment gap open score.\r
+float AlnParams::GetLocalOpen() const\r
+       {\r
+       return LocalOpen;\r
+       }\r
+\r
+// Returns the stored local-alignment gap extension score.\r
+float AlnParams::GetLocalExt() const\r
+       {\r
+       return LocalExt;\r
+       }\r
+\r
+// Returns the Nucleo flag; asserts that it has been assigned (NucleoSet).\r
+bool AlnParams::GetIsNucleo() const\r
+       {\r
+       asserta(NucleoSet);\r
+       return Nucleo;\r
+       }\r
+\r
+unsigned GetWindexWordLength(bool Nucleo)\r
+       {\r
+       if (optset_w)\r
+               return opt_w;\r
+\r
+       if (Nucleo)\r
+               return 8;\r
+       else\r
+               return 5;\r
+       }\r
+\r
+#if    TEST\r
+// Log the two penalty strings, apply them to a fresh AlnParams and log the\r
+// result.\r
+// NOTE(review): AP is not Clear()ed first, so any field not selected by the\r
+// strings is uninitialized when LogMe() reads it.\r
+static void Test1(const string &os, const string &es)\r
+       {\r
+       AlnParams AP;\r
+       Log("\n");\r
+       Log("OpenStr %s\n", os.c_str());\r
+       Log(" ExtStr %s\n", es.c_str());\r
+       AP.SetPenalties(os, es);\r
+       AP.LogMe();\r
+       }\r
+\r
+// Run Test1 on sample open/extension strings that exercise the I/E, L/R\r
+// and Q/T flags, including fractional values without a leading digit.\r
+void TestGapStr()\r
+       {\r
+       Test1("17I/0.5E", "1I/0.5E");\r
+       Test1("17I/0.5L/0.4R", "1Q/2T");\r
+       Test1("1QL/2QR/3QI/4TL/5TR/6TI", ".1QL/.2QR/.3QI/.4TL/.5TR/.6TI");\r
+       }\r
+#endif // TEST\r
diff --git a/alnparams.h b/alnparams.h
new file mode 100644 (file)
index 0000000..4037912
--- /dev/null
@@ -0,0 +1,59 @@
+#ifndef alnparams_h\r
+#define alnparams_h\r
+\r
+struct HSPData;\r
+\r
+// Alignment parameters: substitution matrix plus gap scores for local\r
+// alignment and for global alignment (internal and terminal gaps).\r
+// Gap penalty scores are negative\r
+// (i.e., are scores, not penalties).\r
+// Field naming: L/R prefix = left/right terminal gap; A/B suffix = the two\r
+// sequences. SetPenalties() maps the Q (query) flags of a penalty string to\r
+// the A fields and the T (target) flags to the B fields.\r
+struct AlnParams\r
+       {\r
+// Name of the substitution matrix and the matrix itself (rows of floats).\r
+       const char *SubstMxName;\r
+       const float * const *SubstMx;\r
+\r
+// Nucleo is meaningful only once NucleoSet is true; GetIsNucleo() asserts it.\r
+       bool Nucleo;\r
+       bool NucleoSet;\r
+\r
+// Local gaps\r
+       float LocalOpen;\r
+       float LocalExt;\r
+\r
+// Global internal gaps\r
+       float OpenA;\r
+       float OpenB;\r
+\r
+       float ExtA;\r
+       float ExtB;\r
+\r
+// Global terminal gaps\r
+       float LOpenA;\r
+       float LOpenB;\r
+       float ROpenA;\r
+       float ROpenB;\r
+\r
+       float LExtA;\r
+       float LExtB;\r
+       float RExtA;\r
+       float RExtB;\r
+\r
+// Initialization and setters\r
+       void Clear();\r
+       void SetLocal(float Open, float Ext);\r
+       void Init2(const float * const *Mx, float Open, float Ext);\r
+       void Init4(const float * const *Mx, float Open, float Ext, float TermOpen, float TermExt);\r
+       void Init(const AlnParams &AP, const HSPData &HSP, unsigned LA, unsigned LB);\r
+       void InitFromCmdLine(bool Nucleo);\r
+       void SetMxFromCmdLine(bool Nucleo);\r
+       void SetPenalties(const string &OpenStr, const string &ExtStr);\r
+\r
+// Accessors\r
+       float GetLocalOpen() const;\r
+       float GetLocalExt() const;\r
+       bool GetIsNucleo() const;\r
+\r
+// Penalty scheme classification: GetType() returns "2", "4" or "12".\r
+       bool Is2() const;\r
+       bool Is4() const;\r
+       const char *GetType() const;\r
+\r
+       void LogMe() const;\r
+       };\r
+\r
+// Sentinel that AlnParams::Clear() assigns to every gap score; it is positive,\r
+// whereas valid gap scores are negative.\r
+const float OBVIOUSLY_WRONG_PENALTY = 1000.0;\r
+\r
+#endif // alnparams_h\r
diff --git a/alpha.cpp b/alpha.cpp
new file mode 100644 (file)
index 0000000..0efca3b
--- /dev/null
+++ b/alpha.cpp
@@ -0,0 +1,2761 @@
+// Generated by /p/py/alphac.py
+#include "alpha.h"
+
+unsigned g_CharToLetterAminoStop[256] =
+       {
+       INVALID_LETTER, // [  0] 0x00
+       INVALID_LETTER, // [  1] 0x01
+       INVALID_LETTER, // [  2] 0x02
+       INVALID_LETTER, // [  3] 0x03
+       INVALID_LETTER, // [  4] 0x04
+       INVALID_LETTER, // [  5] 0x05
+       INVALID_LETTER, // [  6] 0x06
+       INVALID_LETTER, // [  7] 0x07
+       INVALID_LETTER, // [  8] 0x08
+       INVALID_LETTER, // [  9] 0x09
+       INVALID_LETTER, // [ 10] 0x0a
+       INVALID_LETTER, // [ 11] 0x0b
+       INVALID_LETTER, // [ 12] 0x0c
+       INVALID_LETTER, // [ 13] 0x0d
+       INVALID_LETTER, // [ 14] 0x0e
+       INVALID_LETTER, // [ 15] 0x0f
+       INVALID_LETTER, // [ 16] 0x10
+       INVALID_LETTER, // [ 17] 0x11
+       INVALID_LETTER, // [ 18] 0x12
+       INVALID_LETTER, // [ 19] 0x13
+       INVALID_LETTER, // [ 20] 0x14
+       INVALID_LETTER, // [ 21] 0x15
+       INVALID_LETTER, // [ 22] 0x16
+       INVALID_LETTER, // [ 23] 0x17
+       INVALID_LETTER, // [ 24] 0x18
+       INVALID_LETTER, // [ 25] 0x19
+       INVALID_LETTER, // [ 26] 0x1a
+       INVALID_LETTER, // [ 27] 0x1b
+       INVALID_LETTER, // [ 28] 0x1c
+       INVALID_LETTER, // [ 29] 0x1d
+       INVALID_LETTER, // [ 30] 0x1e
+       INVALID_LETTER, // [ 31] 0x1f
+       INVALID_LETTER, // [ 32] ' '
+       INVALID_LETTER, // [ 33] '!'
+       INVALID_LETTER, // [ 34] '"'
+       INVALID_LETTER, // [ 35] '#'
+       INVALID_LETTER, // [ 36] '$'
+       INVALID_LETTER, // [ 37] '%'
+       INVALID_LETTER, // [ 38] '&'
+       INVALID_LETTER, // [ 39] '''
+       INVALID_LETTER, // [ 40] '('
+       INVALID_LETTER, // [ 41] ')'
+       20 ,            // [ 42] '*' = STP
+       INVALID_LETTER, // [ 43] '+'
+       INVALID_LETTER, // [ 44] ','
+       INVALID_LETTER, // [ 45] '-'
+       INVALID_LETTER, // [ 46] '.'
+       INVALID_LETTER, // [ 47] '/'
+       INVALID_LETTER, // [ 48] '0'
+       INVALID_LETTER, // [ 49] '1'
+       INVALID_LETTER, // [ 50] '2'
+       INVALID_LETTER, // [ 51] '3'
+       INVALID_LETTER, // [ 52] '4'
+       INVALID_LETTER, // [ 53] '5'
+       INVALID_LETTER, // [ 54] '6'
+       INVALID_LETTER, // [ 55] '7'
+       INVALID_LETTER, // [ 56] '8'
+       INVALID_LETTER, // [ 57] '9'
+       INVALID_LETTER, // [ 58] ':'
+       INVALID_LETTER, // [ 59] ';'
+       INVALID_LETTER, // [ 60] '<'
+       INVALID_LETTER, // [ 61] '='
+       INVALID_LETTER, // [ 62] '>'
+       INVALID_LETTER, // [ 63] '?'
+       INVALID_LETTER, // [ 64] '@'
+       0  ,            // [ 65] 'A' = Ala
+       INVALID_LETTER, // [ 66] 'B'
+       1  ,            // [ 67] 'C' = Cys
+       2  ,            // [ 68] 'D' = Asp
+       3  ,            // [ 69] 'E' = Glu
+       4  ,            // [ 70] 'F' = Phe
+       5  ,            // [ 71] 'G' = Gly
+       6  ,            // [ 72] 'H' = His
+       7  ,            // [ 73] 'I' = Ile
+       INVALID_LETTER, // [ 74] 'J'
+       8  ,            // [ 75] 'K' = Lys
+       9  ,            // [ 76] 'L' = Leu
+       10 ,            // [ 77] 'M' = Met
+       11 ,            // [ 78] 'N' = Asn
+       INVALID_LETTER, // [ 79] 'O'
+       12 ,            // [ 80] 'P' = Pro
+       13 ,            // [ 81] 'Q' = Gln
+       14 ,            // [ 82] 'R' = Arg
+       15 ,            // [ 83] 'S' = Ser
+       16 ,            // [ 84] 'T' = Thr
+       INVALID_LETTER, // [ 85] 'U'
+       17 ,            // [ 86] 'V' = Val
+       18 ,            // [ 87] 'W' = Trp
+       INVALID_LETTER, // [ 88] 'X'
+       19 ,            // [ 89] 'Y' = Tyr
+       INVALID_LETTER, // [ 90] 'Z'
+       INVALID_LETTER, // [ 91] '['
+       INVALID_LETTER, // [ 92] '\'
+       INVALID_LETTER, // [ 93] ']'
+       INVALID_LETTER, // [ 94] '^'
+       INVALID_LETTER, // [ 95] '_'
+       INVALID_LETTER, // [ 96] '`'
+       0  ,            // [ 97] 'a' = Ala
+       INVALID_LETTER, // [ 98] 'b'
+       1  ,            // [ 99] 'c' = Cys
+       2  ,            // [100] 'd' = Asp
+       3  ,            // [101] 'e' = Glu
+       4  ,            // [102] 'f' = Phe
+       5  ,            // [103] 'g' = Gly
+       6  ,            // [104] 'h' = His
+       7  ,            // [105] 'i' = Ile
+       INVALID_LETTER, // [106] 'j'
+       8  ,            // [107] 'k' = Lys
+       9  ,            // [108] 'l' = Leu
+       10 ,            // [109] 'm' = Met
+       11 ,            // [110] 'n' = Asn
+       INVALID_LETTER, // [111] 'o'
+       12 ,            // [112] 'p' = Pro
+       13 ,            // [113] 'q' = Gln
+       14 ,            // [114] 'r' = Arg
+       15 ,            // [115] 's' = Ser
+       16 ,            // [116] 't' = Thr
+       INVALID_LETTER, // [117] 'u'
+       17 ,            // [118] 'v' = Val
+       18 ,            // [119] 'w' = Trp
+       INVALID_LETTER, // [120] 'x'
+       19 ,            // [121] 'y' = Tyr
+       INVALID_LETTER, // [122] 'z'
+       INVALID_LETTER, // [123] '{'
+       INVALID_LETTER, // [124] '|'
+       INVALID_LETTER, // [125] '}'
+       INVALID_LETTER, // [126] '~'
+       INVALID_LETTER, // [127] 0x7f
+       INVALID_LETTER, // [128] 0x80
+       INVALID_LETTER, // [129] 0x81
+       INVALID_LETTER, // [130] 0x82
+       INVALID_LETTER, // [131] 0x83
+       INVALID_LETTER, // [132] 0x84
+       INVALID_LETTER, // [133] 0x85
+       INVALID_LETTER, // [134] 0x86
+       INVALID_LETTER, // [135] 0x87
+       INVALID_LETTER, // [136] 0x88
+       INVALID_LETTER, // [137] 0x89
+       INVALID_LETTER, // [138] 0x8a
+       INVALID_LETTER, // [139] 0x8b
+       INVALID_LETTER, // [140] 0x8c
+       INVALID_LETTER, // [141] 0x8d
+       INVALID_LETTER, // [142] 0x8e
+       INVALID_LETTER, // [143] 0x8f
+       INVALID_LETTER, // [144] 0x90
+       INVALID_LETTER, // [145] 0x91
+       INVALID_LETTER, // [146] 0x92
+       INVALID_LETTER, // [147] 0x93
+       INVALID_LETTER, // [148] 0x94
+       INVALID_LETTER, // [149] 0x95
+       INVALID_LETTER, // [150] 0x96
+       INVALID_LETTER, // [151] 0x97
+       INVALID_LETTER, // [152] 0x98
+       INVALID_LETTER, // [153] 0x99
+       INVALID_LETTER, // [154] 0x9a
+       INVALID_LETTER, // [155] 0x9b
+       INVALID_LETTER, // [156] 0x9c
+       INVALID_LETTER, // [157] 0x9d
+       INVALID_LETTER, // [158] 0x9e
+       INVALID_LETTER, // [159] 0x9f
+       INVALID_LETTER, // [160] 0xa0
+       INVALID_LETTER, // [161] 0xa1
+       INVALID_LETTER, // [162] 0xa2
+       INVALID_LETTER, // [163] 0xa3
+       INVALID_LETTER, // [164] 0xa4
+       INVALID_LETTER, // [165] 0xa5
+       INVALID_LETTER, // [166] 0xa6
+       INVALID_LETTER, // [167] 0xa7
+       INVALID_LETTER, // [168] 0xa8
+       INVALID_LETTER, // [169] 0xa9
+       INVALID_LETTER, // [170] 0xaa
+       INVALID_LETTER, // [171] 0xab
+       INVALID_LETTER, // [172] 0xac
+       INVALID_LETTER, // [173] 0xad
+       INVALID_LETTER, // [174] 0xae
+       INVALID_LETTER, // [175] 0xaf
+       INVALID_LETTER, // [176] 0xb0
+       INVALID_LETTER, // [177] 0xb1
+       INVALID_LETTER, // [178] 0xb2
+       INVALID_LETTER, // [179] 0xb3
+       INVALID_LETTER, // [180] 0xb4
+       INVALID_LETTER, // [181] 0xb5
+       INVALID_LETTER, // [182] 0xb6
+       INVALID_LETTER, // [183] 0xb7
+       INVALID_LETTER, // [184] 0xb8
+       INVALID_LETTER, // [185] 0xb9
+       INVALID_LETTER, // [186] 0xba
+       INVALID_LETTER, // [187] 0xbb
+       INVALID_LETTER, // [188] 0xbc
+       INVALID_LETTER, // [189] 0xbd
+       INVALID_LETTER, // [190] 0xbe
+       INVALID_LETTER, // [191] 0xbf
+       INVALID_LETTER, // [192] 0xc0
+       INVALID_LETTER, // [193] 0xc1
+       INVALID_LETTER, // [194] 0xc2
+       INVALID_LETTER, // [195] 0xc3
+       INVALID_LETTER, // [196] 0xc4
+       INVALID_LETTER, // [197] 0xc5
+       INVALID_LETTER, // [198] 0xc6
+       INVALID_LETTER, // [199] 0xc7
+       INVALID_LETTER, // [200] 0xc8
+       INVALID_LETTER, // [201] 0xc9
+       INVALID_LETTER, // [202] 0xca
+       INVALID_LETTER, // [203] 0xcb
+       INVALID_LETTER, // [204] 0xcc
+       INVALID_LETTER, // [205] 0xcd
+       INVALID_LETTER, // [206] 0xce
+       INVALID_LETTER, // [207] 0xcf
+       INVALID_LETTER, // [208] 0xd0
+       INVALID_LETTER, // [209] 0xd1
+       INVALID_LETTER, // [210] 0xd2
+       INVALID_LETTER, // [211] 0xd3
+       INVALID_LETTER, // [212] 0xd4
+       INVALID_LETTER, // [213] 0xd5
+       INVALID_LETTER, // [214] 0xd6
+       INVALID_LETTER, // [215] 0xd7
+       INVALID_LETTER, // [216] 0xd8
+       INVALID_LETTER, // [217] 0xd9
+       INVALID_LETTER, // [218] 0xda
+       INVALID_LETTER, // [219] 0xdb
+       INVALID_LETTER, // [220] 0xdc
+       INVALID_LETTER, // [221] 0xdd
+       INVALID_LETTER, // [222] 0xde
+       INVALID_LETTER, // [223] 0xdf
+       INVALID_LETTER, // [224] 0xe0
+       INVALID_LETTER, // [225] 0xe1
+       INVALID_LETTER, // [226] 0xe2
+       INVALID_LETTER, // [227] 0xe3
+       INVALID_LETTER, // [228] 0xe4
+       INVALID_LETTER, // [229] 0xe5
+       INVALID_LETTER, // [230] 0xe6
+       INVALID_LETTER, // [231] 0xe7
+       INVALID_LETTER, // [232] 0xe8
+       INVALID_LETTER, // [233] 0xe9
+       INVALID_LETTER, // [234] 0xea
+       INVALID_LETTER, // [235] 0xeb
+       INVALID_LETTER, // [236] 0xec
+       INVALID_LETTER, // [237] 0xed
+       INVALID_LETTER, // [238] 0xee
+       INVALID_LETTER, // [239] 0xef
+       INVALID_LETTER, // [240] 0xf0
+       INVALID_LETTER, // [241] 0xf1
+       INVALID_LETTER, // [242] 0xf2
+       INVALID_LETTER, // [243] 0xf3
+       INVALID_LETTER, // [244] 0xf4
+       INVALID_LETTER, // [245] 0xf5
+       INVALID_LETTER, // [246] 0xf6
+       INVALID_LETTER, // [247] 0xf7
+       INVALID_LETTER, // [248] 0xf8
+       INVALID_LETTER, // [249] 0xf9
+       INVALID_LETTER, // [250] 0xfa
+       INVALID_LETTER, // [251] 0xfb
+       INVALID_LETTER, // [252] 0xfc
+       INVALID_LETTER, // [253] 0xfd
+       INVALID_LETTER, // [254] 0xfe
+       INVALID_LETTER, // [255] 0xff
+       };
+unsigned g_CharToLetterAmino[256] = // Lookup table: character code (0..255) -> amino-acid letter index.
+       { // The 20 residue letters map to 0..19 in either case; every other code is INVALID_LETTER.
+       INVALID_LETTER, // [  0] 0x00
+       INVALID_LETTER, // [  1] 0x01
+       INVALID_LETTER, // [  2] 0x02
+       INVALID_LETTER, // [  3] 0x03
+       INVALID_LETTER, // [  4] 0x04
+       INVALID_LETTER, // [  5] 0x05
+       INVALID_LETTER, // [  6] 0x06
+       INVALID_LETTER, // [  7] 0x07
+       INVALID_LETTER, // [  8] 0x08
+       INVALID_LETTER, // [  9] 0x09
+       INVALID_LETTER, // [ 10] 0x0a
+       INVALID_LETTER, // [ 11] 0x0b
+       INVALID_LETTER, // [ 12] 0x0c
+       INVALID_LETTER, // [ 13] 0x0d
+       INVALID_LETTER, // [ 14] 0x0e
+       INVALID_LETTER, // [ 15] 0x0f
+       INVALID_LETTER, // [ 16] 0x10
+       INVALID_LETTER, // [ 17] 0x11
+       INVALID_LETTER, // [ 18] 0x12
+       INVALID_LETTER, // [ 19] 0x13
+       INVALID_LETTER, // [ 20] 0x14
+       INVALID_LETTER, // [ 21] 0x15
+       INVALID_LETTER, // [ 22] 0x16
+       INVALID_LETTER, // [ 23] 0x17
+       INVALID_LETTER, // [ 24] 0x18
+       INVALID_LETTER, // [ 25] 0x19
+       INVALID_LETTER, // [ 26] 0x1a
+       INVALID_LETTER, // [ 27] 0x1b
+       INVALID_LETTER, // [ 28] 0x1c
+       INVALID_LETTER, // [ 29] 0x1d
+       INVALID_LETTER, // [ 30] 0x1e
+       INVALID_LETTER, // [ 31] 0x1f
+       INVALID_LETTER, // [ 32] ' '
+       INVALID_LETTER, // [ 33] '!'
+       INVALID_LETTER, // [ 34] '"'
+       INVALID_LETTER, // [ 35] '#'
+       INVALID_LETTER, // [ 36] '$'
+       INVALID_LETTER, // [ 37] '%'
+       INVALID_LETTER, // [ 38] '&'
+       INVALID_LETTER, // [ 39] '''
+       INVALID_LETTER, // [ 40] '('
+       INVALID_LETTER, // [ 41] ')'
+       INVALID_LETTER, // [ 42] '*' (not mapped here, although g_LetterToCharAmino[20] is '*' -- NOTE(review): confirm this asymmetry is intended)
+       INVALID_LETTER, // [ 43] '+'
+       INVALID_LETTER, // [ 44] ','
+       INVALID_LETTER, // [ 45] '-'
+       INVALID_LETTER, // [ 46] '.'
+       INVALID_LETTER, // [ 47] '/'
+       INVALID_LETTER, // [ 48] '0'
+       INVALID_LETTER, // [ 49] '1'
+       INVALID_LETTER, // [ 50] '2'
+       INVALID_LETTER, // [ 51] '3'
+       INVALID_LETTER, // [ 52] '4'
+       INVALID_LETTER, // [ 53] '5'
+       INVALID_LETTER, // [ 54] '6'
+       INVALID_LETTER, // [ 55] '7'
+       INVALID_LETTER, // [ 56] '8'
+       INVALID_LETTER, // [ 57] '9'
+       INVALID_LETTER, // [ 58] ':'
+       INVALID_LETTER, // [ 59] ';'
+       INVALID_LETTER, // [ 60] '<'
+       INVALID_LETTER, // [ 61] '='
+       INVALID_LETTER, // [ 62] '>'
+       INVALID_LETTER, // [ 63] '?'
+       INVALID_LETTER, // [ 64] '@'
+       0  ,            // [ 65] 'A' = Ala
+       INVALID_LETTER, // [ 66] 'B' (not one of the 20 mapped residue letters)
+       1  ,            // [ 67] 'C' = Cys
+       2  ,            // [ 68] 'D' = Asp
+       3  ,            // [ 69] 'E' = Glu
+       4  ,            // [ 70] 'F' = Phe
+       5  ,            // [ 71] 'G' = Gly
+       6  ,            // [ 72] 'H' = His
+       7  ,            // [ 73] 'I' = Ile
+       INVALID_LETTER, // [ 74] 'J' (not one of the 20 mapped residue letters)
+       8  ,            // [ 75] 'K' = Lys
+       9  ,            // [ 76] 'L' = Leu
+       10 ,            // [ 77] 'M' = Met
+       11 ,            // [ 78] 'N' = Asn
+       INVALID_LETTER, // [ 79] 'O' (not one of the 20 mapped residue letters)
+       12 ,            // [ 80] 'P' = Pro
+       13 ,            // [ 81] 'Q' = Gln
+       14 ,            // [ 82] 'R' = Arg
+       15 ,            // [ 83] 'S' = Ser
+       16 ,            // [ 84] 'T' = Thr
+       INVALID_LETTER, // [ 85] 'U' (not one of the 20 mapped residue letters)
+       17 ,            // [ 86] 'V' = Val
+       18 ,            // [ 87] 'W' = Trp
+       INVALID_LETTER, // [ 88] 'X' (not one of the 20 mapped residue letters)
+       19 ,            // [ 89] 'Y' = Tyr
+       INVALID_LETTER, // [ 90] 'Z' (not one of the 20 mapped residue letters)
+       INVALID_LETTER, // [ 91] '['
+       INVALID_LETTER, // [ 92] '\'
+       INVALID_LETTER, // [ 93] ']'
+       INVALID_LETTER, // [ 94] '^'
+       INVALID_LETTER, // [ 95] '_'
+       INVALID_LETTER, // [ 96] '`'
+       0  ,            // [ 97] 'a' = Ala
+       INVALID_LETTER, // [ 98] 'b' (not one of the 20 mapped residue letters)
+       1  ,            // [ 99] 'c' = Cys
+       2  ,            // [100] 'd' = Asp
+       3  ,            // [101] 'e' = Glu
+       4  ,            // [102] 'f' = Phe
+       5  ,            // [103] 'g' = Gly
+       6  ,            // [104] 'h' = His
+       7  ,            // [105] 'i' = Ile
+       INVALID_LETTER, // [106] 'j' (not one of the 20 mapped residue letters)
+       8  ,            // [107] 'k' = Lys
+       9  ,            // [108] 'l' = Leu
+       10 ,            // [109] 'm' = Met
+       11 ,            // [110] 'n' = Asn
+       INVALID_LETTER, // [111] 'o' (not one of the 20 mapped residue letters)
+       12 ,            // [112] 'p' = Pro
+       13 ,            // [113] 'q' = Gln
+       14 ,            // [114] 'r' = Arg
+       15 ,            // [115] 's' = Ser
+       16 ,            // [116] 't' = Thr
+       INVALID_LETTER, // [117] 'u' (not one of the 20 mapped residue letters)
+       17 ,            // [118] 'v' = Val
+       18 ,            // [119] 'w' = Trp
+       INVALID_LETTER, // [120] 'x' (not one of the 20 mapped residue letters)
+       19 ,            // [121] 'y' = Tyr
+       INVALID_LETTER, // [122] 'z' (not one of the 20 mapped residue letters)
+       INVALID_LETTER, // [123] '{'
+       INVALID_LETTER, // [124] '|'
+       INVALID_LETTER, // [125] '}'
+       INVALID_LETTER, // [126] '~'
+       INVALID_LETTER, // [127] 0x7f
+       INVALID_LETTER, // [128] 0x80
+       INVALID_LETTER, // [129] 0x81
+       INVALID_LETTER, // [130] 0x82
+       INVALID_LETTER, // [131] 0x83
+       INVALID_LETTER, // [132] 0x84
+       INVALID_LETTER, // [133] 0x85
+       INVALID_LETTER, // [134] 0x86
+       INVALID_LETTER, // [135] 0x87
+       INVALID_LETTER, // [136] 0x88
+       INVALID_LETTER, // [137] 0x89
+       INVALID_LETTER, // [138] 0x8a
+       INVALID_LETTER, // [139] 0x8b
+       INVALID_LETTER, // [140] 0x8c
+       INVALID_LETTER, // [141] 0x8d
+       INVALID_LETTER, // [142] 0x8e
+       INVALID_LETTER, // [143] 0x8f
+       INVALID_LETTER, // [144] 0x90
+       INVALID_LETTER, // [145] 0x91
+       INVALID_LETTER, // [146] 0x92
+       INVALID_LETTER, // [147] 0x93
+       INVALID_LETTER, // [148] 0x94
+       INVALID_LETTER, // [149] 0x95
+       INVALID_LETTER, // [150] 0x96
+       INVALID_LETTER, // [151] 0x97
+       INVALID_LETTER, // [152] 0x98
+       INVALID_LETTER, // [153] 0x99
+       INVALID_LETTER, // [154] 0x9a
+       INVALID_LETTER, // [155] 0x9b
+       INVALID_LETTER, // [156] 0x9c
+       INVALID_LETTER, // [157] 0x9d
+       INVALID_LETTER, // [158] 0x9e
+       INVALID_LETTER, // [159] 0x9f
+       INVALID_LETTER, // [160] 0xa0
+       INVALID_LETTER, // [161] 0xa1
+       INVALID_LETTER, // [162] 0xa2
+       INVALID_LETTER, // [163] 0xa3
+       INVALID_LETTER, // [164] 0xa4
+       INVALID_LETTER, // [165] 0xa5
+       INVALID_LETTER, // [166] 0xa6
+       INVALID_LETTER, // [167] 0xa7
+       INVALID_LETTER, // [168] 0xa8
+       INVALID_LETTER, // [169] 0xa9
+       INVALID_LETTER, // [170] 0xaa
+       INVALID_LETTER, // [171] 0xab
+       INVALID_LETTER, // [172] 0xac
+       INVALID_LETTER, // [173] 0xad
+       INVALID_LETTER, // [174] 0xae
+       INVALID_LETTER, // [175] 0xaf
+       INVALID_LETTER, // [176] 0xb0
+       INVALID_LETTER, // [177] 0xb1
+       INVALID_LETTER, // [178] 0xb2
+       INVALID_LETTER, // [179] 0xb3
+       INVALID_LETTER, // [180] 0xb4
+       INVALID_LETTER, // [181] 0xb5
+       INVALID_LETTER, // [182] 0xb6
+       INVALID_LETTER, // [183] 0xb7
+       INVALID_LETTER, // [184] 0xb8
+       INVALID_LETTER, // [185] 0xb9
+       INVALID_LETTER, // [186] 0xba
+       INVALID_LETTER, // [187] 0xbb
+       INVALID_LETTER, // [188] 0xbc
+       INVALID_LETTER, // [189] 0xbd
+       INVALID_LETTER, // [190] 0xbe
+       INVALID_LETTER, // [191] 0xbf
+       INVALID_LETTER, // [192] 0xc0
+       INVALID_LETTER, // [193] 0xc1
+       INVALID_LETTER, // [194] 0xc2
+       INVALID_LETTER, // [195] 0xc3
+       INVALID_LETTER, // [196] 0xc4
+       INVALID_LETTER, // [197] 0xc5
+       INVALID_LETTER, // [198] 0xc6
+       INVALID_LETTER, // [199] 0xc7
+       INVALID_LETTER, // [200] 0xc8
+       INVALID_LETTER, // [201] 0xc9
+       INVALID_LETTER, // [202] 0xca
+       INVALID_LETTER, // [203] 0xcb
+       INVALID_LETTER, // [204] 0xcc
+       INVALID_LETTER, // [205] 0xcd
+       INVALID_LETTER, // [206] 0xce
+       INVALID_LETTER, // [207] 0xcf
+       INVALID_LETTER, // [208] 0xd0
+       INVALID_LETTER, // [209] 0xd1
+       INVALID_LETTER, // [210] 0xd2
+       INVALID_LETTER, // [211] 0xd3
+       INVALID_LETTER, // [212] 0xd4
+       INVALID_LETTER, // [213] 0xd5
+       INVALID_LETTER, // [214] 0xd6
+       INVALID_LETTER, // [215] 0xd7
+       INVALID_LETTER, // [216] 0xd8
+       INVALID_LETTER, // [217] 0xd9
+       INVALID_LETTER, // [218] 0xda
+       INVALID_LETTER, // [219] 0xdb
+       INVALID_LETTER, // [220] 0xdc
+       INVALID_LETTER, // [221] 0xdd
+       INVALID_LETTER, // [222] 0xde
+       INVALID_LETTER, // [223] 0xdf
+       INVALID_LETTER, // [224] 0xe0
+       INVALID_LETTER, // [225] 0xe1
+       INVALID_LETTER, // [226] 0xe2
+       INVALID_LETTER, // [227] 0xe3
+       INVALID_LETTER, // [228] 0xe4
+       INVALID_LETTER, // [229] 0xe5
+       INVALID_LETTER, // [230] 0xe6
+       INVALID_LETTER, // [231] 0xe7
+       INVALID_LETTER, // [232] 0xe8
+       INVALID_LETTER, // [233] 0xe9
+       INVALID_LETTER, // [234] 0xea
+       INVALID_LETTER, // [235] 0xeb
+       INVALID_LETTER, // [236] 0xec
+       INVALID_LETTER, // [237] 0xed
+       INVALID_LETTER, // [238] 0xee
+       INVALID_LETTER, // [239] 0xef
+       INVALID_LETTER, // [240] 0xf0
+       INVALID_LETTER, // [241] 0xf1
+       INVALID_LETTER, // [242] 0xf2
+       INVALID_LETTER, // [243] 0xf3
+       INVALID_LETTER, // [244] 0xf4
+       INVALID_LETTER, // [245] 0xf5
+       INVALID_LETTER, // [246] 0xf6
+       INVALID_LETTER, // [247] 0xf7
+       INVALID_LETTER, // [248] 0xf8
+       INVALID_LETTER, // [249] 0xf9
+       INVALID_LETTER, // [250] 0xfa
+       INVALID_LETTER, // [251] 0xfb
+       INVALID_LETTER, // [252] 0xfc
+       INVALID_LETTER, // [253] 0xfd
+       INVALID_LETTER, // [254] 0xfe
+       INVALID_LETTER, // [255] 0xff
+       };
+
+unsigned char g_LetterToCharAmino[256] = // Lookup table: amino-acid letter index -> upper-case one-letter character.
+       { // Indexes 0..19 invert g_CharToLetterAmino; index 20 is '*'; indexes 21..255 are INVALID_CHAR.
+       'A', // [0] Ala
+       'C', // [1] Cys
+       'D', // [2] Asp
+       'E', // [3] Glu
+       'F', // [4] Phe
+       'G', // [5] Gly
+       'H', // [6] His
+       'I', // [7] Ile
+       'K', // [8] Lys
+       'L', // [9] Leu
+       'M', // [10] Met
+       'N', // [11] Asn
+       'P', // [12] Pro
+       'Q', // [13] Gln
+       'R', // [14] Arg
+       'S', // [15] Ser
+       'T', // [16] Thr
+       'V', // [17] Val
+       'W', // [18] Trp
+       'Y', // [19] Tyr
+       '*', // [20] no character in g_CharToLetterAmino yields 20 -- NOTE(review): presumably a stop/terminator symbol; confirm
+       INVALID_CHAR, // [21]
+       INVALID_CHAR, // [22]
+       INVALID_CHAR, // [23]
+       INVALID_CHAR, // [24]
+       INVALID_CHAR, // [25]
+       INVALID_CHAR, // [26]
+       INVALID_CHAR, // [27]
+       INVALID_CHAR, // [28]
+       INVALID_CHAR, // [29]
+       INVALID_CHAR, // [30]
+       INVALID_CHAR, // [31]
+       INVALID_CHAR, // [32]
+       INVALID_CHAR, // [33]
+       INVALID_CHAR, // [34]
+       INVALID_CHAR, // [35]
+       INVALID_CHAR, // [36]
+       INVALID_CHAR, // [37]
+       INVALID_CHAR, // [38]
+       INVALID_CHAR, // [39]
+       INVALID_CHAR, // [40]
+       INVALID_CHAR, // [41]
+       INVALID_CHAR, // [42]
+       INVALID_CHAR, // [43]
+       INVALID_CHAR, // [44]
+       INVALID_CHAR, // [45]
+       INVALID_CHAR, // [46]
+       INVALID_CHAR, // [47]
+       INVALID_CHAR, // [48]
+       INVALID_CHAR, // [49]
+       INVALID_CHAR, // [50]
+       INVALID_CHAR, // [51]
+       INVALID_CHAR, // [52]
+       INVALID_CHAR, // [53]
+       INVALID_CHAR, // [54]
+       INVALID_CHAR, // [55]
+       INVALID_CHAR, // [56]
+       INVALID_CHAR, // [57]
+       INVALID_CHAR, // [58]
+       INVALID_CHAR, // [59]
+       INVALID_CHAR, // [60]
+       INVALID_CHAR, // [61]
+       INVALID_CHAR, // [62]
+       INVALID_CHAR, // [63]
+       INVALID_CHAR, // [64]
+       INVALID_CHAR, // [65]
+       INVALID_CHAR, // [66]
+       INVALID_CHAR, // [67]
+       INVALID_CHAR, // [68]
+       INVALID_CHAR, // [69]
+       INVALID_CHAR, // [70]
+       INVALID_CHAR, // [71]
+       INVALID_CHAR, // [72]
+       INVALID_CHAR, // [73]
+       INVALID_CHAR, // [74]
+       INVALID_CHAR, // [75]
+       INVALID_CHAR, // [76]
+       INVALID_CHAR, // [77]
+       INVALID_CHAR, // [78]
+       INVALID_CHAR, // [79]
+       INVALID_CHAR, // [80]
+       INVALID_CHAR, // [81]
+       INVALID_CHAR, // [82]
+       INVALID_CHAR, // [83]
+       INVALID_CHAR, // [84]
+       INVALID_CHAR, // [85]
+       INVALID_CHAR, // [86]
+       INVALID_CHAR, // [87]
+       INVALID_CHAR, // [88]
+       INVALID_CHAR, // [89]
+       INVALID_CHAR, // [90]
+       INVALID_CHAR, // [91]
+       INVALID_CHAR, // [92]
+       INVALID_CHAR, // [93]
+       INVALID_CHAR, // [94]
+       INVALID_CHAR, // [95]
+       INVALID_CHAR, // [96]
+       INVALID_CHAR, // [97]
+       INVALID_CHAR, // [98]
+       INVALID_CHAR, // [99]
+       INVALID_CHAR, // [100]
+       INVALID_CHAR, // [101]
+       INVALID_CHAR, // [102]
+       INVALID_CHAR, // [103]
+       INVALID_CHAR, // [104]
+       INVALID_CHAR, // [105]
+       INVALID_CHAR, // [106]
+       INVALID_CHAR, // [107]
+       INVALID_CHAR, // [108]
+       INVALID_CHAR, // [109]
+       INVALID_CHAR, // [110]
+       INVALID_CHAR, // [111]
+       INVALID_CHAR, // [112]
+       INVALID_CHAR, // [113]
+       INVALID_CHAR, // [114]
+       INVALID_CHAR, // [115]
+       INVALID_CHAR, // [116]
+       INVALID_CHAR, // [117]
+       INVALID_CHAR, // [118]
+       INVALID_CHAR, // [119]
+       INVALID_CHAR, // [120]
+       INVALID_CHAR, // [121]
+       INVALID_CHAR, // [122]
+       INVALID_CHAR, // [123]
+       INVALID_CHAR, // [124]
+       INVALID_CHAR, // [125]
+       INVALID_CHAR, // [126]
+       INVALID_CHAR, // [127]
+       INVALID_CHAR, // [128]
+       INVALID_CHAR, // [129]
+       INVALID_CHAR, // [130]
+       INVALID_CHAR, // [131]
+       INVALID_CHAR, // [132]
+       INVALID_CHAR, // [133]
+       INVALID_CHAR, // [134]
+       INVALID_CHAR, // [135]
+       INVALID_CHAR, // [136]
+       INVALID_CHAR, // [137]
+       INVALID_CHAR, // [138]
+       INVALID_CHAR, // [139]
+       INVALID_CHAR, // [140]
+       INVALID_CHAR, // [141]
+       INVALID_CHAR, // [142]
+       INVALID_CHAR, // [143]
+       INVALID_CHAR, // [144]
+       INVALID_CHAR, // [145]
+       INVALID_CHAR, // [146]
+       INVALID_CHAR, // [147]
+       INVALID_CHAR, // [148]
+       INVALID_CHAR, // [149]
+       INVALID_CHAR, // [150]
+       INVALID_CHAR, // [151]
+       INVALID_CHAR, // [152]
+       INVALID_CHAR, // [153]
+       INVALID_CHAR, // [154]
+       INVALID_CHAR, // [155]
+       INVALID_CHAR, // [156]
+       INVALID_CHAR, // [157]
+       INVALID_CHAR, // [158]
+       INVALID_CHAR, // [159]
+       INVALID_CHAR, // [160]
+       INVALID_CHAR, // [161]
+       INVALID_CHAR, // [162]
+       INVALID_CHAR, // [163]
+       INVALID_CHAR, // [164]
+       INVALID_CHAR, // [165]
+       INVALID_CHAR, // [166]
+       INVALID_CHAR, // [167]
+       INVALID_CHAR, // [168]
+       INVALID_CHAR, // [169]
+       INVALID_CHAR, // [170]
+       INVALID_CHAR, // [171]
+       INVALID_CHAR, // [172]
+       INVALID_CHAR, // [173]
+       INVALID_CHAR, // [174]
+       INVALID_CHAR, // [175]
+       INVALID_CHAR, // [176]
+       INVALID_CHAR, // [177]
+       INVALID_CHAR, // [178]
+       INVALID_CHAR, // [179]
+       INVALID_CHAR, // [180]
+       INVALID_CHAR, // [181]
+       INVALID_CHAR, // [182]
+       INVALID_CHAR, // [183]
+       INVALID_CHAR, // [184]
+       INVALID_CHAR, // [185]
+       INVALID_CHAR, // [186]
+       INVALID_CHAR, // [187]
+       INVALID_CHAR, // [188]
+       INVALID_CHAR, // [189]
+       INVALID_CHAR, // [190]
+       INVALID_CHAR, // [191]
+       INVALID_CHAR, // [192]
+       INVALID_CHAR, // [193]
+       INVALID_CHAR, // [194]
+       INVALID_CHAR, // [195]
+       INVALID_CHAR, // [196]
+       INVALID_CHAR, // [197]
+       INVALID_CHAR, // [198]
+       INVALID_CHAR, // [199]
+       INVALID_CHAR, // [200]
+       INVALID_CHAR, // [201]
+       INVALID_CHAR, // [202]
+       INVALID_CHAR, // [203]
+       INVALID_CHAR, // [204]
+       INVALID_CHAR, // [205]
+       INVALID_CHAR, // [206]
+       INVALID_CHAR, // [207]
+       INVALID_CHAR, // [208]
+       INVALID_CHAR, // [209]
+       INVALID_CHAR, // [210]
+       INVALID_CHAR, // [211]
+       INVALID_CHAR, // [212]
+       INVALID_CHAR, // [213]
+       INVALID_CHAR, // [214]
+       INVALID_CHAR, // [215]
+       INVALID_CHAR, // [216]
+       INVALID_CHAR, // [217]
+       INVALID_CHAR, // [218]
+       INVALID_CHAR, // [219]
+       INVALID_CHAR, // [220]
+       INVALID_CHAR, // [221]
+       INVALID_CHAR, // [222]
+       INVALID_CHAR, // [223]
+       INVALID_CHAR, // [224]
+       INVALID_CHAR, // [225]
+       INVALID_CHAR, // [226]
+       INVALID_CHAR, // [227]
+       INVALID_CHAR, // [228]
+       INVALID_CHAR, // [229]
+       INVALID_CHAR, // [230]
+       INVALID_CHAR, // [231]
+       INVALID_CHAR, // [232]
+       INVALID_CHAR, // [233]
+       INVALID_CHAR, // [234]
+       INVALID_CHAR, // [235]
+       INVALID_CHAR, // [236]
+       INVALID_CHAR, // [237]
+       INVALID_CHAR, // [238]
+       INVALID_CHAR, // [239]
+       INVALID_CHAR, // [240]
+       INVALID_CHAR, // [241]
+       INVALID_CHAR, // [242]
+       INVALID_CHAR, // [243]
+       INVALID_CHAR, // [244]
+       INVALID_CHAR, // [245]
+       INVALID_CHAR, // [246]
+       INVALID_CHAR, // [247]
+       INVALID_CHAR, // [248]
+       INVALID_CHAR, // [249]
+       INVALID_CHAR, // [250]
+       INVALID_CHAR, // [251]
+       INVALID_CHAR, // [252]
+       INVALID_CHAR, // [253]
+       INVALID_CHAR, // [254]
+       INVALID_CHAR, // [255]
+       };
+
+unsigned g_CharToLetterNucleo[256] = // Lookup table: character code (0..255) -> nucleotide letter index.
+       { // A=0, C=1, G=2, T and U=3, in either case; every other code is INVALID_LETTER.
+       INVALID_LETTER, // [  0] = 0x00
+       INVALID_LETTER, // [  1] = 0x01
+       INVALID_LETTER, // [  2] = 0x02
+       INVALID_LETTER, // [  3] = 0x03
+       INVALID_LETTER, // [  4] = 0x04
+       INVALID_LETTER, // [  5] = 0x05
+       INVALID_LETTER, // [  6] = 0x06
+       INVALID_LETTER, // [  7] = 0x07
+       INVALID_LETTER, // [  8] = 0x08
+       INVALID_LETTER, // [  9] = 0x09
+       INVALID_LETTER, // [ 10] = 0x0a
+       INVALID_LETTER, // [ 11] = 0x0b
+       INVALID_LETTER, // [ 12] = 0x0c
+       INVALID_LETTER, // [ 13] = 0x0d
+       INVALID_LETTER, // [ 14] = 0x0e
+       INVALID_LETTER, // [ 15] = 0x0f
+       INVALID_LETTER, // [ 16] = 0x10
+       INVALID_LETTER, // [ 17] = 0x11
+       INVALID_LETTER, // [ 18] = 0x12
+       INVALID_LETTER, // [ 19] = 0x13
+       INVALID_LETTER, // [ 20] = 0x14
+       INVALID_LETTER, // [ 21] = 0x15
+       INVALID_LETTER, // [ 22] = 0x16
+       INVALID_LETTER, // [ 23] = 0x17
+       INVALID_LETTER, // [ 24] = 0x18
+       INVALID_LETTER, // [ 25] = 0x19
+       INVALID_LETTER, // [ 26] = 0x1a
+       INVALID_LETTER, // [ 27] = 0x1b
+       INVALID_LETTER, // [ 28] = 0x1c
+       INVALID_LETTER, // [ 29] = 0x1d
+       INVALID_LETTER, // [ 30] = 0x1e
+       INVALID_LETTER, // [ 31] = 0x1f
+       INVALID_LETTER, // [ 32] = 32
+       INVALID_LETTER, // [ 33] = 33
+       INVALID_LETTER, // [ 34] = 34
+       INVALID_LETTER, // [ 35] = 35
+       INVALID_LETTER, // [ 36] = 36
+       INVALID_LETTER, // [ 37] = 37
+       INVALID_LETTER, // [ 38] = 38
+       INVALID_LETTER, // [ 39] = 39
+       INVALID_LETTER, // [ 40] = 40
+       INVALID_LETTER, // [ 41] = 41
+       INVALID_LETTER, // [ 42] = 42
+       INVALID_LETTER, // [ 43] = 43
+       INVALID_LETTER, // [ 44] = 44
+       INVALID_LETTER, // [ 45] = 45
+       INVALID_LETTER, // [ 46] = 46
+       INVALID_LETTER, // [ 47] = 47
+       INVALID_LETTER, // [ 48] = 48
+       INVALID_LETTER, // [ 49] = 49
+       INVALID_LETTER, // [ 50] = 50
+       INVALID_LETTER, // [ 51] = 51
+       INVALID_LETTER, // [ 52] = 52
+       INVALID_LETTER, // [ 53] = 53
+       INVALID_LETTER, // [ 54] = 54
+       INVALID_LETTER, // [ 55] = 55
+       INVALID_LETTER, // [ 56] = 56
+       INVALID_LETTER, // [ 57] = 57
+       INVALID_LETTER, // [ 58] = 58
+       INVALID_LETTER, // [ 59] = 59
+       INVALID_LETTER, // [ 60] = 60
+       INVALID_LETTER, // [ 61] = 61
+       INVALID_LETTER, // [ 62] = 62
+       INVALID_LETTER, // [ 63] = 63
+       INVALID_LETTER, // [ 64] = 64
+       0  ,            // [ 65] = A (Nucleotide)
+       INVALID_LETTER, // [ 66] = 66
+       1  ,            // [ 67] = C (Nucleotide)
+       INVALID_LETTER, // [ 68] = 68
+       INVALID_LETTER, // [ 69] = 69
+       INVALID_LETTER, // [ 70] = 70
+       2  ,            // [ 71] = G (Nucleotide)
+       INVALID_LETTER, // [ 72] = 72
+       INVALID_LETTER, // [ 73] = 73
+       INVALID_LETTER, // [ 74] = 74
+       INVALID_LETTER, // [ 75] = 75
+       INVALID_LETTER, // [ 76] = 76
+       INVALID_LETTER, // [ 77] = 77
+       INVALID_LETTER, // [ 78] = 78 ('N' is not mapped to a letter index)
+       INVALID_LETTER, // [ 79] = 79
+       INVALID_LETTER, // [ 80] = 80
+       INVALID_LETTER, // [ 81] = 81
+       INVALID_LETTER, // [ 82] = 82
+       INVALID_LETTER, // [ 83] = 83
+       3  ,            // [ 84] = T (Nucleotide)
+       3  ,            // [ 85] = U (Nucleotide), shares index 3 with T
+       INVALID_LETTER, // [ 86] = 86
+       INVALID_LETTER, // [ 87] = 87
+       INVALID_LETTER, // [ 88] = 88
+       INVALID_LETTER, // [ 89] = 89
+       INVALID_LETTER, // [ 90] = 90
+       INVALID_LETTER, // [ 91] = 91
+       INVALID_LETTER, // [ 92] = 92
+       INVALID_LETTER, // [ 93] = 93
+       INVALID_LETTER, // [ 94] = 94
+       INVALID_LETTER, // [ 95] = 95
+       INVALID_LETTER, // [ 96] = 96
+       0  ,            // [ 97] = a (Nucleotide)
+       INVALID_LETTER, // [ 98] = 98
+       1  ,            // [ 99] = c (Nucleotide)
+       INVALID_LETTER, // [100] = 100
+       INVALID_LETTER, // [101] = 101
+       INVALID_LETTER, // [102] = 102
+       2  ,            // [103] = g (Nucleotide)
+       INVALID_LETTER, // [104] = 104
+       INVALID_LETTER, // [105] = 105
+       INVALID_LETTER, // [106] = 106
+       INVALID_LETTER, // [107] = 107
+       INVALID_LETTER, // [108] = 108
+       INVALID_LETTER, // [109] = 109
+       INVALID_LETTER, // [110] = 110 ('n' is not mapped to a letter index)
+       INVALID_LETTER, // [111] = 111
+       INVALID_LETTER, // [112] = 112
+       INVALID_LETTER, // [113] = 113
+       INVALID_LETTER, // [114] = 114
+       INVALID_LETTER, // [115] = 115
+       3  ,            // [116] = t (Nucleotide)
+       3  ,            // [117] = u (Nucleotide), shares index 3 with t
+       INVALID_LETTER, // [118] = 118
+       INVALID_LETTER, // [119] = 119
+       INVALID_LETTER, // [120] = 120
+       INVALID_LETTER, // [121] = 121
+       INVALID_LETTER, // [122] = 122
+       INVALID_LETTER, // [123] = 123
+       INVALID_LETTER, // [124] = 124
+       INVALID_LETTER, // [125] = 125
+       INVALID_LETTER, // [126] = 126
+       INVALID_LETTER, // [127] = 0x7f
+       INVALID_LETTER, // [128] = 0x80
+       INVALID_LETTER, // [129] = 0x81
+       INVALID_LETTER, // [130] = 0x82
+       INVALID_LETTER, // [131] = 0x83
+       INVALID_LETTER, // [132] = 0x84
+       INVALID_LETTER, // [133] = 0x85
+       INVALID_LETTER, // [134] = 0x86
+       INVALID_LETTER, // [135] = 0x87
+       INVALID_LETTER, // [136] = 0x88
+       INVALID_LETTER, // [137] = 0x89
+       INVALID_LETTER, // [138] = 0x8a
+       INVALID_LETTER, // [139] = 0x8b
+       INVALID_LETTER, // [140] = 0x8c
+       INVALID_LETTER, // [141] = 0x8d
+       INVALID_LETTER, // [142] = 0x8e
+       INVALID_LETTER, // [143] = 0x8f
+       INVALID_LETTER, // [144] = 0x90
+       INVALID_LETTER, // [145] = 0x91
+       INVALID_LETTER, // [146] = 0x92
+       INVALID_LETTER, // [147] = 0x93
+       INVALID_LETTER, // [148] = 0x94
+       INVALID_LETTER, // [149] = 0x95
+       INVALID_LETTER, // [150] = 0x96
+       INVALID_LETTER, // [151] = 0x97
+       INVALID_LETTER, // [152] = 0x98
+       INVALID_LETTER, // [153] = 0x99
+       INVALID_LETTER, // [154] = 0x9a
+       INVALID_LETTER, // [155] = 0x9b
+       INVALID_LETTER, // [156] = 0x9c
+       INVALID_LETTER, // [157] = 0x9d
+       INVALID_LETTER, // [158] = 0x9e
+       INVALID_LETTER, // [159] = 0x9f
+       INVALID_LETTER, // [160] = 0xa0
+       INVALID_LETTER, // [161] = 0xa1
+       INVALID_LETTER, // [162] = 0xa2
+       INVALID_LETTER, // [163] = 0xa3
+       INVALID_LETTER, // [164] = 0xa4
+       INVALID_LETTER, // [165] = 0xa5
+       INVALID_LETTER, // [166] = 0xa6
+       INVALID_LETTER, // [167] = 0xa7
+       INVALID_LETTER, // [168] = 0xa8
+       INVALID_LETTER, // [169] = 0xa9
+       INVALID_LETTER, // [170] = 0xaa
+       INVALID_LETTER, // [171] = 0xab
+       INVALID_LETTER, // [172] = 0xac
+       INVALID_LETTER, // [173] = 0xad
+       INVALID_LETTER, // [174] = 0xae
+       INVALID_LETTER, // [175] = 0xaf
+       INVALID_LETTER, // [176] = 0xb0
+       INVALID_LETTER, // [177] = 0xb1
+       INVALID_LETTER, // [178] = 0xb2
+       INVALID_LETTER, // [179] = 0xb3
+       INVALID_LETTER, // [180] = 0xb4
+       INVALID_LETTER, // [181] = 0xb5
+       INVALID_LETTER, // [182] = 0xb6
+       INVALID_LETTER, // [183] = 0xb7
+       INVALID_LETTER, // [184] = 0xb8
+       INVALID_LETTER, // [185] = 0xb9
+       INVALID_LETTER, // [186] = 0xba
+       INVALID_LETTER, // [187] = 0xbb
+       INVALID_LETTER, // [188] = 0xbc
+       INVALID_LETTER, // [189] = 0xbd
+       INVALID_LETTER, // [190] = 0xbe
+       INVALID_LETTER, // [191] = 0xbf
+       INVALID_LETTER, // [192] = 0xc0
+       INVALID_LETTER, // [193] = 0xc1
+       INVALID_LETTER, // [194] = 0xc2
+       INVALID_LETTER, // [195] = 0xc3
+       INVALID_LETTER, // [196] = 0xc4
+       INVALID_LETTER, // [197] = 0xc5
+       INVALID_LETTER, // [198] = 0xc6
+       INVALID_LETTER, // [199] = 0xc7
+       INVALID_LETTER, // [200] = 0xc8
+       INVALID_LETTER, // [201] = 0xc9
+       INVALID_LETTER, // [202] = 0xca
+       INVALID_LETTER, // [203] = 0xcb
+       INVALID_LETTER, // [204] = 0xcc
+       INVALID_LETTER, // [205] = 0xcd
+       INVALID_LETTER, // [206] = 0xce
+       INVALID_LETTER, // [207] = 0xcf
+       INVALID_LETTER, // [208] = 0xd0
+       INVALID_LETTER, // [209] = 0xd1
+       INVALID_LETTER, // [210] = 0xd2
+       INVALID_LETTER, // [211] = 0xd3
+       INVALID_LETTER, // [212] = 0xd4
+       INVALID_LETTER, // [213] = 0xd5
+       INVALID_LETTER, // [214] = 0xd6
+       INVALID_LETTER, // [215] = 0xd7
+       INVALID_LETTER, // [216] = 0xd8
+       INVALID_LETTER, // [217] = 0xd9
+       INVALID_LETTER, // [218] = 0xda
+       INVALID_LETTER, // [219] = 0xdb
+       INVALID_LETTER, // [220] = 0xdc
+       INVALID_LETTER, // [221] = 0xdd
+       INVALID_LETTER, // [222] = 0xde
+       INVALID_LETTER, // [223] = 0xdf
+       INVALID_LETTER, // [224] = 0xe0
+       INVALID_LETTER, // [225] = 0xe1
+       INVALID_LETTER, // [226] = 0xe2
+       INVALID_LETTER, // [227] = 0xe3
+       INVALID_LETTER, // [228] = 0xe4
+       INVALID_LETTER, // [229] = 0xe5
+       INVALID_LETTER, // [230] = 0xe6
+       INVALID_LETTER, // [231] = 0xe7
+       INVALID_LETTER, // [232] = 0xe8
+       INVALID_LETTER, // [233] = 0xe9
+       INVALID_LETTER, // [234] = 0xea
+       INVALID_LETTER, // [235] = 0xeb
+       INVALID_LETTER, // [236] = 0xec
+       INVALID_LETTER, // [237] = 0xed
+       INVALID_LETTER, // [238] = 0xee
+       INVALID_LETTER, // [239] = 0xef
+       INVALID_LETTER, // [240] = 0xf0
+       INVALID_LETTER, // [241] = 0xf1
+       INVALID_LETTER, // [242] = 0xf2
+       INVALID_LETTER, // [243] = 0xf3
+       INVALID_LETTER, // [244] = 0xf4
+       INVALID_LETTER, // [245] = 0xf5
+       INVALID_LETTER, // [246] = 0xf6
+       INVALID_LETTER, // [247] = 0xf7
+       INVALID_LETTER, // [248] = 0xf8
+       INVALID_LETTER, // [249] = 0xf9
+       INVALID_LETTER, // [250] = 0xfa
+       INVALID_LETTER, // [251] = 0xfb
+       INVALID_LETTER, // [252] = 0xfc
+       INVALID_LETTER, // [253] = 0xfd
+       INVALID_LETTER, // [254] = 0xfe
+       INVALID_LETTER, // [255] = 0xff
+       };
+
+unsigned char g_LetterToCharNucleo[256] =
+       {
+       'A', // [0]
+       'C', // [1]
+       'G', // [2]
+       'T', // [3]
+       INVALID_CHAR, // [4]
+       INVALID_CHAR, // [5]
+       INVALID_CHAR, // [6]
+       INVALID_CHAR, // [7]
+       INVALID_CHAR, // [8]
+       INVALID_CHAR, // [9]
+       INVALID_CHAR, // [10]
+       INVALID_CHAR, // [11]
+       INVALID_CHAR, // [12]
+       INVALID_CHAR, // [13]
+       INVALID_CHAR, // [14]
+       INVALID_CHAR, // [15]
+       INVALID_CHAR, // [16]
+       INVALID_CHAR, // [17]
+       INVALID_CHAR, // [18]
+       INVALID_CHAR, // [19]
+       INVALID_CHAR, // [20]
+       INVALID_CHAR, // [21]
+       INVALID_CHAR, // [22]
+       INVALID_CHAR, // [23]
+       INVALID_CHAR, // [24]
+       INVALID_CHAR, // [25]
+       INVALID_CHAR, // [26]
+       INVALID_CHAR, // [27]
+       INVALID_CHAR, // [28]
+       INVALID_CHAR, // [29]
+       INVALID_CHAR, // [30]
+       INVALID_CHAR, // [31]
+       INVALID_CHAR, // [32]
+       INVALID_CHAR, // [33]
+       INVALID_CHAR, // [34]
+       INVALID_CHAR, // [35]
+       INVALID_CHAR, // [36]
+       INVALID_CHAR, // [37]
+       INVALID_CHAR, // [38]
+       INVALID_CHAR, // [39]
+       INVALID_CHAR, // [40]
+       INVALID_CHAR, // [41]
+       INVALID_CHAR, // [42]
+       INVALID_CHAR, // [43]
+       INVALID_CHAR, // [44]
+       INVALID_CHAR, // [45]
+       INVALID_CHAR, // [46]
+       INVALID_CHAR, // [47]
+       INVALID_CHAR, // [48]
+       INVALID_CHAR, // [49]
+       INVALID_CHAR, // [50]
+       INVALID_CHAR, // [51]
+       INVALID_CHAR, // [52]
+       INVALID_CHAR, // [53]
+       INVALID_CHAR, // [54]
+       INVALID_CHAR, // [55]
+       INVALID_CHAR, // [56]
+       INVALID_CHAR, // [57]
+       INVALID_CHAR, // [58]
+       INVALID_CHAR, // [59]
+       INVALID_CHAR, // [60]
+       INVALID_CHAR, // [61]
+       INVALID_CHAR, // [62]
+       INVALID_CHAR, // [63]
+       INVALID_CHAR, // [64]
+       INVALID_CHAR, // [65]
+       INVALID_CHAR, // [66]
+       INVALID_CHAR, // [67]
+       INVALID_CHAR, // [68]
+       INVALID_CHAR, // [69]
+       INVALID_CHAR, // [70]
+       INVALID_CHAR, // [71]
+       INVALID_CHAR, // [72]
+       INVALID_CHAR, // [73]
+       INVALID_CHAR, // [74]
+       INVALID_CHAR, // [75]
+       INVALID_CHAR, // [76]
+       INVALID_CHAR, // [77]
+       INVALID_CHAR, // [78]
+       INVALID_CHAR, // [79]
+       INVALID_CHAR, // [80]
+       INVALID_CHAR, // [81]
+       INVALID_CHAR, // [82]
+       INVALID_CHAR, // [83]
+       INVALID_CHAR, // [84]
+       INVALID_CHAR, // [85]
+       INVALID_CHAR, // [86]
+       INVALID_CHAR, // [87]
+       INVALID_CHAR, // [88]
+       INVALID_CHAR, // [89]
+       INVALID_CHAR, // [90]
+       INVALID_CHAR, // [91]
+       INVALID_CHAR, // [92]
+       INVALID_CHAR, // [93]
+       INVALID_CHAR, // [94]
+       INVALID_CHAR, // [95]
+       INVALID_CHAR, // [96]
+       INVALID_CHAR, // [97]
+       INVALID_CHAR, // [98]
+       INVALID_CHAR, // [99]
+       INVALID_CHAR, // [100]
+       INVALID_CHAR, // [101]
+       INVALID_CHAR, // [102]
+       INVALID_CHAR, // [103]
+       INVALID_CHAR, // [104]
+       INVALID_CHAR, // [105]
+       INVALID_CHAR, // [106]
+       INVALID_CHAR, // [107]
+       INVALID_CHAR, // [108]
+       INVALID_CHAR, // [109]
+       INVALID_CHAR, // [110]
+       INVALID_CHAR, // [111]
+       INVALID_CHAR, // [112]
+       INVALID_CHAR, // [113]
+       INVALID_CHAR, // [114]
+       INVALID_CHAR, // [115]
+       INVALID_CHAR, // [116]
+       INVALID_CHAR, // [117]
+       INVALID_CHAR, // [118]
+       INVALID_CHAR, // [119]
+       INVALID_CHAR, // [120]
+       INVALID_CHAR, // [121]
+       INVALID_CHAR, // [122]
+       INVALID_CHAR, // [123]
+       INVALID_CHAR, // [124]
+       INVALID_CHAR, // [125]
+       INVALID_CHAR, // [126]
+       INVALID_CHAR, // [127]
+       INVALID_CHAR, // [128]
+       INVALID_CHAR, // [129]
+       INVALID_CHAR, // [130]
+       INVALID_CHAR, // [131]
+       INVALID_CHAR, // [132]
+       INVALID_CHAR, // [133]
+       INVALID_CHAR, // [134]
+       INVALID_CHAR, // [135]
+       INVALID_CHAR, // [136]
+       INVALID_CHAR, // [137]
+       INVALID_CHAR, // [138]
+       INVALID_CHAR, // [139]
+       INVALID_CHAR, // [140]
+       INVALID_CHAR, // [141]
+       INVALID_CHAR, // [142]
+       INVALID_CHAR, // [143]
+       INVALID_CHAR, // [144]
+       INVALID_CHAR, // [145]
+       INVALID_CHAR, // [146]
+       INVALID_CHAR, // [147]
+       INVALID_CHAR, // [148]
+       INVALID_CHAR, // [149]
+       INVALID_CHAR, // [150]
+       INVALID_CHAR, // [151]
+       INVALID_CHAR, // [152]
+       INVALID_CHAR, // [153]
+       INVALID_CHAR, // [154]
+       INVALID_CHAR, // [155]
+       INVALID_CHAR, // [156]
+       INVALID_CHAR, // [157]
+       INVALID_CHAR, // [158]
+       INVALID_CHAR, // [159]
+       INVALID_CHAR, // [160]
+       INVALID_CHAR, // [161]
+       INVALID_CHAR, // [162]
+       INVALID_CHAR, // [163]
+       INVALID_CHAR, // [164]
+       INVALID_CHAR, // [165]
+       INVALID_CHAR, // [166]
+       INVALID_CHAR, // [167]
+       INVALID_CHAR, // [168]
+       INVALID_CHAR, // [169]
+       INVALID_CHAR, // [170]
+       INVALID_CHAR, // [171]
+       INVALID_CHAR, // [172]
+       INVALID_CHAR, // [173]
+       INVALID_CHAR, // [174]
+       INVALID_CHAR, // [175]
+       INVALID_CHAR, // [176]
+       INVALID_CHAR, // [177]
+       INVALID_CHAR, // [178]
+       INVALID_CHAR, // [179]
+       INVALID_CHAR, // [180]
+       INVALID_CHAR, // [181]
+       INVALID_CHAR, // [182]
+       INVALID_CHAR, // [183]
+       INVALID_CHAR, // [184]
+       INVALID_CHAR, // [185]
+       INVALID_CHAR, // [186]
+       INVALID_CHAR, // [187]
+       INVALID_CHAR, // [188]
+       INVALID_CHAR, // [189]
+       INVALID_CHAR, // [190]
+       INVALID_CHAR, // [191]
+       INVALID_CHAR, // [192]
+       INVALID_CHAR, // [193]
+       INVALID_CHAR, // [194]
+       INVALID_CHAR, // [195]
+       INVALID_CHAR, // [196]
+       INVALID_CHAR, // [197]
+       INVALID_CHAR, // [198]
+       INVALID_CHAR, // [199]
+       INVALID_CHAR, // [200]
+       INVALID_CHAR, // [201]
+       INVALID_CHAR, // [202]
+       INVALID_CHAR, // [203]
+       INVALID_CHAR, // [204]
+       INVALID_CHAR, // [205]
+       INVALID_CHAR, // [206]
+       INVALID_CHAR, // [207]
+       INVALID_CHAR, // [208]
+       INVALID_CHAR, // [209]
+       INVALID_CHAR, // [210]
+       INVALID_CHAR, // [211]
+       INVALID_CHAR, // [212]
+       INVALID_CHAR, // [213]
+       INVALID_CHAR, // [214]
+       INVALID_CHAR, // [215]
+       INVALID_CHAR, // [216]
+       INVALID_CHAR, // [217]
+       INVALID_CHAR, // [218]
+       INVALID_CHAR, // [219]
+       INVALID_CHAR, // [220]
+       INVALID_CHAR, // [221]
+       INVALID_CHAR, // [222]
+       INVALID_CHAR, // [223]
+       INVALID_CHAR, // [224]
+       INVALID_CHAR, // [225]
+       INVALID_CHAR, // [226]
+       INVALID_CHAR, // [227]
+       INVALID_CHAR, // [228]
+       INVALID_CHAR, // [229]
+       INVALID_CHAR, // [230]
+       INVALID_CHAR, // [231]
+       INVALID_CHAR, // [232]
+       INVALID_CHAR, // [233]
+       INVALID_CHAR, // [234]
+       INVALID_CHAR, // [235]
+       INVALID_CHAR, // [236]
+       INVALID_CHAR, // [237]
+       INVALID_CHAR, // [238]
+       INVALID_CHAR, // [239]
+       INVALID_CHAR, // [240]
+       INVALID_CHAR, // [241]
+       INVALID_CHAR, // [242]
+       INVALID_CHAR, // [243]
+       INVALID_CHAR, // [244]
+       INVALID_CHAR, // [245]
+       INVALID_CHAR, // [246]
+       INVALID_CHAR, // [247]
+       INVALID_CHAR, // [248]
+       INVALID_CHAR, // [249]
+       INVALID_CHAR, // [250]
+       INVALID_CHAR, // [251]
+       INVALID_CHAR, // [252]
+       INVALID_CHAR, // [253]
+       INVALID_CHAR, // [254]
+       INVALID_CHAR, // [255]
+       };
+
+unsigned g_CodonWordToAminoLetter[4*4*4] = // Codon word (0..63) -> amino-acid letter index; 20 marks a stop codon.
+       { // Word = 16*b1 + 4*b2 + b3 with A=0, C=1, G=2, T=3; letters 0..19 follow the order ACDEFGHIKLMNPQRSTVWY.
+       8 , // [ 0] = AAA K (Lys)
+       11, // [ 1] = AAC N (Asn)
+       8 , // [ 2] = AAG K (Lys)
+       11, // [ 3] = AAT N (Asn)
+       16, // [ 4] = ACA T (Thr)
+       16, // [ 5] = ACC T (Thr)
+       16, // [ 6] = ACG T (Thr)
+       16, // [ 7] = ACT T (Thr)
+       14, // [ 8] = AGA R (Arg)
+       15, // [ 9] = AGC S (Ser)
+       14, // [10] = AGG R (Arg)
+       15, // [11] = AGT S (Ser)
+       7 , // [12] = ATA I (Ile)
+       7 , // [13] = ATC I (Ile)
+       10, // [14] = ATG M (Met)
+       7 , // [15] = ATT I (Ile)
+       13, // [16] = CAA Q (Gln)
+       6 , // [17] = CAC H (His)
+       13, // [18] = CAG Q (Gln)
+       6 , // [19] = CAT H (His)
+       12, // [20] = CCA P (Pro)
+       12, // [21] = CCC P (Pro)
+       12, // [22] = CCG P (Pro)
+       12, // [23] = CCT P (Pro)
+       14, // [24] = CGA R (Arg)
+       14, // [25] = CGC R (Arg)
+       14, // [26] = CGG R (Arg)
+       14, // [27] = CGT R (Arg)
+       9 , // [28] = CTA L (Leu)
+       9 , // [29] = CTC L (Leu)
+       9 , // [30] = CTG L (Leu)
+       9 , // [31] = CTT L (Leu)
+       3 , // [32] = GAA E (Glu)
+       2 , // [33] = GAC D (Asp)
+       3 , // [34] = GAG E (Glu)
+       2 , // [35] = GAT D (Asp)
+       0 , // [36] = GCA A (Ala)
+       0 , // [37] = GCC A (Ala)
+       0 , // [38] = GCG A (Ala)
+       0 , // [39] = GCT A (Ala)
+       5 , // [40] = GGA G (Gly)
+       5 , // [41] = GGC G (Gly)
+       5 , // [42] = GGG G (Gly)
+       5 , // [43] = GGT G (Gly)
+       17, // [44] = GTA V (Val)
+       17, // [45] = GTC V (Val)
+       17, // [46] = GTG V (Val)
+       17, // [47] = GTT V (Val)
+       20, // [48] = TAA * (STP)
+       19, // [49] = TAC Y (Tyr)
+       20, // [50] = TAG * (STP)
+       19, // [51] = TAT Y (Tyr)
+       15, // [52] = TCA S (Ser)
+       15, // [53] = TCC S (Ser)
+       15, // [54] = TCG S (Ser)
+       15, // [55] = TCT S (Ser)
+       20, // [56] = TGA * (STP)
+       1 , // [57] = TGC C (Cys)
+       18, // [58] = TGG W (Trp)
+       1 , // [59] = TGT C (Cys)
+       9 , // [60] = TTA L (Leu)
+       4 , // [61] = TTC F (Phe)
+       9 , // [62] = TTG L (Leu)
+       4 , // [63] = TTT F (Phe)
+       };
+
+char g_CodonWordToAminoChar[4*4*4] = // Codon word (0..63) -> one-letter amino-acid character; '*' marks a stop codon.
+       { // Word = 16*b1 + 4*b2 + b3 with A=0, C=1, G=2, T=3 (first base is the most significant digit).
+       'K', // [ 0] = AAA (Lys)
+       'N', // [ 1] = AAC (Asn)
+       'K', // [ 2] = AAG (Lys)
+       'N', // [ 3] = AAT (Asn)
+       'T', // [ 4] = ACA (Thr)
+       'T', // [ 5] = ACC (Thr)
+       'T', // [ 6] = ACG (Thr)
+       'T', // [ 7] = ACT (Thr)
+       'R', // [ 8] = AGA (Arg)
+       'S', // [ 9] = AGC (Ser)
+       'R', // [10] = AGG (Arg)
+       'S', // [11] = AGT (Ser)
+       'I', // [12] = ATA (Ile)
+       'I', // [13] = ATC (Ile)
+       'M', // [14] = ATG (Met)
+       'I', // [15] = ATT (Ile)
+       'Q', // [16] = CAA (Gln)
+       'H', // [17] = CAC (His)
+       'Q', // [18] = CAG (Gln)
+       'H', // [19] = CAT (His)
+       'P', // [20] = CCA (Pro)
+       'P', // [21] = CCC (Pro)
+       'P', // [22] = CCG (Pro)
+       'P', // [23] = CCT (Pro)
+       'R', // [24] = CGA (Arg)
+       'R', // [25] = CGC (Arg)
+       'R', // [26] = CGG (Arg)
+       'R', // [27] = CGT (Arg)
+       'L', // [28] = CTA (Leu)
+       'L', // [29] = CTC (Leu)
+       'L', // [30] = CTG (Leu)
+       'L', // [31] = CTT (Leu)
+       'E', // [32] = GAA (Glu)
+       'D', // [33] = GAC (Asp)
+       'E', // [34] = GAG (Glu)
+       'D', // [35] = GAT (Asp)
+       'A', // [36] = GCA (Ala)
+       'A', // [37] = GCC (Ala)
+       'A', // [38] = GCG (Ala)
+       'A', // [39] = GCT (Ala)
+       'G', // [40] = GGA (Gly)
+       'G', // [41] = GGC (Gly)
+       'G', // [42] = GGG (Gly)
+       'G', // [43] = GGT (Gly)
+       'V', // [44] = GTA (Val)
+       'V', // [45] = GTC (Val)
+       'V', // [46] = GTG (Val)
+       'V', // [47] = GTT (Val)
+       '*', // [48] = TAA (STP)
+       'Y', // [49] = TAC (Tyr)
+       '*', // [50] = TAG (STP)
+       'Y', // [51] = TAT (Tyr)
+       'S', // [52] = TCA (Ser)
+       'S', // [53] = TCC (Ser)
+       'S', // [54] = TCG (Ser)
+       'S', // [55] = TCT (Ser)
+       '*', // [56] = TGA (STP)
+       'C', // [57] = TGC (Cys)
+       'W', // [58] = TGG (Trp)
+       'C', // [59] = TGT (Cys)
+       'L', // [60] = TTA (Leu)
+       'F', // [61] = TTC (Phe)
+       'L', // [62] = TTG (Leu)
+       'F', // [63] = TTT (Phe)
+       };
+
+unsigned char g_CharToCompChar[256] = // Character code (0..255) -> complementary nucleotide character, always upper case.
+       { // Only A, C, G, T and U (either case) are mapped; U complements to 'A'. Every other code is INVALID_CHAR.
+       INVALID_CHAR, // [  0]
+       INVALID_CHAR, // [  1]
+       INVALID_CHAR, // [  2]
+       INVALID_CHAR, // [  3]
+       INVALID_CHAR, // [  4]
+       INVALID_CHAR, // [  5]
+       INVALID_CHAR, // [  6]
+       INVALID_CHAR, // [  7]
+       INVALID_CHAR, // [  8]
+       INVALID_CHAR, // [  9]
+       INVALID_CHAR, // [ 10]
+       INVALID_CHAR, // [ 11]
+       INVALID_CHAR, // [ 12]
+       INVALID_CHAR, // [ 13]
+       INVALID_CHAR, // [ 14]
+       INVALID_CHAR, // [ 15]
+       INVALID_CHAR, // [ 16]
+       INVALID_CHAR, // [ 17]
+       INVALID_CHAR, // [ 18]
+       INVALID_CHAR, // [ 19]
+       INVALID_CHAR, // [ 20]
+       INVALID_CHAR, // [ 21]
+       INVALID_CHAR, // [ 22]
+       INVALID_CHAR, // [ 23]
+       INVALID_CHAR, // [ 24]
+       INVALID_CHAR, // [ 25]
+       INVALID_CHAR, // [ 26]
+       INVALID_CHAR, // [ 27]
+       INVALID_CHAR, // [ 28]
+       INVALID_CHAR, // [ 29]
+       INVALID_CHAR, // [ 30]
+       INVALID_CHAR, // [ 31]
+       INVALID_CHAR, // [ 32]
+       INVALID_CHAR, // [ 33]
+       INVALID_CHAR, // [ 34]
+       INVALID_CHAR, // [ 35]
+       INVALID_CHAR, // [ 36]
+       INVALID_CHAR, // [ 37]
+       INVALID_CHAR, // [ 38]
+       INVALID_CHAR, // [ 39]
+       INVALID_CHAR, // [ 40]
+       INVALID_CHAR, // [ 41]
+       INVALID_CHAR, // [ 42]
+       INVALID_CHAR, // [ 43]
+       INVALID_CHAR, // [ 44]
+       INVALID_CHAR, // [ 45]
+       INVALID_CHAR, // [ 46]
+       INVALID_CHAR, // [ 47]
+       INVALID_CHAR, // [ 48]
+       INVALID_CHAR, // [ 49]
+       INVALID_CHAR, // [ 50]
+       INVALID_CHAR, // [ 51]
+       INVALID_CHAR, // [ 52]
+       INVALID_CHAR, // [ 53]
+       INVALID_CHAR, // [ 54]
+       INVALID_CHAR, // [ 55]
+       INVALID_CHAR, // [ 56]
+       INVALID_CHAR, // [ 57]
+       INVALID_CHAR, // [ 58]
+       INVALID_CHAR, // [ 59]
+       INVALID_CHAR, // [ 60]
+       INVALID_CHAR, // [ 61]
+       INVALID_CHAR, // [ 62]
+       INVALID_CHAR, // [ 63]
+       INVALID_CHAR, // [ 64]
+       'T',          // [ 65] A -> T
+       INVALID_CHAR, // [ 66]
+       'G',          // [ 67] C -> G
+       INVALID_CHAR, // [ 68]
+       INVALID_CHAR, // [ 69]
+       INVALID_CHAR, // [ 70]
+       'C',          // [ 71] G -> C
+       INVALID_CHAR, // [ 72]
+       INVALID_CHAR, // [ 73]
+       INVALID_CHAR, // [ 74]
+       INVALID_CHAR, // [ 75]
+       INVALID_CHAR, // [ 76]
+       INVALID_CHAR, // [ 77]
+       INVALID_CHAR, // [ 78]
+       INVALID_CHAR, // [ 79]
+       INVALID_CHAR, // [ 80]
+       INVALID_CHAR, // [ 81]
+       INVALID_CHAR, // [ 82]
+       INVALID_CHAR, // [ 83]
+       'A',          // [ 84] T -> A
+       'A',          // [ 85] U -> A
+       INVALID_CHAR, // [ 86]
+       INVALID_CHAR, // [ 87]
+       INVALID_CHAR, // [ 88]
+       INVALID_CHAR, // [ 89]
+       INVALID_CHAR, // [ 90]
+       INVALID_CHAR, // [ 91]
+       INVALID_CHAR, // [ 92]
+       INVALID_CHAR, // [ 93]
+       INVALID_CHAR, // [ 94]
+       INVALID_CHAR, // [ 95]
+       INVALID_CHAR, // [ 96]
+       'T',          // [ 97] a -> T
+       INVALID_CHAR, // [ 98]
+       'G',          // [ 99] c -> G
+       INVALID_CHAR, // [100]
+       INVALID_CHAR, // [101]
+       INVALID_CHAR, // [102]
+       'C',          // [103] g -> C
+       INVALID_CHAR, // [104]
+       INVALID_CHAR, // [105]
+       INVALID_CHAR, // [106]
+       INVALID_CHAR, // [107]
+       INVALID_CHAR, // [108]
+       INVALID_CHAR, // [109]
+       INVALID_CHAR, // [110]
+       INVALID_CHAR, // [111]
+       INVALID_CHAR, // [112]
+       INVALID_CHAR, // [113]
+       INVALID_CHAR, // [114]
+       INVALID_CHAR, // [115]
+       'A',          // [116] t -> A
+       'A',          // [117] u -> A
+       INVALID_CHAR, // [118]
+       INVALID_CHAR, // [119]
+       INVALID_CHAR, // [120]
+       INVALID_CHAR, // [121]
+       INVALID_CHAR, // [122]
+       INVALID_CHAR, // [123]
+       INVALID_CHAR, // [124]
+       INVALID_CHAR, // [125]
+       INVALID_CHAR, // [126]
+       INVALID_CHAR, // [127]
+       INVALID_CHAR, // [128]
+       INVALID_CHAR, // [129]
+       INVALID_CHAR, // [130]
+       INVALID_CHAR, // [131]
+       INVALID_CHAR, // [132]
+       INVALID_CHAR, // [133]
+       INVALID_CHAR, // [134]
+       INVALID_CHAR, // [135]
+       INVALID_CHAR, // [136]
+       INVALID_CHAR, // [137]
+       INVALID_CHAR, // [138]
+       INVALID_CHAR, // [139]
+       INVALID_CHAR, // [140]
+       INVALID_CHAR, // [141]
+       INVALID_CHAR, // [142]
+       INVALID_CHAR, // [143]
+       INVALID_CHAR, // [144]
+       INVALID_CHAR, // [145]
+       INVALID_CHAR, // [146]
+       INVALID_CHAR, // [147]
+       INVALID_CHAR, // [148]
+       INVALID_CHAR, // [149]
+       INVALID_CHAR, // [150]
+       INVALID_CHAR, // [151]
+       INVALID_CHAR, // [152]
+       INVALID_CHAR, // [153]
+       INVALID_CHAR, // [154]
+       INVALID_CHAR, // [155]
+       INVALID_CHAR, // [156]
+       INVALID_CHAR, // [157]
+       INVALID_CHAR, // [158]
+       INVALID_CHAR, // [159]
+       INVALID_CHAR, // [160]
+       INVALID_CHAR, // [161]
+       INVALID_CHAR, // [162]
+       INVALID_CHAR, // [163]
+       INVALID_CHAR, // [164]
+       INVALID_CHAR, // [165]
+       INVALID_CHAR, // [166]
+       INVALID_CHAR, // [167]
+       INVALID_CHAR, // [168]
+       INVALID_CHAR, // [169]
+       INVALID_CHAR, // [170]
+       INVALID_CHAR, // [171]
+       INVALID_CHAR, // [172]
+       INVALID_CHAR, // [173]
+       INVALID_CHAR, // [174]
+       INVALID_CHAR, // [175]
+       INVALID_CHAR, // [176]
+       INVALID_CHAR, // [177]
+       INVALID_CHAR, // [178]
+       INVALID_CHAR, // [179]
+       INVALID_CHAR, // [180]
+       INVALID_CHAR, // [181]
+       INVALID_CHAR, // [182]
+       INVALID_CHAR, // [183]
+       INVALID_CHAR, // [184]
+       INVALID_CHAR, // [185]
+       INVALID_CHAR, // [186]
+       INVALID_CHAR, // [187]
+       INVALID_CHAR, // [188]
+       INVALID_CHAR, // [189]
+       INVALID_CHAR, // [190]
+       INVALID_CHAR, // [191]
+       INVALID_CHAR, // [192]
+       INVALID_CHAR, // [193]
+       INVALID_CHAR, // [194]
+       INVALID_CHAR, // [195]
+       INVALID_CHAR, // [196]
+       INVALID_CHAR, // [197]
+       INVALID_CHAR, // [198]
+       INVALID_CHAR, // [199]
+       INVALID_CHAR, // [200]
+       INVALID_CHAR, // [201]
+       INVALID_CHAR, // [202]
+       INVALID_CHAR, // [203]
+       INVALID_CHAR, // [204]
+       INVALID_CHAR, // [205]
+       INVALID_CHAR, // [206]
+       INVALID_CHAR, // [207]
+       INVALID_CHAR, // [208]
+       INVALID_CHAR, // [209]
+       INVALID_CHAR, // [210]
+       INVALID_CHAR, // [211]
+       INVALID_CHAR, // [212]
+       INVALID_CHAR, // [213]
+       INVALID_CHAR, // [214]
+       INVALID_CHAR, // [215]
+       INVALID_CHAR, // [216]
+       INVALID_CHAR, // [217]
+       INVALID_CHAR, // [218]
+       INVALID_CHAR, // [219]
+       INVALID_CHAR, // [220]
+       INVALID_CHAR, // [221]
+       INVALID_CHAR, // [222]
+       INVALID_CHAR, // [223]
+       INVALID_CHAR, // [224]
+       INVALID_CHAR, // [225]
+       INVALID_CHAR, // [226]
+       INVALID_CHAR, // [227]
+       INVALID_CHAR, // [228]
+       INVALID_CHAR, // [229]
+       INVALID_CHAR, // [230]
+       INVALID_CHAR, // [231]
+       INVALID_CHAR, // [232]
+       INVALID_CHAR, // [233]
+       INVALID_CHAR, // [234]
+       INVALID_CHAR, // [235]
+       INVALID_CHAR, // [236]
+       INVALID_CHAR, // [237]
+       INVALID_CHAR, // [238]
+       INVALID_CHAR, // [239]
+       INVALID_CHAR, // [240]
+       INVALID_CHAR, // [241]
+       INVALID_CHAR, // [242]
+       INVALID_CHAR, // [243]
+       INVALID_CHAR, // [244]
+       INVALID_CHAR, // [245]
+       INVALID_CHAR, // [246]
+       INVALID_CHAR, // [247]
+       INVALID_CHAR, // [248]
+       INVALID_CHAR, // [249]
+       INVALID_CHAR, // [250]
+       INVALID_CHAR, // [251]
+       INVALID_CHAR, // [252]
+       INVALID_CHAR, // [253]
+       INVALID_CHAR, // [254]
+       INVALID_CHAR, // [255]
+};
+
+unsigned g_CharToCompLetter[256] = // Character code (0..255) -> letter index of the complementary nucleotide.
+       { // Per the entry comments the letter numbering is A=0, C=1, G=2, T=3; unmapped codes are INVALID_LETTER.
+       INVALID_LETTER, // [  0]
+       INVALID_LETTER, // [  1]
+       INVALID_LETTER, // [  2]
+       INVALID_LETTER, // [  3]
+       INVALID_LETTER, // [  4]
+       INVALID_LETTER, // [  5]
+       INVALID_LETTER, // [  6]
+       INVALID_LETTER, // [  7]
+       INVALID_LETTER, // [  8]
+       INVALID_LETTER, // [  9]
+       INVALID_LETTER, // [ 10]
+       INVALID_LETTER, // [ 11]
+       INVALID_LETTER, // [ 12]
+       INVALID_LETTER, // [ 13]
+       INVALID_LETTER, // [ 14]
+       INVALID_LETTER, // [ 15]
+       INVALID_LETTER, // [ 16]
+       INVALID_LETTER, // [ 17]
+       INVALID_LETTER, // [ 18]
+       INVALID_LETTER, // [ 19]
+       INVALID_LETTER, // [ 20]
+       INVALID_LETTER, // [ 21]
+       INVALID_LETTER, // [ 22]
+       INVALID_LETTER, // [ 23]
+       INVALID_LETTER, // [ 24]
+       INVALID_LETTER, // [ 25]
+       INVALID_LETTER, // [ 26]
+       INVALID_LETTER, // [ 27]
+       INVALID_LETTER, // [ 28]
+       INVALID_LETTER, // [ 29]
+       INVALID_LETTER, // [ 30]
+       INVALID_LETTER, // [ 31]
+       INVALID_LETTER, // [ 32]
+       INVALID_LETTER, // [ 33]
+       INVALID_LETTER, // [ 34]
+       INVALID_LETTER, // [ 35]
+       INVALID_LETTER, // [ 36]
+       INVALID_LETTER, // [ 37]
+       INVALID_LETTER, // [ 38]
+       INVALID_LETTER, // [ 39]
+       INVALID_LETTER, // [ 40]
+       INVALID_LETTER, // [ 41]
+       INVALID_LETTER, // [ 42]
+       INVALID_LETTER, // [ 43]
+       INVALID_LETTER, // [ 44]
+       INVALID_LETTER, // [ 45]
+       INVALID_LETTER, // [ 46]
+       INVALID_LETTER, // [ 47]
+       INVALID_LETTER, // [ 48]
+       INVALID_LETTER, // [ 49]
+       INVALID_LETTER, // [ 50]
+       INVALID_LETTER, // [ 51]
+       INVALID_LETTER, // [ 52]
+       INVALID_LETTER, // [ 53]
+       INVALID_LETTER, // [ 54]
+       INVALID_LETTER, // [ 55]
+       INVALID_LETTER, // [ 56]
+       INVALID_LETTER, // [ 57]
+       INVALID_LETTER, // [ 58]
+       INVALID_LETTER, // [ 59]
+       INVALID_LETTER, // [ 60]
+       INVALID_LETTER, // [ 61]
+       INVALID_LETTER, // [ 62]
+       INVALID_LETTER, // [ 63]
+       INVALID_LETTER, // [ 64]
+       3,              // [ 65] A -> T
+       INVALID_LETTER, // [ 66]
+       2,              // [ 67] C -> G
+       INVALID_LETTER, // [ 68]
+       INVALID_LETTER, // [ 69]
+       INVALID_LETTER, // [ 70]
+       1,              // [ 71] G -> C
+       INVALID_LETTER, // [ 72]
+       INVALID_LETTER, // [ 73]
+       INVALID_LETTER, // [ 74]
+       INVALID_LETTER, // [ 75]
+       INVALID_LETTER, // [ 76]
+       INVALID_LETTER, // [ 77]
+       INVALID_LETTER, // [ 78]
+       INVALID_LETTER, // [ 79]
+       INVALID_LETTER, // [ 80]
+       INVALID_LETTER, // [ 81]
+       INVALID_LETTER, // [ 82]
+       INVALID_LETTER, // [ 83]
+       0,              // [ 84] T -> A
+       0,              // [ 85] U -> A
+       INVALID_LETTER, // [ 86]
+       INVALID_LETTER, // [ 87]
+       INVALID_LETTER, // [ 88]
+       INVALID_LETTER, // [ 89]
+       INVALID_LETTER, // [ 90]
+       INVALID_LETTER, // [ 91]
+       INVALID_LETTER, // [ 92]
+       INVALID_LETTER, // [ 93]
+       INVALID_LETTER, // [ 94]
+       INVALID_LETTER, // [ 95]
+       INVALID_LETTER, // [ 96]
+       3,              // [ 97] a -> T
+       INVALID_LETTER, // [ 98]
+       2,              // [ 99] c -> G
+       INVALID_LETTER, // [100]
+       INVALID_LETTER, // [101]
+       INVALID_LETTER, // [102]
+       1,              // [103] g -> C
+       INVALID_LETTER, // [104]
+       INVALID_LETTER, // [105]
+       INVALID_LETTER, // [106]
+       INVALID_LETTER, // [107]
+       INVALID_LETTER, // [108]
+       INVALID_LETTER, // [109]
+       INVALID_LETTER, // [110]
+       INVALID_LETTER, // [111]
+       INVALID_LETTER, // [112]
+       INVALID_LETTER, // [113]
+       INVALID_LETTER, // [114]
+       INVALID_LETTER, // [115]
+       0,              // [116] t -> A
+       0,              // [117] u -> A
+       INVALID_LETTER, // [118]
+       INVALID_LETTER, // [119]
+       INVALID_LETTER, // [120]
+       INVALID_LETTER, // [121]
+       INVALID_LETTER, // [122]
+       INVALID_LETTER, // [123]
+       INVALID_LETTER, // [124]
+       INVALID_LETTER, // [125]
+       INVALID_LETTER, // [126]
+       INVALID_LETTER, // [127]
+       INVALID_LETTER, // [128]
+       INVALID_LETTER, // [129]
+       INVALID_LETTER, // [130]
+       INVALID_LETTER, // [131]
+       INVALID_LETTER, // [132]
+       INVALID_LETTER, // [133]
+       INVALID_LETTER, // [134]
+       INVALID_LETTER, // [135]
+       INVALID_LETTER, // [136]
+       INVALID_LETTER, // [137]
+       INVALID_LETTER, // [138]
+       INVALID_LETTER, // [139]
+       INVALID_LETTER, // [140]
+       INVALID_LETTER, // [141]
+       INVALID_LETTER, // [142]
+       INVALID_LETTER, // [143]
+       INVALID_LETTER, // [144]
+       INVALID_LETTER, // [145]
+       INVALID_LETTER, // [146]
+       INVALID_LETTER, // [147]
+       INVALID_LETTER, // [148]
+       INVALID_LETTER, // [149]
+       INVALID_LETTER, // [150]
+       INVALID_LETTER, // [151]
+       INVALID_LETTER, // [152]
+       INVALID_LETTER, // [153]
+       INVALID_LETTER, // [154]
+       INVALID_LETTER, // [155]
+       INVALID_LETTER, // [156]
+       INVALID_LETTER, // [157]
+       INVALID_LETTER, // [158]
+       INVALID_LETTER, // [159]
+       INVALID_LETTER, // [160]
+       INVALID_LETTER, // [161]
+       INVALID_LETTER, // [162]
+       INVALID_LETTER, // [163]
+       INVALID_LETTER, // [164]
+       INVALID_LETTER, // [165]
+       INVALID_LETTER, // [166]
+       INVALID_LETTER, // [167]
+       INVALID_LETTER, // [168]
+       INVALID_LETTER, // [169]
+       INVALID_LETTER, // [170]
+       INVALID_LETTER, // [171]
+       INVALID_LETTER, // [172]
+       INVALID_LETTER, // [173]
+       INVALID_LETTER, // [174]
+       INVALID_LETTER, // [175]
+       INVALID_LETTER, // [176]
+       INVALID_LETTER, // [177]
+       INVALID_LETTER, // [178]
+       INVALID_LETTER, // [179]
+       INVALID_LETTER, // [180]
+       INVALID_LETTER, // [181]
+       INVALID_LETTER, // [182]
+       INVALID_LETTER, // [183]
+       INVALID_LETTER, // [184]
+       INVALID_LETTER, // [185]
+       INVALID_LETTER, // [186]
+       INVALID_LETTER, // [187]
+       INVALID_LETTER, // [188]
+       INVALID_LETTER, // [189]
+       INVALID_LETTER, // [190]
+       INVALID_LETTER, // [191]
+       INVALID_LETTER, // [192]
+       INVALID_LETTER, // [193]
+       INVALID_LETTER, // [194]
+       INVALID_LETTER, // [195]
+       INVALID_LETTER, // [196]
+       INVALID_LETTER, // [197]
+       INVALID_LETTER, // [198]
+       INVALID_LETTER, // [199]
+       INVALID_LETTER, // [200]
+       INVALID_LETTER, // [201]
+       INVALID_LETTER, // [202]
+       INVALID_LETTER, // [203]
+       INVALID_LETTER, // [204]
+       INVALID_LETTER, // [205]
+       INVALID_LETTER, // [206]
+       INVALID_LETTER, // [207]
+       INVALID_LETTER, // [208]
+       INVALID_LETTER, // [209]
+       INVALID_LETTER, // [210]
+       INVALID_LETTER, // [211]
+       INVALID_LETTER, // [212]
+       INVALID_LETTER, // [213]
+       INVALID_LETTER, // [214]
+       INVALID_LETTER, // [215]
+       INVALID_LETTER, // [216]
+       INVALID_LETTER, // [217]
+       INVALID_LETTER, // [218]
+       INVALID_LETTER, // [219]
+       INVALID_LETTER, // [220]
+       INVALID_LETTER, // [221]
+       INVALID_LETTER, // [222]
+       INVALID_LETTER, // [223]
+       INVALID_LETTER, // [224]
+       INVALID_LETTER, // [225]
+       INVALID_LETTER, // [226]
+       INVALID_LETTER, // [227]
+       INVALID_LETTER, // [228]
+       INVALID_LETTER, // [229]
+       INVALID_LETTER, // [230]
+       INVALID_LETTER, // [231]
+       INVALID_LETTER, // [232]
+       INVALID_LETTER, // [233]
+       INVALID_LETTER, // [234]
+       INVALID_LETTER, // [235]
+       INVALID_LETTER, // [236]
+       INVALID_LETTER, // [237]
+       INVALID_LETTER, // [238]
+       INVALID_LETTER, // [239]
+       INVALID_LETTER, // [240]
+       INVALID_LETTER, // [241]
+       INVALID_LETTER, // [242]
+       INVALID_LETTER, // [243]
+       INVALID_LETTER, // [244]
+       INVALID_LETTER, // [245]
+       INVALID_LETTER, // [246]
+       INVALID_LETTER, // [247]
+       INVALID_LETTER, // [248]
+       INVALID_LETTER, // [249]
+       INVALID_LETTER, // [250]
+       INVALID_LETTER, // [251]
+       INVALID_LETTER, // [252]
+       INVALID_LETTER, // [253]
+       INVALID_LETTER, // [254]
+       INVALID_LETTER, // [255]
+};
+
+bool g_IsAminoChar[256] = // Character code (0..255) -> true when the character is an accepted amino-acid symbol.
+       { // True for the 20 one-letter codes ACDEFGHIKLMNPQRSTVWY in either case and for '*' (stop); B, J, O, U, X, Z are false.
+       false, // [  0] 0x00
+       false, // [  1] 0x01
+       false, // [  2] 0x02
+       false, // [  3] 0x03
+       false, // [  4] 0x04
+       false, // [  5] 0x05
+       false, // [  6] 0x06
+       false, // [  7] 0x07
+       false, // [  8] 0x08
+       false, // [  9] 0x09
+       false, // [ 10] 0x0a
+       false, // [ 11] 0x0b
+       false, // [ 12] 0x0c
+       false, // [ 13] 0x0d
+       false, // [ 14] 0x0e
+       false, // [ 15] 0x0f
+       false, // [ 16] 0x10
+       false, // [ 17] 0x11
+       false, // [ 18] 0x12
+       false, // [ 19] 0x13
+       false, // [ 20] 0x14
+       false, // [ 21] 0x15
+       false, // [ 22] 0x16
+       false, // [ 23] 0x17
+       false, // [ 24] 0x18
+       false, // [ 25] 0x19
+       false, // [ 26] 0x1a
+       false, // [ 27] 0x1b
+       false, // [ 28] 0x1c
+       false, // [ 29] 0x1d
+       false, // [ 30] 0x1e
+       false, // [ 31] 0x1f
+       false, // [ 32] ' '
+       false, // [ 33] '!'
+       false, // [ 34] '"'
+       false, // [ 35] '#'
+       false, // [ 36] '$'
+       false, // [ 37] '%'
+       false, // [ 38] '&'
+       false, // [ 39] '''
+       false, // [ 40] '('
+       false, // [ 41] ')'
+       true,  // [ 42] '*' = STP
+       false, // [ 43] '+'
+       false, // [ 44] ','
+       false, // [ 45] '-'
+       false, // [ 46] '.'
+       false, // [ 47] '/'
+       false, // [ 48] '0'
+       false, // [ 49] '1'
+       false, // [ 50] '2'
+       false, // [ 51] '3'
+       false, // [ 52] '4'
+       false, // [ 53] '5'
+       false, // [ 54] '6'
+       false, // [ 55] '7'
+       false, // [ 56] '8'
+       false, // [ 57] '9'
+       false, // [ 58] ':'
+       false, // [ 59] ';'
+       false, // [ 60] '<'
+       false, // [ 61] '='
+       false, // [ 62] '>'
+       false, // [ 63] '?'
+       false, // [ 64] '@'
+       true,  // [ 65] 'A' = Ala
+       false, // [ 66] 'B'
+       true,  // [ 67] 'C' = Cys
+       true,  // [ 68] 'D' = Asp
+       true,  // [ 69] 'E' = Glu
+       true,  // [ 70] 'F' = Phe
+       true,  // [ 71] 'G' = Gly
+       true,  // [ 72] 'H' = His
+       true,  // [ 73] 'I' = Ile
+       false, // [ 74] 'J'
+       true,  // [ 75] 'K' = Lys
+       true,  // [ 76] 'L' = Leu
+       true,  // [ 77] 'M' = Met
+       true,  // [ 78] 'N' = Asn
+       false, // [ 79] 'O'
+       true,  // [ 80] 'P' = Pro
+       true,  // [ 81] 'Q' = Gln
+       true,  // [ 82] 'R' = Arg
+       true,  // [ 83] 'S' = Ser
+       true,  // [ 84] 'T' = Thr
+       false, // [ 85] 'U'
+       true,  // [ 86] 'V' = Val
+       true,  // [ 87] 'W' = Trp
+       false, // [ 88] 'X'
+       true,  // [ 89] 'Y' = Tyr
+       false, // [ 90] 'Z'
+       false, // [ 91] '['
+       false, // [ 92] '\'
+       false, // [ 93] ']'
+       false, // [ 94] '^'
+       false, // [ 95] '_'
+       false, // [ 96] '`'
+       true,  // [ 97] 'a' = Ala
+       false, // [ 98] 'b'
+       true,  // [ 99] 'c' = Cys
+       true,  // [100] 'd' = Asp
+       true,  // [101] 'e' = Glu
+       true,  // [102] 'f' = Phe
+       true,  // [103] 'g' = Gly
+       true,  // [104] 'h' = His
+       true,  // [105] 'i' = Ile
+       false, // [106] 'j'
+       true,  // [107] 'k' = Lys
+       true,  // [108] 'l' = Leu
+       true,  // [109] 'm' = Met
+       true,  // [110] 'n' = Asn
+       false, // [111] 'o'
+       true,  // [112] 'p' = Pro
+       true,  // [113] 'q' = Gln
+       true,  // [114] 'r' = Arg
+       true,  // [115] 's' = Ser
+       true,  // [116] 't' = Thr
+       false, // [117] 'u'
+       true,  // [118] 'v' = Val
+       true,  // [119] 'w' = Trp
+       false, // [120] 'x'
+       true,  // [121] 'y' = Tyr
+       false, // [122] 'z'
+       false, // [123] '{'
+       false, // [124] '|'
+       false, // [125] '}'
+       false, // [126] '~'
+       false, // [127] 0x7f
+       false, // [128] 0x80
+       false, // [129] 0x81
+       false, // [130] 0x82
+       false, // [131] 0x83
+       false, // [132] 0x84
+       false, // [133] 0x85
+       false, // [134] 0x86
+       false, // [135] 0x87
+       false, // [136] 0x88
+       false, // [137] 0x89
+       false, // [138] 0x8a
+       false, // [139] 0x8b
+       false, // [140] 0x8c
+       false, // [141] 0x8d
+       false, // [142] 0x8e
+       false, // [143] 0x8f
+       false, // [144] 0x90
+       false, // [145] 0x91
+       false, // [146] 0x92
+       false, // [147] 0x93
+       false, // [148] 0x94
+       false, // [149] 0x95
+       false, // [150] 0x96
+       false, // [151] 0x97
+       false, // [152] 0x98
+       false, // [153] 0x99
+       false, // [154] 0x9a
+       false, // [155] 0x9b
+       false, // [156] 0x9c
+       false, // [157] 0x9d
+       false, // [158] 0x9e
+       false, // [159] 0x9f
+       false, // [160] 0xa0
+       false, // [161] 0xa1
+       false, // [162] 0xa2
+       false, // [163] 0xa3
+       false, // [164] 0xa4
+       false, // [165] 0xa5
+       false, // [166] 0xa6
+       false, // [167] 0xa7
+       false, // [168] 0xa8
+       false, // [169] 0xa9
+       false, // [170] 0xaa
+       false, // [171] 0xab
+       false, // [172] 0xac
+       false, // [173] 0xad
+       false, // [174] 0xae
+       false, // [175] 0xaf
+       false, // [176] 0xb0
+       false, // [177] 0xb1
+       false, // [178] 0xb2
+       false, // [179] 0xb3
+       false, // [180] 0xb4
+       false, // [181] 0xb5
+       false, // [182] 0xb6
+       false, // [183] 0xb7
+       false, // [184] 0xb8
+       false, // [185] 0xb9
+       false, // [186] 0xba
+       false, // [187] 0xbb
+       false, // [188] 0xbc
+       false, // [189] 0xbd
+       false, // [190] 0xbe
+       false, // [191] 0xbf
+       false, // [192] 0xc0
+       false, // [193] 0xc1
+       false, // [194] 0xc2
+       false, // [195] 0xc3
+       false, // [196] 0xc4
+       false, // [197] 0xc5
+       false, // [198] 0xc6
+       false, // [199] 0xc7
+       false, // [200] 0xc8
+       false, // [201] 0xc9
+       false, // [202] 0xca
+       false, // [203] 0xcb
+       false, // [204] 0xcc
+       false, // [205] 0xcd
+       false, // [206] 0xce
+       false, // [207] 0xcf
+       false, // [208] 0xd0
+       false, // [209] 0xd1
+       false, // [210] 0xd2
+       false, // [211] 0xd3
+       false, // [212] 0xd4
+       false, // [213] 0xd5
+       false, // [214] 0xd6
+       false, // [215] 0xd7
+       false, // [216] 0xd8
+       false, // [217] 0xd9
+       false, // [218] 0xda
+       false, // [219] 0xdb
+       false, // [220] 0xdc
+       false, // [221] 0xdd
+       false, // [222] 0xde
+       false, // [223] 0xdf
+       false, // [224] 0xe0
+       false, // [225] 0xe1
+       false, // [226] 0xe2
+       false, // [227] 0xe3
+       false, // [228] 0xe4
+       false, // [229] 0xe5
+       false, // [230] 0xe6
+       false, // [231] 0xe7
+       false, // [232] 0xe8
+       false, // [233] 0xe9
+       false, // [234] 0xea
+       false, // [235] 0xeb
+       false, // [236] 0xec
+       false, // [237] 0xed
+       false, // [238] 0xee
+       false, // [239] 0xef
+       false, // [240] 0xf0
+       false, // [241] 0xf1
+       false, // [242] 0xf2
+       false, // [243] 0xf3
+       false, // [244] 0xf4
+       false, // [245] 0xf5
+       false, // [246] 0xf6
+       false, // [247] 0xf7
+       false, // [248] 0xf8
+       false, // [249] 0xf9
+       false, // [250] 0xfa
+       false, // [251] 0xfb
+       false, // [252] 0xfc
+       false, // [253] 0xfd
+       false, // [254] 0xfe
+       false, // [255] 0xff
+       };
+
+// Lookup table with one entry per 8-bit character code (0..255).
+// An entry is true only for the letters A, C, G, N, T and U, in upper or
+// lower case. Every other code is false, including the gap characters
+// '-' and '.' and all other letters.
+bool g_IsNucleoChar[256] =
+       {
+       false, // [  0] 0x00
+       false, // [  1] 0x01
+       false, // [  2] 0x02
+       false, // [  3] 0x03
+       false, // [  4] 0x04
+       false, // [  5] 0x05
+       false, // [  6] 0x06
+       false, // [  7] 0x07
+       false, // [  8] 0x08
+       false, // [  9] 0x09
+       false, // [ 10] 0x0a
+       false, // [ 11] 0x0b
+       false, // [ 12] 0x0c
+       false, // [ 13] 0x0d
+       false, // [ 14] 0x0e
+       false, // [ 15] 0x0f
+       false, // [ 16] 0x10
+       false, // [ 17] 0x11
+       false, // [ 18] 0x12
+       false, // [ 19] 0x13
+       false, // [ 20] 0x14
+       false, // [ 21] 0x15
+       false, // [ 22] 0x16
+       false, // [ 23] 0x17
+       false, // [ 24] 0x18
+       false, // [ 25] 0x19
+       false, // [ 26] 0x1a
+       false, // [ 27] 0x1b
+       false, // [ 28] 0x1c
+       false, // [ 29] 0x1d
+       false, // [ 30] 0x1e
+       false, // [ 31] 0x1f
+       false, // [ 32] ' '
+       false, // [ 33] '!'
+       false, // [ 34] '"'
+       false, // [ 35] '#'
+       false, // [ 36] '$'
+       false, // [ 37] '%'
+       false, // [ 38] '&'
+       false, // [ 39] '''
+       false, // [ 40] '('
+       false, // [ 41] ')'
+       false, // [ 42] '*'
+       false, // [ 43] '+'
+       false, // [ 44] ','
+       false, // [ 45] '-'
+       false, // [ 46] '.'
+       false, // [ 47] '/'
+       false, // [ 48] '0'
+       false, // [ 49] '1'
+       false, // [ 50] '2'
+       false, // [ 51] '3'
+       false, // [ 52] '4'
+       false, // [ 53] '5'
+       false, // [ 54] '6'
+       false, // [ 55] '7'
+       false, // [ 56] '8'
+       false, // [ 57] '9'
+       false, // [ 58] ':'
+       false, // [ 59] ';'
+       false, // [ 60] '<'
+       false, // [ 61] '='
+       false, // [ 62] '>'
+       false, // [ 63] '?'
+       false, // [ 64] '@'
+       true,  // [ 65] 'A' (Nucleotide)
+       false, // [ 66] 'B'
+       true,  // [ 67] 'C' (Nucleotide)
+       false, // [ 68] 'D'
+       false, // [ 69] 'E'
+       false, // [ 70] 'F'
+       true,  // [ 71] 'G' (Nucleotide)
+       false, // [ 72] 'H'
+       false, // [ 73] 'I'
+       false, // [ 74] 'J'
+       false, // [ 75] 'K'
+       false, // [ 76] 'L'
+       false, // [ 77] 'M'
+       true,  // [ 78] 'N' (Nucleotide)
+       false, // [ 79] 'O'
+       false, // [ 80] 'P'
+       false, // [ 81] 'Q'
+       false, // [ 82] 'R'
+       false, // [ 83] 'S'
+       true,  // [ 84] 'T' (Nucleotide)
+       true,  // [ 85] 'U' (Nucleotide)
+       false, // [ 86] 'V'
+       false, // [ 87] 'W'
+       false, // [ 88] 'X'
+       false, // [ 89] 'Y'
+       false, // [ 90] 'Z'
+       false, // [ 91] '['
+       false, // [ 92] '\'
+       false, // [ 93] ']'
+       false, // [ 94] '^'
+       false, // [ 95] '_'
+       false, // [ 96] '`'
+       true,  // [ 97] 'a' (Nucleotide)
+       false, // [ 98] 'b'
+       true,  // [ 99] 'c' (Nucleotide)
+       false, // [100] 'd'
+       false, // [101] 'e'
+       false, // [102] 'f'
+       true,  // [103] 'g' (Nucleotide)
+       false, // [104] 'h'
+       false, // [105] 'i'
+       false, // [106] 'j'
+       false, // [107] 'k'
+       false, // [108] 'l'
+       false, // [109] 'm'
+       true,  // [110] 'n' (Nucleotide)
+       false, // [111] 'o'
+       false, // [112] 'p'
+       false, // [113] 'q'
+       false, // [114] 'r'
+       false, // [115] 's'
+       true,  // [116] 't' (Nucleotide)
+       true,  // [117] 'u' (Nucleotide)
+       false, // [118] 'v'
+       false, // [119] 'w'
+       false, // [120] 'x'
+       false, // [121] 'y'
+       false, // [122] 'z'
+       false, // [123] '{'
+       false, // [124] '|'
+       false, // [125] '}'
+       false, // [126] '~'
+       false, // [127] 0x7f
+       false, // [128] 0x80
+       false, // [129] 0x81
+       false, // [130] 0x82
+       false, // [131] 0x83
+       false, // [132] 0x84
+       false, // [133] 0x85
+       false, // [134] 0x86
+       false, // [135] 0x87
+       false, // [136] 0x88
+       false, // [137] 0x89
+       false, // [138] 0x8a
+       false, // [139] 0x8b
+       false, // [140] 0x8c
+       false, // [141] 0x8d
+       false, // [142] 0x8e
+       false, // [143] 0x8f
+       false, // [144] 0x90
+       false, // [145] 0x91
+       false, // [146] 0x92
+       false, // [147] 0x93
+       false, // [148] 0x94
+       false, // [149] 0x95
+       false, // [150] 0x96
+       false, // [151] 0x97
+       false, // [152] 0x98
+       false, // [153] 0x99
+       false, // [154] 0x9a
+       false, // [155] 0x9b
+       false, // [156] 0x9c
+       false, // [157] 0x9d
+       false, // [158] 0x9e
+       false, // [159] 0x9f
+       false, // [160] 0xa0
+       false, // [161] 0xa1
+       false, // [162] 0xa2
+       false, // [163] 0xa3
+       false, // [164] 0xa4
+       false, // [165] 0xa5
+       false, // [166] 0xa6
+       false, // [167] 0xa7
+       false, // [168] 0xa8
+       false, // [169] 0xa9
+       false, // [170] 0xaa
+       false, // [171] 0xab
+       false, // [172] 0xac
+       false, // [173] 0xad
+       false, // [174] 0xae
+       false, // [175] 0xaf
+       false, // [176] 0xb0
+       false, // [177] 0xb1
+       false, // [178] 0xb2
+       false, // [179] 0xb3
+       false, // [180] 0xb4
+       false, // [181] 0xb5
+       false, // [182] 0xb6
+       false, // [183] 0xb7
+       false, // [184] 0xb8
+       false, // [185] 0xb9
+       false, // [186] 0xba
+       false, // [187] 0xbb
+       false, // [188] 0xbc
+       false, // [189] 0xbd
+       false, // [190] 0xbe
+       false, // [191] 0xbf
+       false, // [192] 0xc0
+       false, // [193] 0xc1
+       false, // [194] 0xc2
+       false, // [195] 0xc3
+       false, // [196] 0xc4
+       false, // [197] 0xc5
+       false, // [198] 0xc6
+       false, // [199] 0xc7
+       false, // [200] 0xc8
+       false, // [201] 0xc9
+       false, // [202] 0xca
+       false, // [203] 0xcb
+       false, // [204] 0xcc
+       false, // [205] 0xcd
+       false, // [206] 0xce
+       false, // [207] 0xcf
+       false, // [208] 0xd0
+       false, // [209] 0xd1
+       false, // [210] 0xd2
+       false, // [211] 0xd3
+       false, // [212] 0xd4
+       false, // [213] 0xd5
+       false, // [214] 0xd6
+       false, // [215] 0xd7
+       false, // [216] 0xd8
+       false, // [217] 0xd9
+       false, // [218] 0xda
+       false, // [219] 0xdb
+       false, // [220] 0xdc
+       false, // [221] 0xdd
+       false, // [222] 0xde
+       false, // [223] 0xdf
+       false, // [224] 0xe0
+       false, // [225] 0xe1
+       false, // [226] 0xe2
+       false, // [227] 0xe3
+       false, // [228] 0xe4
+       false, // [229] 0xe5
+       false, // [230] 0xe6
+       false, // [231] 0xe7
+       false, // [232] 0xe8
+       false, // [233] 0xe9
+       false, // [234] 0xea
+       false, // [235] 0xeb
+       false, // [236] 0xec
+       false, // [237] 0xed
+       false, // [238] 0xee
+       false, // [239] 0xef
+       false, // [240] 0xf0
+       false, // [241] 0xf1
+       false, // [242] 0xf2
+       false, // [243] 0xf3
+       false, // [244] 0xf4
+       false, // [245] 0xf5
+       false, // [246] 0xf6
+       false, // [247] 0xf7
+       false, // [248] 0xf8
+       false, // [249] 0xf9
+       false, // [250] 0xfa
+       false, // [251] 0xfb
+       false, // [252] 0xfc
+       false, // [253] 0xfd
+       false, // [254] 0xfe
+       false, // [255] 0xff
+       };
+
+// Lookup table with one entry per 8-bit character code (0..255).
+// An entry is true only for the letters A, C, G, T and U, in upper or
+// lower case. Unlike g_IsNucleoChar, the wildcard 'N'/'n' is false here.
+bool g_IsACGTU[256] =
+       {
+       false, // [  0] 0x00
+       false, // [  1] 0x01
+       false, // [  2] 0x02
+       false, // [  3] 0x03
+       false, // [  4] 0x04
+       false, // [  5] 0x05
+       false, // [  6] 0x06
+       false, // [  7] 0x07
+       false, // [  8] 0x08
+       false, // [  9] 0x09
+       false, // [ 10] 0x0a
+       false, // [ 11] 0x0b
+       false, // [ 12] 0x0c
+       false, // [ 13] 0x0d
+       false, // [ 14] 0x0e
+       false, // [ 15] 0x0f
+       false, // [ 16] 0x10
+       false, // [ 17] 0x11
+       false, // [ 18] 0x12
+       false, // [ 19] 0x13
+       false, // [ 20] 0x14
+       false, // [ 21] 0x15
+       false, // [ 22] 0x16
+       false, // [ 23] 0x17
+       false, // [ 24] 0x18
+       false, // [ 25] 0x19
+       false, // [ 26] 0x1a
+       false, // [ 27] 0x1b
+       false, // [ 28] 0x1c
+       false, // [ 29] 0x1d
+       false, // [ 30] 0x1e
+       false, // [ 31] 0x1f
+       false, // [ 32] ' '
+       false, // [ 33] '!'
+       false, // [ 34] '"'
+       false, // [ 35] '#'
+       false, // [ 36] '$'
+       false, // [ 37] '%'
+       false, // [ 38] '&'
+       false, // [ 39] '''
+       false, // [ 40] '('
+       false, // [ 41] ')'
+       false, // [ 42] '*'
+       false, // [ 43] '+'
+       false, // [ 44] ','
+       false, // [ 45] '-'
+       false, // [ 46] '.'
+       false, // [ 47] '/'
+       false, // [ 48] '0'
+       false, // [ 49] '1'
+       false, // [ 50] '2'
+       false, // [ 51] '3'
+       false, // [ 52] '4'
+       false, // [ 53] '5'
+       false, // [ 54] '6'
+       false, // [ 55] '7'
+       false, // [ 56] '8'
+       false, // [ 57] '9'
+       false, // [ 58] ':'
+       false, // [ 59] ';'
+       false, // [ 60] '<'
+       false, // [ 61] '='
+       false, // [ 62] '>'
+       false, // [ 63] '?'
+       false, // [ 64] '@'
+       true,  // [ 65] 'A' (ACGT)
+       false, // [ 66] 'B'
+       true,  // [ 67] 'C' (ACGT)
+       false, // [ 68] 'D'
+       false, // [ 69] 'E'
+       false, // [ 70] 'F'
+       true,  // [ 71] 'G' (ACGT)
+       false, // [ 72] 'H'
+       false, // [ 73] 'I'
+       false, // [ 74] 'J'
+       false, // [ 75] 'K'
+       false, // [ 76] 'L'
+       false, // [ 77] 'M'
+       false, // [ 78] 'N'
+       false, // [ 79] 'O'
+       false, // [ 80] 'P'
+       false, // [ 81] 'Q'
+       false, // [ 82] 'R'
+       false, // [ 83] 'S'
+       true,  // [ 84] 'T' (ACGT)
+       true,  // [ 85] 'U' (ACGT)
+       false, // [ 86] 'V'
+       false, // [ 87] 'W'
+       false, // [ 88] 'X'
+       false, // [ 89] 'Y'
+       false, // [ 90] 'Z'
+       false, // [ 91] '['
+       false, // [ 92] '\'
+       false, // [ 93] ']'
+       false, // [ 94] '^'
+       false, // [ 95] '_'
+       false, // [ 96] '`'
+       true,  // [ 97] 'a' (ACGT)
+       false, // [ 98] 'b'
+       true,  // [ 99] 'c' (ACGT)
+       false, // [100] 'd'
+       false, // [101] 'e'
+       false, // [102] 'f'
+       true,  // [103] 'g' (ACGT)
+       false, // [104] 'h'
+       false, // [105] 'i'
+       false, // [106] 'j'
+       false, // [107] 'k'
+       false, // [108] 'l'
+       false, // [109] 'm'
+       false, // [110] 'n'
+       false, // [111] 'o'
+       false, // [112] 'p'
+       false, // [113] 'q'
+       false, // [114] 'r'
+       false, // [115] 's'
+       true,  // [116] 't' (ACGT)
+       true,  // [117] 'u' (ACGT)
+       false, // [118] 'v'
+       false, // [119] 'w'
+       false, // [120] 'x'
+       false, // [121] 'y'
+       false, // [122] 'z'
+       false, // [123] '{'
+       false, // [124] '|'
+       false, // [125] '}'
+       false, // [126] '~'
+       false, // [127] 0x7f
+       false, // [128] 0x80
+       false, // [129] 0x81
+       false, // [130] 0x82
+       false, // [131] 0x83
+       false, // [132] 0x84
+       false, // [133] 0x85
+       false, // [134] 0x86
+       false, // [135] 0x87
+       false, // [136] 0x88
+       false, // [137] 0x89
+       false, // [138] 0x8a
+       false, // [139] 0x8b
+       false, // [140] 0x8c
+       false, // [141] 0x8d
+       false, // [142] 0x8e
+       false, // [143] 0x8f
+       false, // [144] 0x90
+       false, // [145] 0x91
+       false, // [146] 0x92
+       false, // [147] 0x93
+       false, // [148] 0x94
+       false, // [149] 0x95
+       false, // [150] 0x96
+       false, // [151] 0x97
+       false, // [152] 0x98
+       false, // [153] 0x99
+       false, // [154] 0x9a
+       false, // [155] 0x9b
+       false, // [156] 0x9c
+       false, // [157] 0x9d
+       false, // [158] 0x9e
+       false, // [159] 0x9f
+       false, // [160] 0xa0
+       false, // [161] 0xa1
+       false, // [162] 0xa2
+       false, // [163] 0xa3
+       false, // [164] 0xa4
+       false, // [165] 0xa5
+       false, // [166] 0xa6
+       false, // [167] 0xa7
+       false, // [168] 0xa8
+       false, // [169] 0xa9
+       false, // [170] 0xaa
+       false, // [171] 0xab
+       false, // [172] 0xac
+       false, // [173] 0xad
+       false, // [174] 0xae
+       false, // [175] 0xaf
+       false, // [176] 0xb0
+       false, // [177] 0xb1
+       false, // [178] 0xb2
+       false, // [179] 0xb3
+       false, // [180] 0xb4
+       false, // [181] 0xb5
+       false, // [182] 0xb6
+       false, // [183] 0xb7
+       false, // [184] 0xb8
+       false, // [185] 0xb9
+       false, // [186] 0xba
+       false, // [187] 0xbb
+       false, // [188] 0xbc
+       false, // [189] 0xbd
+       false, // [190] 0xbe
+       false, // [191] 0xbf
+       false, // [192] 0xc0
+       false, // [193] 0xc1
+       false, // [194] 0xc2
+       false, // [195] 0xc3
+       false, // [196] 0xc4
+       false, // [197] 0xc5
+       false, // [198] 0xc6
+       false, // [199] 0xc7
+       false, // [200] 0xc8
+       false, // [201] 0xc9
+       false, // [202] 0xca
+       false, // [203] 0xcb
+       false, // [204] 0xcc
+       false, // [205] 0xcd
+       false, // [206] 0xce
+       false, // [207] 0xcf
+       false, // [208] 0xd0
+       false, // [209] 0xd1
+       false, // [210] 0xd2
+       false, // [211] 0xd3
+       false, // [212] 0xd4
+       false, // [213] 0xd5
+       false, // [214] 0xd6
+       false, // [215] 0xd7
+       false, // [216] 0xd8
+       false, // [217] 0xd9
+       false, // [218] 0xda
+       false, // [219] 0xdb
+       false, // [220] 0xdc
+       false, // [221] 0xdd
+       false, // [222] 0xde
+       false, // [223] 0xdf
+       false, // [224] 0xe0
+       false, // [225] 0xe1
+       false, // [226] 0xe2
+       false, // [227] 0xe3
+       false, // [228] 0xe4
+       false, // [229] 0xe5
+       false, // [230] 0xe6
+       false, // [231] 0xe7
+       false, // [232] 0xe8
+       false, // [233] 0xe9
+       false, // [234] 0xea
+       false, // [235] 0xeb
+       false, // [236] 0xec
+       false, // [237] 0xed
+       false, // [238] 0xee
+       false, // [239] 0xef
+       false, // [240] 0xf0
+       false, // [241] 0xf1
+       false, // [242] 0xf2
+       false, // [243] 0xf3
+       false, // [244] 0xf4
+       false, // [245] 0xf5
+       false, // [246] 0xf6
+       false, // [247] 0xf7
+       false, // [248] 0xf8
+       false, // [249] 0xf9
+       false, // [250] 0xfa
+       false, // [251] 0xfb
+       false, // [252] 0xfc
+       false, // [253] 0xfd
+       false, // [254] 0xfe
+       false, // [255] 0xff
+       };
+
+// Frequency table with one entry for each of the 20 standard amino acids,
+// listed alphabetically by one-letter code (A, C, D, ... W, Y).
+// The twenty values add up to 0.9998.
+// NOTE(review): the entry order presumably matches the amino letter
+// indexes used by the other alphabet tables (e.g. g_LetterToCharAmino)
+// -- confirm before indexing this array with a letter.
+float g_AminoFreqs[20] =
+       {
+       0.0777f, // 'A' = Ala
+       0.0161f, // 'C' = Cys
+       0.0527f, // 'D' = Asp
+       0.0631f, // 'E' = Glu
+       0.0417f, // 'F' = Phe
+       0.0718f, // 'G' = Gly
+       0.0238f, // 'H' = His
+       0.0606f, // 'I' = Ile
+       0.0601f, // 'K' = Lys
+       0.0906f, // 'L' = Leu
+       0.0233f, // 'M' = Met
+       0.0439f, // 'N' = Asn
+       0.0456f, // 'P' = Pro
+       0.0368f, // 'Q' = Gln
+       0.0526f, // 'R' = Arg
+       0.0639f, // 'S' = Ser
+       0.0570f, // 'T' = Thr
+       0.0712f, // 'V' = Val
+       0.0134f, // 'W' = Trp
+       0.0339f, // 'Y' = Tyr
+       };
diff --git a/alpha.h b/alpha.h
new file mode 100644 (file)
index 0000000..e021b7f
--- /dev/null
+++ b/alpha.h
@@ -0,0 +1,50 @@
+#ifndef alpha_h\r
+#define alpha_h\r
+\r
+// Declarations for the sequence alphabet lookup tables and for the helpers\r
+// that convert between characters, letter indexes and packed words.\r
+\r
+#include <limits.h>\r
+#include <string>\r
+\r
+// NOTE(review): a using-directive in a header applies to every file that\r
+// includes it; the declarations below rely on it for 'string'.\r
+using namespace std;\r
+\r
+// Sentinel values. NOTE(review): presumably what the lookup tables hold for\r
+// inputs without a valid mapping -- confirm against the table definitions.\r
+// INVALID_LETTER is 0, which is also an ordinary letter index (the\r
+// WordToStr* functions look up letter 0), so the two cannot be told apart.\r
+const unsigned INVALID_LETTER = 0;\r
+const unsigned char INVALID_CHAR = '?';\r
+\r
+// Lookup tables. The array bounds are omitted here and come from the\r
+// definitions: g_IsAminoChar, g_IsNucleoChar and g_IsACGTU are indexed by\r
+// character code, and g_AminoFreqs is defined with 20 entries.\r
+extern unsigned g_CharToLetterAmino[];\r
+extern unsigned g_CharToLetterAminoStop[];\r
+extern unsigned char g_LetterToCharAmino[];\r
+extern unsigned g_CharToLetterNucleo[];\r
+extern unsigned char g_LetterToCharNucleo[];\r
+extern unsigned g_CodonWordToAminoLetter[];\r
+extern char g_CodonWordToAminoChar[];\r
+extern unsigned char g_CharToCompChar[];\r
+extern unsigned g_CharToCompLetter[];\r
+extern bool g_IsAminoChar[];\r
+extern bool g_IsNucleoChar[];\r
+extern bool g_IsACGTU[];\r
+extern float g_AminoFreqs[];\r
+\r
+// NOTE(review): "Red" presumably stands for a reduced alphabet -- confirm.\r
+extern unsigned g_CharToLetterRed[];\r
+extern unsigned char g_LetterToCharRed[];\r
+extern unsigned g_RedAlphaSize;\r
+\r
+void LogRedAlphaRed();\r
+void ReadRedAlphaFromFile(const string &FileName);\r
+\r
+// Translate three nucleotide characters (c1 is the first codon position)\r
+// into the character of the amino acid they encode.\r
+unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2,\r
+  unsigned char c3);\r
+\r
+// True when Letter is amino letter index 10.\r
+// NOTE(review): 10 is the position of 'M' (Met) in the alphabetical\r
+// ordering used by g_AminoFreqs -- confirm the letter tables share it.\r
+static inline bool AminoLetterIsStartCodon(unsigned char Letter)\r
+       {\r
+       return Letter == 10;\r
+       }\r
+\r
+// True when Letter is 20, the first index past the 20 amino letters that\r
+// WordToStrAmino can produce (it decodes with Word%20).\r
+// NOTE(review): presumably the stop letter of g_CharToLetterAminoStop --\r
+// confirm.\r
+static inline bool AminoLetterIsStopCodon(unsigned char Letter)\r
+       {\r
+       return Letter == 20;\r
+       }\r
+\r
+// Decode a packed word into WordLength characters, most significant letter\r
+// first (base 4 for nucleotide words, base 20 for amino words).\r
+// WordToStrNucleo and WordToStrAmino return a pointer to a function-local\r
+// static 32-byte buffer: the text is overwritten by the next call, and\r
+// WordLength must stay below 32. WordToStr forwards to one of those two.\r
+// WordToStrAmino2 writes into the caller-supplied Str, which needs room\r
+// for WordLength+1 bytes, and returns Str.\r
+const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo);\r
+const char *WordToStrNucleo(unsigned Word, unsigned WordLength);\r
+const char *WordToStrAmino(unsigned Word, unsigned WordLength);\r
+const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str);\r
+\r
+#endif // alpha_h\r
diff --git a/alpha2.cpp b/alpha2.cpp
new file mode 100644 (file)
index 0000000..26bc1c6
--- /dev/null
@@ -0,0 +1,100 @@
+#include "myutils.h"\r
+#include "alpha.h"\r
+#include "timing.h"\r
+\r
+// Report whether c is one of the two gap symbols, '-' or '.'.\r
+bool isgap(byte c)\r
+       {\r
+       switch (c)\r
+               {\r
+       case '-':\r
+       case '.':\r
+               return true;\r
+       default:\r
+               return false;\r
+               }\r
+       }\r
+\r
+const char *WordToStrAmino(unsigned Word, unsigned WordLength)\r
+       {\r
+       static char Str[32];\r
+       for (unsigned i = 0; i < WordLength; ++i)\r
+               {\r
+               unsigned Letter = Word%20;\r
+               Str[WordLength-i-1] = g_LetterToCharAmino[Letter];\r
+               Word /= 20;\r
+               }\r
+       Str[WordLength] = 0;\r
+       return Str;\r
+       }\r
+\r
+const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str)\r
+       {\r
+       for (unsigned i = 0; i < WordLength; ++i)\r
+               {\r
+               unsigned Letter = Word%20;\r
+               Str[WordLength-i-1] = g_LetterToCharAmino[Letter];\r
+               Word /= 20;\r
+               }\r
+       Str[WordLength] = 0;\r
+       return Str;\r
+       }\r
+\r
+const char *WordToStrNucleo(unsigned Word, unsigned WordLength)\r
+       {\r
+       static char Str[32];\r
+       for (unsigned i = 0; i < WordLength; ++i)\r
+               {\r
+               unsigned Letter = Word%4;\r
+               Str[WordLength-i-1] = g_LetterToCharNucleo[Letter];\r
+               Word /= 4;\r
+               }\r
+       Str[WordLength] = 0;\r
+       return Str;\r
+       }\r
+\r
+// Decode a packed word with the alphabet selected by Nucleo: the nucleotide\r
+// decoder when true, the amino decoder when false. The returned pointer is\r
+// whatever the chosen decoder returns.\r
+const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo)\r
+       {\r
+       if (Nucleo)\r
+               return WordToStrNucleo(Word, WordLength);\r
+       return WordToStrAmino(Word, WordLength);\r
+       }\r
+\r
+byte *RevCompAlloc(const byte *Seq, unsigned L)\r
+       {\r
+       byte *RCSeq = MYALLOC(byte, L, Alpha);\r
+\r
+       for (unsigned i = 0; i < L; ++i)\r
+               RCSeq[L-i-1] = g_CharToCompChar[Seq[i]];\r
+\r
+       return RCSeq;\r
+       }\r
+\r
+void RevCompInPlace(byte *Seq, unsigned L)\r
+       {\r
+       unsigned L1 = L - 1;\r
+       unsigned L2 = L/2;\r
+       for (unsigned i = 0; i < L2; ++i)\r
+               {\r
+               unsigned j = L1 - i;\r
+               unsigned ci = Seq[i];\r
+               unsigned cj = Seq[j];\r
+\r
+               unsigned ri = g_CharToCompChar[ci];\r
+               unsigned rj = g_CharToCompChar[cj];\r
+\r
+               Seq[i] = rj;\r
+               Seq[j] = ri;\r
+               }\r
+\r
+       if (L%2 == 1)\r
+               Seq[L2] = g_CharToCompChar[Seq[L2]];\r
+       }\r
+\r
+// Write the reverse complement of Seq[0..L-1] into RCSeq[0..L-1]: input\r
+// position i is complemented through g_CharToCompChar and stored at\r
+// position L-1-i. RCSeq is expected to be a separate buffer of at least L\r
+// bytes; RevCompInPlace covers the in-place case.\r
+void RevComp(const byte *Seq, unsigned L, byte *RCSeq)\r
+       {\r
+       unsigned Dest = L;\r
+       for (unsigned Src = 0; Src < L; ++Src)\r
+               {\r
+               --Dest;\r
+               RCSeq[Dest] = g_CharToCompChar[Seq[Src]];\r
+               }\r
+       }\r
+\r
+unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2,\r
+  unsigned char c3)\r
+       {\r
+       unsigned Letter1 = g_CharToLetterNucleo[c1];\r
+       unsigned Letter2 = g_CharToLetterNucleo[c2];\r
+       unsigned Letter3 = g_CharToLetterNucleo[c3];\r
+       unsigned Word = Letter1*(4*4) + Letter2*4 + Letter3;\r
+\r
+       unsigned Letter = g_CodonWordToAminoLetter[Word];\r
+       return g_LetterToCharAmino[Letter];\r
+       }\r
index 4f3047d4d3dcded81e738fba36a79abdc81e680b..70349e50f0264b68fba76c4f04ebbddd0c8c96f6 100644 (file)
@@ -77,8 +77,15 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
                //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
                //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
                
-               string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
-               blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+               string blastCommand;
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               
+                       blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
+                       blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+               #else
+                       blastCommand =  "\"" + path + "blast\\bin\\blastall\" -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
+                       blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+               #endif
                system(blastCommand.c_str());
                
                ifstream m8FileHandle;
@@ -133,7 +140,7 @@ vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) {
                        blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
                        blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
                #else
-                       blastCommand = path + "blast\\bin\\megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
+                       blastCommand =  "\"" + path + "blast\\bin\\megablast\" -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
                        blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
                #endif
                
@@ -206,7 +213,7 @@ void BlastDB::generateDB() {
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;       //      format the database, -o option gives us the ability
                #else
-                       formatdbCommand = path + "blast\\bin\\formatdb -p F -o T -i " + dbFileName;
+                       formatdbCommand = "\"" + path + "blast\\bin\\formatdb\" -p F -o T -i " + dbFileName;
                #endif
                system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
                                                                                                                                        //      option tells formatdb that seqs are DNA, not prot
diff --git a/chainer.h b/chainer.h
new file mode 100644 (file)
index 0000000..a954dc0
--- /dev/null
+++ b/chainer.h
@@ -0,0 +1,79 @@
+#ifndef chainer_h\r
+#define chainer_h\r
+\r
+#include "hsp.h"\r
+#include "seq.h"\r
+#include <list>\r
+\r
+const float BAD_SCORE = -9e9f;\r
+\r
+// Record of a single hit against a target sequence.\r
+// NOTE(review): the field meanings are inferred from their names (index of\r
+// the target, low/high coordinates on the target, frame of the query) --\r
+// confirm against the code that fills them in.\r
+struct TargetHit\r
+       {\r
+       unsigned TargetIndex;\r
+       unsigned TargetLo;\r
+       unsigned TargetHi;\r
+       int QueryFrame;\r
+       float RawScore; // SOMETIMES USED FOR BIT SCORE!!!\r
+//     unsigned TargetLength;\r
+\r
+       // Write TargetLo, TargetHi, QueryFrame and RawScore to the log as one\r
+       // line; TargetIndex is not included.\r
+       void LogMe() const\r
+               {\r
+               Log("lo %u, hi %u, frame %d, score %.1f\n",\r
+                 TargetLo, TargetHi, QueryFrame, RawScore);\r
+               }\r
+       };\r
+\r
+// Plain record describing a chain of HSPs.\r
+// NOTE(review): presumably the index of the chain's last HSP, the highest\r
+// coordinates reached on sequences A and B, and the accumulated chain\r
+// score -- confirm against the chaining code.\r
+struct ChainData\r
+       {\r
+       unsigned LastHSPIndex;\r
+       unsigned Ahi;\r
+       unsigned Bhi;\r
+       float Score;\r
+       };\r
+\r
+// Chains HSPs (see hsp.h) into an optimal chain and resolves overlaps\r
+// between them. Only the interface is declared here.\r
+// NOTE(review): the class holds raw pointer members and declares a\r
+// destructor but no copy constructor or assignment operator, so a copied\r
+// instance would share those pointers -- confirm instances are never\r
+// copied.\r
+class Chainer\r
+       {\r
+public:\r
+       HSPData **m_HSPs; // memory owned elsewhere\r
+       unsigned m_HSPCount;\r
+       unsigned m_MaxHSPCount;\r
+\r
+       BPData *m_BPs;\r
+\r
+       unsigned *m_PrevHSPIndexes;             // Predecessor in chain\r
+       float *m_HSPIndexToChainScore;\r
+\r
+       list<unsigned> m_Chains;                // Live HSP indexes\r
+\r
+public:\r
+       Chainer();\r
+       ~Chainer();\r
+       void Reset();\r
+       void Clear(bool ctor = false);\r
+\r
+       // Chain HSPCount HSPs; the selected chain is returned through\r
+       // OptChain/OptChainLength and a float score is the return value.\r
+       float Chain(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain,\r
+         unsigned &OptChainLength);\r
+       bool ResolveOverlaps(const SeqData &SA, const SeqData &SB, double MinScore,\r
+         const float * const *SubstMx, HSPData **InHSPs, unsigned InHSPCount,\r
+         HSPData **OutHSPs, unsigned &OutHSPCount);\r
+       void ResolveOverlap(HSPData &HSP1, HSPData &HSP2);\r
+\r
+       // Same signature as Chain().\r
+       // NOTE(review): presumably a brute-force reference implementation\r
+       // used for checking -- confirm.\r
+       float ChainBrute(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain,\r
+         unsigned &OptChainLength);\r
+       void LogMe() const;\r
+       void LogHSPs(HSPData **HSPs, unsigned HSPCount) const;\r
+       void LogBPs() const;\r
+\r
+       // Stateless helpers that operate on a chain passed in by the caller.\r
+       static bool IsValidChain(HSPData **HSPs, unsigned HSPCount);\r
+       static void AssertValidChain(HSPData **HSPs, unsigned HSPCount);\r
+       static void LogChain(HSPData **HSPs, unsigned HSPCount);\r
+       static void LogChain2(HSPData **HSPs, unsigned HSPCount);\r
+       static float GetChainScore(HSPData **HSPs, unsigned HSPCount);\r
+\r
+private:\r
+       void AllocHSPCount(unsigned MaxHSPCount);\r
+       void SetBPs();\r
+       void SortBPs();\r
+       unsigned FindBestChainLT(unsigned Ahi, unsigned Bhi);\r
+       };\r
+\r
+#endif // chainer_h\r
diff --git a/chime.h b/chime.h
new file mode 100644 (file)
index 0000000..1b0662a
--- /dev/null
+++ b/chime.h
@@ -0,0 +1,104 @@
+#ifndef chime_h\r
+#define chime_h\r
+\r
+#include "seq.h"\r
+\r
+// Result record for testing one query (Q) against two candidates (A, B).\r
+// A freshly constructed or cleared record has empty strings, UINT_MAX in\r
+// every unsigned field and -1 in every floating-point field.\r
+// NOTE(review): the meanings of the individual counters and coordinates\r
+// are not visible here -- confirm against the code that fills them in.\r
+struct ChimeHit2\r
+       {\r
+       // Labels of the query and of candidates A and B.\r
+       string QLabel;\r
+       string ALabel;\r
+       string BLabel;\r
+       // NOTE(review): presumably the Q/A/B rows of a three-way alignment.\r
+       string Q3;\r
+       string A3;\r
+       string B3;\r
+\r
+       // Percent identities between the named pairs.\r
+       double PctIdQT, PctIdQA, PctIdQB, PctIdQM, PctIdAB;\r
+\r
+       unsigned ColLo;\r
+       unsigned ColXLo;\r
+       unsigned ColXHi;\r
+       unsigned ColHi;\r
+       unsigned QXLo;\r
+       unsigned QXHi;\r
+\r
+       double Div;\r
+       double Score;\r
+       double H;\r
+\r
+       unsigned CS_LY, CS_LN, CS_LA, CS_RY, CS_RN, CS_RA;\r
+\r
+       float AbQ;\r
+       float AbA;\r
+       float AbB;\r
+\r
+       // Start out in the cleared state.\r
+       ChimeHit2()\r
+               {\r
+               Clear();\r
+               }\r
+\r
+       // Reset every field to its "unset" value.\r
+       void Clear()\r
+               {\r
+               QLabel.clear();\r
+               ALabel.clear();\r
+               BLabel.clear();\r
+               Q3.clear();\r
+               A3.clear();\r
+               B3.clear();\r
+\r
+               ColLo = UINT_MAX;\r
+               ColXLo = UINT_MAX;\r
+               ColXHi = UINT_MAX;\r
+               ColHi = UINT_MAX;\r
+               QXLo = UINT_MAX;\r
+               QXHi = UINT_MAX;\r
+\r
+               CS_LY = UINT_MAX;\r
+               CS_LN = UINT_MAX;\r
+               CS_LA = UINT_MAX;\r
+               CS_RY = UINT_MAX;\r
+               CS_RN = UINT_MAX;\r
+               CS_RA = UINT_MAX;\r
+\r
+               PctIdQT = -1.0;\r
+               PctIdQA = -1.0;\r
+               PctIdQB = -1.0;\r
+               PctIdQM = -1.0;\r
+               PctIdAB = -1.0;\r
+\r
+               Div = -1.0;\r
+               Score = -1.0;\r
+               H = -1.0;\r
+\r
+               AbQ = -1.0f;\r
+               AbA = -1.0f;\r
+               AbB = -1.0f;\r
+               }\r
+\r
+       // True when Score, Div, CS_LY and CS_RY all reach their configured\r
+       // minimums (opt_minh, opt_mindiv and, for both counts, opt_mindiffs).\r
+       // The tests are written as !(x >= limit) so that a NaN still fails.\r
+       bool Accept() const\r
+               {\r
+               if (!(Score >= opt_minh))\r
+                       return false;\r
+               if (!(Div >= opt_mindiv))\r
+                       return false;\r
+               if (!(CS_LY >= opt_mindiffs))\r
+                       return false;\r
+               return CS_RY >= opt_mindiffs;\r
+               }\r
+\r
+       // Write a one-line summary of the hit to the log.\r
+       // NOTE(review): the leading flag uses fixed thresholds of 1.0 rather\r
+       // than Accept() -- confirm that is intended.\r
+       void LogMe() const\r
+               {\r
+               const bool Flag = (Score >= 1.0 && Div >= 1.0);\r
+               Log("@L %c ", yon(Flag));\r
+               Log(" %.4f", Score);\r
+               Log(" LY %u LN %u LA %u", CS_LY, CS_LN, CS_LA);\r
+               Log(" RY %u RN %u RA %u", CS_RY, CS_RN, CS_RA);\r
+               Log(" Div %.1f%%", Div);\r
+               Log(" Q=%s", QLabel.c_str());\r
+               Log(" A=%s", ALabel.c_str());\r
+               Log(" B=%s", BLabel.c_str());\r
+               Log(" QA %.1f%% QB=%.1f%% AB=%.1f%% QM=%.1f%%", PctIdQA, PctIdQB, PctIdAB, PctIdQM);\r
+               Log("\n");\r
+               }\r
+\r
+       // Ordering for sorting: a higher Score sorts first, and equal scores\r
+       // are ordered by higher Div first.\r
+       bool operator<(const ChimeHit2 &rhs) const\r
+               {\r
+               if (Score != rhs.Score)\r
+                       return Score > rhs.Score;\r
+               return Div > rhs.Div;\r
+               }\r
+       };\r
+\r
+// True only for the upper-case letters 'A', 'C', 'G' and 'T'. Lower-case\r
+// letters, 'U' and every other character give false.\r
+static inline bool isacgt(char c)\r
+       {\r
+       switch (c)\r
+               {\r
+       case 'A':\r
+       case 'C':\r
+       case 'G':\r
+       case 'T':\r
+               return true;\r
+       default:\r
+               return false;\r
+               }\r
+       }\r
+\r
+// True when c is one of the two gap symbols, '-' or '.'.\r
+static bool inline isgap(char c)\r
+       {\r
+       switch (c)\r
+               {\r
+       case '-':\r
+       case '.':\r
+               return true;\r
+       default:\r
+               return false;\r
+               }\r
+       }\r
+\r
+void GetChunkInfo(unsigned L, unsigned &Length, vector<unsigned> &Los);\r
+float GetAbFromLabel(const string &Label);\r
+void WriteChimeHitCS(FILE *f, const ChimeHit2 &Hit);\r
+void WriteChimeHit(FILE *f, const ChimeHit2 &Hit);\r
+void WriteChimeFileHdr(FILE *f);\r
+\r
+#endif // chime_h\r
index 58e678198acc83e980942317984361b46f6194e0..abd699eff98156435de11ad2f3cf891b09ffa76e 100644 (file)
@@ -605,6 +605,9 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                        remove((accnos + toString(processIDS[i]) + ".temp").c_str());
                }
 #endif         
+               //get rid of the file pieces.
+               for (int i = 0; i < files.size(); i++) { remove(files[i].c_str()); }
+               
                return num;     
        }
        catch(exception& e) {
diff --git a/chimerauchimecommand.h b/chimerauchimecommand.h
new file mode 100644 (file)
index 0000000..36e4a39
--- /dev/null
@@ -0,0 +1,58 @@
+#ifndef CHIMERAUCHIMECOMMAND_H
+#define CHIMERAUCHIMECOMMAND_H
+
+
+/*
+ *  chimerauchimecommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 5/13/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+
+/***********************************************************/
+
+// Command subclass registered under the name "chimera.uchime".
+// CommandFactory builds it either with an option string or with no
+// arguments.
+class ChimeraUchimeCommand : public Command {
+public:
+       // Constructed from the option string handed over by CommandFactory.
+       ChimeraUchimeCommand(string);
+       ChimeraUchimeCommand();
+       ~ChimeraUchimeCommand() {}
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "chimera.uchime";              }
+       string getCommandCategory()             { return "Sequence Processing"; }
+       string getHelpString(); 
+       string getCitation() { return "http://drive5.com/uchime/ \nhttp://www.mothur.org/wiki/Chimera.uchime"; }
+       
+       
+       int execute(); 
+       // Prints the help string through the shared mothur output object.
+       void help() { m->mothurOut(getHelpString()); }          
+       
+private:
+       vector<int> processIDS;   //processid
+       int driver(string, string, string);
+       // NOTE(review): arguments look like (output file, input file, accnos
+       // file) judging by the .cpp hunk -- confirm against the definition.
+       int createProcesses(string, string, string);
+       
+       // MPI driver is only declared when the build defines USE_MPI.
+#ifdef USE_MPI
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
+#endif
+       
+       bool abort;
+       string fastafile, templatefile, outputDir, namefile;
+       int processors;
+       
+       vector<string> outputNames;
+       vector<string> fastaFileNames;
+       vector<string> nameFileNames;
+       
+};
+
+/***********************************************************/
+
+#endif
+
+
index 46f49b28f7b2de79b6525c05b2110325684194da..d72dd7139735f16b8fcd8b02aa39058a5f4a3110 100644 (file)
@@ -57,7 +57,7 @@ vector<string> CollectSharedCommand::setParameters(){
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
-               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-skulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
                CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -119,7 +119,7 @@ CollectSharedCommand::CollectSharedCommand(){
                outputTypes["sharednseqs"] = tempOutNames;
                outputTypes["ochiai"] = tempOutNames;
                outputTypes["anderberg"] = tempOutNames;
-               outputTypes["skulczynski"] = tempOutNames;
+               outputTypes["kulczynski"] = tempOutNames;
                outputTypes["kulczynskicody"] = tempOutNames;
                outputTypes["lennon"] = tempOutNames;
                outputTypes["morisitahorn"] = tempOutNames;
@@ -191,7 +191,7 @@ CollectSharedCommand::CollectSharedCommand(string option)  {
                        outputTypes["sharednseqs"] = tempOutNames;
                        outputTypes["ochiai"] = tempOutNames;
                        outputTypes["anderberg"] = tempOutNames;
-                       outputTypes["skulczynski"] = tempOutNames;
+                       outputTypes["kulczynski"] = tempOutNames;
                        outputTypes["kulczynskicody"] = tempOutNames;
                        outputTypes["lennon"] = tempOutNames;
                        outputTypes["morisitahorn"] = tempOutNames;
@@ -335,7 +335,7 @@ CollectSharedCommand::CollectSharedCommand(string option)  {
                                                }else if (Estimators[i] == "anderberg") { 
                                                        cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
                                                        outputNames.push_back(fileNameRoot+"anderberg"); outputTypes["anderberg"].push_back(fileNameRoot+"anderberg");
-                                               }else if (Estimators[i] == "skulczynski") { 
+                                               }else if (Estimators[i] == "kulczynski") { 
                                                        cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
                                                        outputNames.push_back(fileNameRoot+"kulczynski"); outputTypes["kulczynski"].push_back(fileNameRoot+"kulczynski");
                                                }else if (Estimators[i] == "kulczynskicody") { 
index 63e0690f40cebb2b1ac0bf1b481a6848261c71bd..681e3e59f7e987a792b934d4ade36e182ed984ca 100644 (file)
@@ -69,6 +69,7 @@
 #include "chimeraslayercommand.h"
 #include "chimerapintailcommand.h"
 #include "chimerabellerophoncommand.h"
+#include "chimerauchimecommand.h"
 #include "setlogfilecommand.h"
 #include "phylodiversitycommand.h"
 #include "makegroupcommand.h"
@@ -247,6 +248,7 @@ CommandFactory::CommandFactory(){
        commands["chimera.ccode"]               = "MPIEnabled";
        commands["chimera.check"]               = "MPIEnabled";
        commands["chimera.slayer"]              = "MPIEnabled";
+       commands["chimera.uchime"]              = "MPIEnabled";
        commands["chimera.pintail"]             = "MPIEnabled";
        commands["chimera.bellerophon"] = "MPIEnabled";
        commands["screen.seqs"]                 = "MPIEnabled";
@@ -351,6 +353,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "chimera.ccode")                 {       command = new ChimeraCcodeCommand(optionString);                        }
                else if(commandName == "chimera.check")                 {       command = new ChimeraCheckCommand(optionString);                        }
                else if(commandName == "chimera.slayer")                {       command = new ChimeraSlayerCommand(optionString);                       }
+               else if(commandName == "chimera.uchime")                {       command = new ChimeraUchimeCommand(optionString);                       }
                else if(commandName == "chimera.pintail")               {       command = new ChimeraPintailCommand(optionString);                      }
                else if(commandName == "chimera.bellerophon")   {       command = new ChimeraBellerophonCommand(optionString);          }
                else if(commandName == "phylotype")                             {       command = new PhylotypeCommand(optionString);                           }
@@ -486,6 +489,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "classify.seqs")                 {       pipecommand = new ClassifySeqsCommand(optionString);                    }
                else if(commandName == "chimera.ccode")                 {       pipecommand = new ChimeraCcodeCommand(optionString);                    }
                else if(commandName == "chimera.check")                 {       pipecommand = new ChimeraCheckCommand(optionString);                    }
+               else if(commandName == "chimera.uchime")                {       pipecommand = new ChimeraUchimeCommand(optionString);                   }
                else if(commandName == "chimera.slayer")                {       pipecommand = new ChimeraSlayerCommand(optionString);                   }
                else if(commandName == "chimera.pintail")               {       pipecommand = new ChimeraPintailCommand(optionString);                  }
                else if(commandName == "chimera.bellerophon")   {       pipecommand = new ChimeraBellerophonCommand(optionString);              }
@@ -610,6 +614,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "chimera.ccode")                 {       shellcommand = new ChimeraCcodeCommand();                       }
                else if(commandName == "chimera.check")                 {       shellcommand = new ChimeraCheckCommand();                       }
                else if(commandName == "chimera.slayer")                {       shellcommand = new ChimeraSlayerCommand();                      }
+               else if(commandName == "chimera.uchime")                {       shellcommand = new ChimeraUchimeCommand();                      }
                else if(commandName == "chimera.pintail")               {       shellcommand = new ChimeraPintailCommand();                     }
                else if(commandName == "chimera.bellerophon")   {       shellcommand = new ChimeraBellerophonCommand();         }
                else if(commandName == "phylotype")                             {       shellcommand = new PhylotypeCommand();                          }
diff --git a/diagbox.h b/diagbox.h
new file mode 100644 (file)
index 0000000..0c5846c
--- /dev/null
+++ b/diagbox.h
@@ -0,0 +1,193 @@
+#ifndef diagbox_h\r
+#define diagbox_h\r
+\r
+struct DiagBox;\r
+\r
+void GetDiagBox(unsigned LA, unsigned LB, unsigned DiagLo, unsigned DiagHi, DiagBox &Box);\r
+void GetDiagRange(unsigned LA, unsigned LB, unsigned d,\r
+  unsigned &mini, unsigned &minj, unsigned &maxi, unsigned &maxj);\r
+void GetDiagLoHi(unsigned LA, unsigned LB, const char *Path,\r
+  unsigned &dlo, unsigned &dhi);\r
+\r
+// Band of diagonals dlo..dhi inside the LA x LB alignment matrix, together\r
+// with the end points (mini,minj)-(maxi,maxj) of the two bounding\r
+// diagonals. A diagonal is numbered d = LA - i + j (see GetDiag).\r
+struct DiagBox\r
+       {\r
+       // Leaves every field uninitialized; call Init() before use.\r
+       DiagBox()\r
+               {\r
+               }\r
+\r
+       DiagBox(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi)\r
+               {\r
+               Init(LA_, LB_, DiagLo, DiagHi);\r
+               }\r
+\r
+       // Hands *this to GetDiagBox to be filled in, then checks invariants.\r
+       void Init(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi)\r
+               {\r
+               GetDiagBox(LA_, LB_, DiagLo, DiagHi, *this);\r
+               Validate();\r
+               }\r
+\r
+       unsigned LA;\r
+       unsigned LB;\r
+\r
+       unsigned dlo;\r
+       unsigned dhi;\r
+\r
+       unsigned dlo_mini;\r
+       unsigned dlo_minj;\r
+\r
+       unsigned dlo_maxi;\r
+       unsigned dlo_maxj;\r
+\r
+       unsigned dhi_mini;\r
+       unsigned dhi_minj;\r
+\r
+       unsigned dhi_maxi;\r
+       unsigned dhi_maxj;\r
+\r
+       // Diagonal number of cell (i, j).\r
+       unsigned GetDiag(unsigned i, unsigned j) const\r
+               {\r
+               return LA - i + j;\r
+               }\r
+\r
+// i, j are positions 0..LA-1, 0..LB-1.\r
+       bool InBox(unsigned i, unsigned j) const\r
+               {\r
+               unsigned d = GetDiag(i, j);\r
+               return d >= dlo && d <= dhi;\r
+               }\r
+\r
+/***\r
+i, j are 0-based prefix lengths 0..LA, 0..LB.\r
+\r
+A full path is in the box iff all match pairs are in the box.\r
+\r
+A partial path (that aligns a prefix of A to a prefix of B as\r
+in D.P.) is in the box iff it is the prefix of at least\r
+one full path that is in the box.\r
+\r
+A D.P. matrix entry X[i][j] is in the box iff there is at\r
+least one full path aligning the first i letters of A and\r
+the first j letters of B ending in a column of type X, i.e.\r
+if there exists a partial path in the box that ends in X.\r
+\r
+Assume terminals appear in all paths, and DI/ID forbidden.\r
+\r
+Intuitively seems that by these definitions D is in box iff\r
+DM or MD is in box, I is in box iff IM or MI is in box.\r
+Don't have proof..\r
+***/\r
+       bool InBoxDPM(unsigned i, unsigned j) const\r
+               {\r
+       // Special case for M[0][0]\r
+               if (i == 0 && j == 0)\r
+                       return true;\r
+               if (i == 0 || j == 0)\r
+                       return false;\r
+               unsigned d = GetDiag(i-1, j-1);\r
+               return d >= dlo && d <= dhi;\r
+               }\r
+\r
+       // D entry is in the box iff the M entry before it (MD) or the M entry\r
+       // after it (DM) is in the box.\r
+       bool InBoxDPD(unsigned i, unsigned j) const\r
+               {\r
+               bool MD = i == 0 ? false : InBoxDPM(i-1, j);\r
+               bool DM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);\r
+               return MD || DM;\r
+               }\r
+\r
+       // I entry is in the box iff the M entry before it (MI) or the M entry\r
+       // after it (IM) is in the box.\r
+       bool InBoxDPI(unsigned i, unsigned j) const\r
+               {\r
+               bool MI = j == 0 ? false : InBoxDPM(i, j-1);\r
+               bool IM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);\r
+               return MI || IM;\r
+               }\r
+\r
+       // d = LA - i + j = 1 .. LA+LB-1\r
+       void Validate() const\r
+               {\r
+               asserta(dlo <= dhi);\r
+               asserta(dlo >= GetDiag(LA-1, 0));\r
+               asserta(dhi <= GetDiag(0, LB-1));\r
+\r
+               asserta(GetDiag(dlo_mini, dlo_minj) == dlo);\r
+               asserta(GetDiag(dlo_maxi, dlo_maxj) == dlo);\r
+               asserta(GetDiag(dhi_mini, dhi_minj) == dhi);\r
+               asserta(GetDiag(dhi_maxi, dhi_maxj) == dhi);\r
+\r
+               asserta(dlo_mini >= dhi_mini);\r
+               asserta(dlo_minj <= dhi_minj);\r
+               asserta(dlo_maxi >= dhi_maxi);\r
+               asserta(dlo_maxj <= dhi_maxj);\r
+               }\r
+\r
+       unsigned GetMini() const\r
+               {\r
+               return dhi_mini;\r
+               }\r
+\r
+       unsigned GetMaxi() const\r
+               {\r
+               return dlo_maxi;\r
+               }\r
+\r
+       unsigned GetMinj() const\r
+               {\r
+               return dlo_minj;\r
+               }\r
+\r
+       unsigned GetMaxj() const\r
+               {\r
+               return dhi_maxj;\r
+               }\r
+/***\r
+       i = 0..LA-1\r
+       j = 0..LB-1\r
+       d = LA - i + j = 1 .. LA+LB-1\r
+       j = d - LA + i\r
+       i = LA - d + j\r
+***/\r
+       // Range of j covered by the band in row i. Startj is the j on diagonal\r
+       // dlo (clamped to 0..LB-1); Endj is one past the j on diagonal dhi\r
+       // (clamped to 0..LB).\r
+       void GetRange_j(unsigned i, unsigned &Startj, unsigned &Endj) const\r
+               {\r
+       // j = d - LA + i\r
+               if (dlo + i >= LA)\r
+                       Startj = dlo + i - LA;\r
+               else\r
+                       Startj = 0;\r
+\r
+               if (Startj >= LB)\r
+                       Startj = LB - 1;\r
+\r
+               if (dhi + i + 1 >= LA)\r
+                       Endj = dhi + i + 1 - LA;\r
+               else\r
+                       Endj = 0;\r
+\r
+               if (Endj > LB)\r
+                       Endj = LB;\r
+\r
+               asserta(Endj >= Startj);\r
+               }\r
+\r
+       // Writes all fields to the log. Every argument is unsigned, so every\r
+       // conversion is %u.\r
+       void LogMe() const\r
+               {\r
+               Log("LA=%u LB=%u dlo(%u): (%u,%u)-(%u,%u) dhi(%u): (%u,%u)-(%u,%u) i=[%u-%u] j=[%u-%u]\n",\r
+                 LA, LB,\r
+                 dlo,\r
+                 dlo_mini, dlo_minj,\r
+                 dlo_maxi, dlo_maxj,\r
+                 dhi,\r
+                 dhi_mini, dhi_minj,\r
+                 dhi_maxi, dhi_maxj,\r
+                 GetMini(), GetMaxi(),\r
+                 GetMinj(), GetMaxj());\r
+               }\r
+       };\r
+\r
+typedef const char *(*NWDIAG)(const byte *A, unsigned LA, const byte *B, unsigned LB,
+  unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm);
+
+const char *NWBandWrap(NWDIAG NW, const byte *A, unsigned LA, const byte *B, unsigned LB,
+  unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm);
+\r
+#endif // diagbox_h\r
diff --git a/dp.h b/dp.h
new file mode 100644 (file)
index 0000000..c771538
--- /dev/null
+++ b/dp.h
@@ -0,0 +1,164 @@
+#ifndef dp_h\r
+#define dp_h\r
+\r
+#define SAVE_FAST      0\r
+\r
+#include "myutils.h"\r
+#include "mx.h"\r
+#include "seqdb.h"\r
+#include "diagbox.h"\r
+#include "path.h"\r
+#include "alnparams.h"\r
+#include "alnheuristics.h"\r
+#include "hspfinder.h"\r
+\r
+typedef void (*OnPathFn)(const string &Path, bool Full);\r
+\r
+// Tag values 1..3 for an alignment/extension kind.\r
+// NOTE(review): presumably Full = whole alignment and Fwd/Bwd = forward /\r
+// backward extension -- confirm against the code that uses XType.\r
+enum XType\r
+       {\r
+       XType_Full=1,\r
+       XType_Fwd=2,\r
+       XType_Bwd=3,\r
+       };\r
+\r
+// public\r
+float ViterbiBrute(const byte *A, unsigned LA, const byte *B, unsigned LB, \r
+  unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiSimpleBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, unsigned DiagLo, unsigned DiagHi, PathData &PD);\r
+\r
+float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiFastBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiFastMainDiag(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  unsigned BandRadius, const AlnParams &AP, PathData &PD);\r
+\r
+float XDropFwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+float XDropBwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+float XDropFwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+float XDropBwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+void XDropAlign(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  unsigned AncLoi, unsigned AncLoj, unsigned AncLen, const AlnParams &AP,\r
+  float XDrop, HSPData &HSP, PathData &PD);\r
+\r
+float SWSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj,\r
+  unsigned &Hij, PathData &PD);\r
+\r
+float SWFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj,\r
+  unsigned &Hij, PathData &PD);\r
+\r
+void SWFast2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
+  HSPData &HSP, PathData &PD);\r
+\r
+void SWSimple2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
+  HSPData &HSP, PathData &PD);\r
+\r
+float SWUngapped(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const float * const *SubstMx, unsigned &LoA, unsigned &LoB, unsigned &Len);\r
+\r
+void SWUngapped2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
+  HSPData &HSP);\r
+\r
+float SWFastNTB(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP);\r
+\r
+void GlobalAlignBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+  const AlnParams &AP, unsigned BandRadius, PathData &PD);\r
+\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &AP,\r
+  const AlnHeuristics &AH, HSPFinder &HF, float MinFractId, float &HSPFractId,\r
+  PathData &PD);\r
+\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path);\r
+\r
+void GetBruteMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetSimpleDPMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetSimpleBandMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetXDropFwdSimpleDPMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+#if    SAVE_FAST\r
+void GetFastMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetFastBandMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+#endif\r
+\r
+// private\r
+void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD);\r
+void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj,\r
+  unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+void EnumPaths(unsigned L1, unsigned L2, bool SubPaths, OnPathFn OnPath);\r
+void AllocBit(unsigned LA, unsigned LB);\r
+\r
+const byte TRACEBITS_DM = 0x01;\r
+const byte TRACEBITS_IM = 0x02;\r
+const byte TRACEBITS_MD = 0x04;\r
+const byte TRACEBITS_MI = 0x08;\r
+const byte TRACEBITS_SM = 0x10;\r
+const byte TRACEBITS_UNINIT = ~0x1f;\r
+\r
+extern Mx<byte> g_Mx_TBBit;\r
+extern float *g_DPRow1;\r
+extern float *g_DPRow2;\r
+extern byte **g_TBBit;\r
+\r
+static inline void Max_xM(float &Score, float MM, float DM, float IM, byte &State)\r
+       {\r
+       Score = MM;\r
+       State = 'M';\r
+\r
+       if (DM > Score)\r
+               {\r
+               Score = DM;\r
+               State = 'D';\r
+               }\r
+       if (IM > Score)\r
+               {\r
+               Score = IM;\r
+               State = 'I';\r
+               }\r
+       }\r
+\r
+static inline void Max_xD(float &Score, float MD, float DD, byte &State)\r
+       {\r
+       if (MD >= DD)\r
+               {\r
+               Score = MD;\r
+               State = 'M';\r
+               }\r
+       else\r
+               {\r
+               Score = DD;\r
+               State = 'D';\r
+               }\r
+       }\r
+\r
+static inline void Max_xI(float &Score, float MI, float II, byte &State)\r
+       {\r
+       if (MI >= II)\r
+               {\r
+               Score = MI;\r
+               State = 'M';\r
+               }\r
+       else\r
+               {\r
+               Score = II;\r
+               State = 'I';\r
+               }\r
+       }\r
+\r
+#endif // dp_h\r
diff --git a/evalue.h b/evalue.h
new file mode 100644 (file)
index 0000000..c9308db
--- /dev/null
+++ b/evalue.h
@@ -0,0 +1,25 @@
+#ifndef evalue_h\r
+#define evalue_h\r
+\r
+#include <float.h>\r
+\r
+// Five-value overload: gapped and ungapped lambda and K, plus DB length.\r
+void SetKarlin(double GappedLambda, double UngappedLambda,\r
+  double GappedK, double UngappedK, double DBLength);\r
+\r
+double GetKarlinDBLength();\r
+void SetKarlinDBLength(double DBLength);\r
+void LogKarlin();\r
+void SetKarlinAmino(double DBLength);\r
+void SetKarlinNucleo(double DBLength);\r
+void SetKarlin(double DBLength, bool Nucleo);\r
+double ComputeBitScoreGapped(double Score);\r
+double ComputeBitScoreUngapped(double Score);\r
+double ComputeEvalueGapped(double Score, unsigned QueryLength);\r
+double ComputeEvalueUngapped(double Score, unsigned QueryLength);\r
+double ComputeMinScoreGivenEvalueAGapped(double Evalue, unsigned Area);\r
+double ComputeMinScoreGivenEvalueAUngapped(double Evalue, unsigned Area);\r
+double ComputeMinScoreGivenEvalueQGapped(double Evalue, unsigned QueryLength);\r
+double ComputeMinScoreGivenEvalueQUngapped(double Evalue, unsigned QueryLength);\r
+double ComputeEvalueGappedFromBitScore(double BitScore, unsigned QueryLength);\r
+\r
+#endif // evalue_h\r
diff --git a/fractid.cpp b/fractid.cpp
new file mode 100644 (file)
index 0000000..f298877
--- /dev/null
@@ -0,0 +1,449 @@
+#include "myutils.h"\r
+#include "alpha.h"\r
+\r
+//unsigned g_MaxL = 0;\r
+\r
+static bool *g_IsChar = g_IsAminoChar;\r
+\r
+// Term gaps allowed in query (A) only\r
+// Returns 1 - Diffs/Cols over the columns of Path that precede the last\r
+// 'M' column, or 0 if Path starts with 'D' or no columns were counted.\r
+// If ptrDesc is non-null it receives a short text summary.\r
+static double GetFractIdGivenPathDerep(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       // A leading D column is rejected outright.\r
+       if (*Path == 'D')\r
+               {\r
+               if (ptrDesc != 0)\r
+                       sprintf(ptrDesc, "(term gap in Query)");\r
+               return 0;\r
+               }\r
+\r
+       // Find the last M column; stays null when Path has no M at all.\r
+       const char *LastM = 0;\r
+       for (const char *p = Path; *p; ++p)\r
+               if (*p == 'M')\r
+                       LastM = p;\r
+\r
+       unsigned PosA = 0;\r
+       unsigned PosB = 0;\r
+       unsigned Ids = 0;\r
+       unsigned Diffs = 0;\r
+       unsigned Cols = 0;\r
+       // NOTE(review): the loop stops when p reaches LastM, so the last M\r
+       // column itself is not scored -- confirm this is intended.\r
+       for (const char *p = Path; *p && p != LastM; ++p)\r
+               {\r
+               ++Cols;\r
+               char c = *p;\r
+               if (c == 'M')\r
+                       {\r
+                       byte a = toupper(A[PosA]);\r
+                       byte b = toupper(B[PosB]);\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               if (a == b)\r
+                                       ++Ids;\r
+                               else\r
+                                       ++Diffs;\r
+                               }\r
+                       else\r
+                               // Letter not flagged in g_IsChar: undo the column count.\r
+                               --Cols;\r
+                       }\r
+               if (c == 'D' || c == 'I')\r
+                       ++Diffs;\r
+               if (c == 'M' || c == 'D')\r
+                       ++PosA;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PosB;\r
+               }\r
+\r
+       double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
+       return FractId;\r
+       }\r
+\r
+static double GetFractIdGivenPathAllDiffs(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       unsigned PosA = 0;\r
+       unsigned PosB = 0;\r
+       unsigned Ids = 0;\r
+       unsigned Diffs = 0;\r
+       unsigned Cols = 0;\r
+       for (const char *p = Path; *p; ++p)\r
+               {\r
+               ++Cols;\r
+               char c = *p;\r
+               if (c == 'M')\r
+                       {\r
+                       byte a = toupper(A[PosA]);\r
+                       byte b = toupper(B[PosB]);\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               if (a == b)\r
+                                       ++Ids;\r
+                               else\r
+                                       ++Diffs;\r
+                               }\r
+                       else\r
+                               --Cols;\r
+                       }\r
+               if (c == 'D' || c == 'I')\r
+                       ++Diffs;\r
+               if (c == 'M' || c == 'D')\r
+                       ++PosA;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PosB;\r
+               }\r
+\r
+       double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
+       return FractId;\r
+       }\r
+\r
+// Fractional identity restricted to the columns from the first to the\r
+// last 'M' of Path (inclusive); columns outside that span only advance the\r
+// sequence positions. Returns 0 with "(no matches)" when Path has no M.\r
+// If ptrDesc is non-null it receives a short text summary.\r
+static double GetFractIdGivenPathInternalDiffs(const byte *A, const byte *B,\r
+  const char *Path, char *ptrDesc)\r
+       {\r
+       // Locate the first and last match columns.\r
+       unsigned Lo = UINT_MAX;\r
+       unsigned Hi = UINT_MAX;\r
+       for (unsigned k = 0; Path[k] != 0; ++k)\r
+               {\r
+               if (Path[k] != 'M')\r
+                       continue;\r
+               if (Lo == UINT_MAX)\r
+                       Lo = k;\r
+               Hi = k;\r
+               }\r
+       if (Lo == UINT_MAX)\r
+               {\r
+               if (ptrDesc != 0)\r
+                       strcpy(ptrDesc, "(no matches)");\r
+               return 0.0;\r
+               }\r
+\r
+       unsigned ia = 0;\r
+       unsigned ib = 0;\r
+       unsigned nSame = 0;\r
+       unsigned nDiff = 0;\r
+       unsigned nCols = 0;\r
+\r
+       // Skip the leading columns; there is no M before Lo, so only D and I\r
+       // can move a position here.\r
+       for (unsigned k = 0; k < Lo; ++k)\r
+               {\r
+               if (Path[k] == 'D')\r
+                       ++ia;\r
+               else if (Path[k] == 'I')\r
+                       ++ib;\r
+               }\r
+\r
+       for (unsigned k = Lo; k <= Hi; ++k)\r
+               {\r
+               switch (Path[k])\r
+                       {\r
+               case 'M':\r
+                       {\r
+                       const byte a = toupper(A[ia]);\r
+                       const byte b = toupper(B[ib]);\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               ++nCols;\r
+                               if (a == b)\r
+                                       ++nSame;\r
+                               else\r
+                                       ++nDiff;\r
+                               }\r
+                       ++ia;\r
+                       ++ib;\r
+                       break;\r
+                       }\r
+               case 'D':\r
+                       ++nCols;\r
+                       ++nDiff;\r
+                       ++ia;\r
+                       break;\r
+               case 'I':\r
+                       ++nCols;\r
+                       ++nDiff;\r
+                       ++ib;\r
+                       break;\r
+               default:\r
+                       ++nCols;\r
+                       break;\r
+                       }\r
+               }\r
+\r
+       double Fract = 0.0;\r
+       if (nCols != 0)\r
+               Fract = 1.0 - double(nDiff)/double(nCols);\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/cols=%u)", nSame, nCols);\r
+       return Fract;\r
+       }\r
+\r
+static double GetFractIdGivenPathMBL(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       unsigned PosA = 0;\r
+       unsigned PosB = 0;\r
+       unsigned Mismatches = 0;\r
+       unsigned Gaps = 0;\r
+       for (const char *p = Path; *p; ++p)\r
+               {\r
+               char c = *p;\r
+               if (c == 'M' && toupper(A[PosA]) != toupper(B[PosB]))\r
+                       ++Mismatches;\r
+               if (c == 'D' || c == 'I' && (p == Path || p[-1] == 'M'))\r
+                       ++Gaps;\r
+               if (c == 'M' || c == 'D')\r
+                       ++PosA;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PosB;\r
+               }\r
+       unsigned Diffs = Gaps + Mismatches;\r
+       double FractDiffs = (PosB == 0 ? 0.0 : double(Diffs)/double(PosB));\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "Gap opens %u, Id=1 - [(diffs=%u)/(target_length=%u)]",\r
+                 Gaps, Diffs, PosB);\r
+       double FractId = 1.0 - FractDiffs;\r
+       if (FractId < 0.0)\r
+               return 0.0;\r
+       return FractId;\r
+       }\r
+\r
+static double GetFractIdGivenPathBLAST(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       unsigned PosA = 0;\r
+       unsigned PosB = 0;\r
+       unsigned Ids = 0;\r
+       unsigned Wilds = 0;\r
+       unsigned Cols = 0;\r
+       for (const char *p = Path; *p; ++p)\r
+               {\r
+               ++Cols;\r
+               char c = *p;\r
+               if (c == 'M')\r
+                       {\r
+                       byte a = toupper(A[PosA]);\r
+                       byte b = toupper(B[PosB]);\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               if (a == b)\r
+                                       ++Ids;\r
+                               }\r
+                       else\r
+                               ++Wilds;\r
+                       }\r
+               if (c == 'M' || c == 'D')\r
+                       ++PosA;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PosB;\r
+               }\r
+       asserta(Cols >= Wilds);\r
+       Cols -= Wilds;\r
+       double FractId = Cols == 0 ? 0.0f : float(Ids)/float(Cols);\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
+       return FractId;\r
+       }\r
+\r
+static double GetFractIdGivenPathDefault(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+       {\r
+       unsigned PosA = 0;\r
+       unsigned PosB = 0;\r
+       unsigned Ids = 0;\r
+       unsigned Wilds = 0;\r
+       for (const char *p = Path; *p; ++p)\r
+               {\r
+               char c = *p;\r
+               if (c == 'M')\r
+                       {\r
+                       byte a = toupper(A[PosA]);\r
+                       byte b = toupper(B[PosB]);\r
+                       if (g_IsChar[a] && g_IsChar[b])\r
+                               {\r
+                               if (a == b)\r
+                                       ++Ids;\r
+                               }\r
+                       else\r
+                               ++Wilds;\r
+                       }\r
+               if (c == 'M' || c == 'D')\r
+                       ++PosA;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PosB;\r
+               }\r
+       unsigned MinLen = min(PosA, PosB) - Wilds;\r
+       double FractId = (MinLen == 0 ? 0.0 : double(Ids)/double(MinLen));\r
+       if (ptrDesc != 0)\r
+               sprintf(ptrDesc, "(ids=%u/shorter_length=%u)", Ids, MinLen);\r
+       return FractId;\r
+       }\r
+\r
+// Fractional identity of the alignment of A to B described by Path, a\r
+// NUL-terminated string of 'M', 'D' and 'I' columns.\r
+//   Nucleo   selects the letter table installed in the global g_IsChar\r
+//            (g_IsACGTU when true, g_IsAminoChar otherwise).\r
+//   ptrDesc  if non-null, receives a short description of the result or of\r
+//            the reason for rejection (written with strcpy/sprintf, no bounds\r
+//            check); it is left untouched when Path is empty.\r
+//   IdDef    identity definition, 0..5; any other value calls Die().\r
+// Returns 0.0 for a null or empty path and for paths rejected by the\r
+// opt_leftjust, opt_rightjust, opt_maxqgap or opt_maxtgap filters.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
+  bool Nucleo, char *ptrDesc, unsigned IdDef)\r
+       {\r
+       if (Nucleo)\r
+               g_IsChar = g_IsACGTU;\r
+       else\r
+               g_IsChar = g_IsAminoChar;\r
+\r
+       if (Path == 0)\r
+               {\r
+               if (ptrDesc != 0)\r
+                       strcpy(ptrDesc, "(NULL path)");\r
+               return 0.0;\r
+               }\r
+\r
+       unsigned ColCount = (unsigned) strlen(Path);\r
+       if (ColCount == 0)\r
+               return 0.0;\r
+\r
+// Left-justified: path must start with a match and must not end with 'D'.\r
+       if (opt_leftjust)\r
+               {\r
+               if (Path[0] != 'M' || Path[ColCount-1] == 'D')\r
+                       {\r
+                       if (ptrDesc != 0)\r
+                               strcpy(ptrDesc, "(leftjust)");\r
+                       return 0.0;\r
+                       }\r
+               }\r
+\r
+// Right-justified: path must not start with 'D' and must end with a match.\r
+       if (opt_rightjust)\r
+               {\r
+               if (Path[0] == 'D' || Path[ColCount-1] != 'M')\r
+                       {\r
+                       if (ptrDesc != 0)\r
+                               strcpy(ptrDesc, "(rightjust)");\r
+                       return 0.0;\r
+                       }\r
+               }\r
+\r
+       double FractId = 0.0;\r
+       //if (opt_idprefix > 0)\r
+       //      {\r
+       //      for (unsigned i = 0; i < opt_idprefix; ++i)\r
+       //              {\r
+       //              char c = Path[i];\r
+       //              if (c != 'M' || toupper(A[i]) != toupper(B[i]))\r
+       //                      {\r
+       //                      if (ptrDesc != 0)\r
+       //                              sprintf(ptrDesc, "Prefix ids %u < idprefix(%u)",\r
+       //                                i, opt_idprefix);\r
+       //                      return 0.0;\r
+       //                      }\r
+       //              }\r
+       //      }\r
+\r
+       //if (opt_idsuffix > 0)\r
+       //      {\r
+       //      unsigned Cols = strlen(Path);\r
+       //      for (unsigned i = 0; i < opt_idsuffix && i > Cols; ++i)\r
+       //              {\r
+       //              unsigned k = Cols - 1 - i;\r
+       //              char c = Path[k];\r
+       //              if (c != 'M' || toupper(A[k]) != toupper(B[k]))\r
+       //                      {\r
+       //                      if (ptrDesc != 0)\r
+       //                              sprintf(ptrDesc, "Suffix ids %u < idsuffix(%u)",\r
+       //                                i, opt_idsuffix);\r
+       //                      return 0.0;\r
+       //                      }\r
+       //              }\r
+       //      }\r
+\r
+// Gap-length filter: L is the length of the current run of 'D'/'I' columns,\r
+// tested against the limit when the run is closed by an 'M'.\r
+// NOTE(review): the scan stops before the last 'M', so the run that is closed\r
+// by the last 'M' is never tested -- confirm this is intended.\r
+// NOTE(review): when only one of the two options is > 0 the other limit (0)\r
+// is still applied, rejecting every gap of that kind -- confirm.\r
+       if (opt_maxqgap > 0 || opt_maxtgap > 0)\r
+               {\r
+               unsigned L = 0;\r
+               const char *LastM = 0;\r
+               for (const char *p = Path; *p; ++p)\r
+                       if (*p == 'M')\r
+                               LastM = p;\r
+\r
+//             g_MaxL = 0;\r
+               for (const char *p = Path; *p && p != LastM; ++p)\r
+                       {\r
+                       char c = *p;\r
+                       switch (c)\r
+                               {\r
+                       case 'M':\r
+                               if (L > 0)\r
+                                       {\r
+                                       if (p[-1] == 'D')\r
+                                               {\r
+                                               if (L > opt_maxtgap)\r
+                                                       {\r
+                                                       if (ptrDesc != 0)\r
+                                                               sprintf(ptrDesc, "(maxtgap)");\r
+                                                       return 0.0;\r
+                                                       }\r
+                                               }\r
+                                       else if (p[-1] == 'I')\r
+                                               {\r
+                                               if (L > opt_maxqgap)\r
+                                                       {\r
+                                                       if (ptrDesc != 0)\r
+                                                               sprintf(ptrDesc, "(maxqgap)");\r
+                                                       return 0.0;\r
+                                                       }\r
+                                               }\r
+                                       else\r
+                                               asserta(false);\r
+                                       }\r
+                               L = 0;\r
+                               break;\r
+\r
+                       case 'D':\r
+                       case 'I':\r
+                               ++L;\r
+                               //if (L > g_MaxL)\r
+                               //      g_MaxL = L;\r
+                               break;\r
+\r
+                       default:\r
+                               asserta(false);\r
+                               }\r
+                       }\r
+               }\r
+\r
+       switch (IdDef)\r
+               {\r
+       case 0:\r
+               FractId = GetFractIdGivenPathDefault(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 1:\r
+               FractId = GetFractIdGivenPathAllDiffs(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 2:\r
+               FractId = GetFractIdGivenPathInternalDiffs(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 3:\r
+               FractId = GetFractIdGivenPathMBL(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 4:\r
+               FractId = GetFractIdGivenPathBLAST(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       case 5:\r
+               FractId = GetFractIdGivenPathDerep(A, B, Path, ptrDesc);\r
+               break;\r
+\r
+       default:\r
+       // Report the value actually being dispatched on; it can differ from\r
+       // opt_iddef when the caller passes IdDef explicitly.\r
+               Die("--iddef %u invalid", IdDef);\r
+               }\r
+\r
+       return FractId;\r
+       }\r
+\r
+// Overload that uses the identity definition currently held in opt_iddef.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
+  bool Nucleo, char *ptrDesc)\r
+       {\r
+       const unsigned IdDef = opt_iddef;\r
+       return GetFractIdGivenPath(A, B, Path, Nucleo, ptrDesc, IdDef);\r
+       }\r
+\r
+// Overload for callers that do not want a description string.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo)\r
+       {\r
+       char *NoDesc = 0;\r
+       return GetFractIdGivenPath(A, B, Path, Nucleo, NoDesc);\r
+       }\r
+\r
+// Overload taking the path as a string; passes Nucleo = true.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const string &Path)\r
+       {\r
+       const char *PathChars = Path.c_str();\r
+       const bool Nucleo = true;\r
+       return GetFractIdGivenPath(A, B, PathChars, Nucleo);\r
+       }\r
+\r
+// Overload taking a C-string path; passes Nucleo = true.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path)\r
+       {\r
+       const bool Nucleo = true;\r
+       return GetFractIdGivenPath(A, B, Path, Nucleo);\r
+       }\r
diff --git a/getparents.cpp b/getparents.cpp
new file mode 100644 (file)
index 0000000..d82f902
--- /dev/null
@@ -0,0 +1,89 @@
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "ultra.h"\r
+#include <set>\r
+\r
+void AddTargets(Ultra &U, const SeqData &Query, set<unsigned> &TargetIndexes);\r
+\r
+void GetChunkInfo(unsigned L, unsigned &Length, vector<unsigned> &Los)\r
+       {\r
+       Los.clear();\r
+\r
+       if (L <= opt_minchunk)\r
+               {\r
+               Length = L;\r
+               Los.push_back(0);\r
+               return;\r
+               }\r
+\r
+       Length = (L - 1)/opt_chunks + 1;\r
+       if (Length < opt_minchunk)\r
+               Length = opt_minchunk;\r
+\r
+       unsigned Lo = 0;\r
+       for (;;)\r
+               {\r
+               if (Lo + Length >= L)\r
+                       {\r
+                       Lo = L - Length - 1;\r
+                       Los.push_back(Lo);\r
+                       return;\r
+                       }\r
+               Los.push_back(Lo);\r
+               Lo += Length;\r
+               }\r
+       }\r
+\r
+void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,\r
+  vector<unsigned> &Parents)\r
+       {\r
+       Parents.clear();\r
+\r
+       set<unsigned> TargetIndexes;\r
+\r
+       unsigned QL = QSD.L;\r
+\r
+       SeqData QuerySD = QSD;\r
+\r
+       unsigned ChunkLength;\r
+       vector<unsigned> ChunkLos;\r
+       GetChunkInfo(QL, ChunkLength, ChunkLos);\r
+       unsigned ChunkCount = SIZE(ChunkLos);\r
+       for (unsigned ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)\r
+               {\r
+               unsigned Lo = ChunkLos[ChunkIndex];\r
+               asserta(Lo + ChunkLength <= QL);\r
+\r
+               const byte *Chunk = QSD.Seq + Lo;\r
+\r
+       // THIS MESSES UP --self!!\r
+               //char Prefix[32];\r
+               //sprintf(Prefix, "%u|", Lo);\r
+               //string ChunkLabel = string(Prefix) + string(QSD.Label);\r
+\r
+               //QuerySD.Label = ChunkLabel.c_str();\r
+               QuerySD.Seq = Chunk;\r
+               QuerySD.L = ChunkLength;\r
+\r
+               AddTargets(U, QuerySD, TargetIndexes);\r
+\r
+               Lo += ChunkLength;\r
+               }\r
+\r
+       for (set<unsigned>::const_iterator p = TargetIndexes.begin();\r
+         p != TargetIndexes.end(); ++p)\r
+               {\r
+               unsigned TargetIndex = *p;\r
+               bool Accept = true;\r
+               if (AbQ > 0.0f)\r
+                       {\r
+                       const char *TargetLabel = U.GetSeedLabel(TargetIndex);\r
+                       float AbT = GetAbFromLabel(string(TargetLabel));\r
+                       if (AbT > 0.0f && AbT < opt_abskew*AbQ)\r
+                               Accept = false;\r
+                       }\r
+\r
+               if (Accept)\r
+                       Parents.push_back(TargetIndex);\r
+               }\r
+       }\r
diff --git a/globalalign2.cpp b/globalalign2.cpp
new file mode 100644 (file)
index 0000000..6bb35a9
--- /dev/null
@@ -0,0 +1,45 @@
+//#if  UCHIMES\r
+\r
+#include "dp.h"\r
+#include "seq.h"\r
+\r
+static AlnParams g_AP;\r
+static bool g_APInitDone = false;\r
+\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, PathData &PD)\r
+       {\r
+       if (!g_APInitDone)\r
+               {\r
+               g_AP.InitFromCmdLine(true);\r
+               g_APInitDone = true;\r
+               }\r
+\r
+       ViterbiFast(Query.Seq, Query.L, Target.Seq, Target.L, g_AP, PD);\r
+       return true;\r
+       }\r
+\r
+// Aligns Query to Target and copies the resulting path into Path.\r
+// Always returns true.\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path)\r
+       {\r
+       PathData Scratch;\r
+       GlobalAlign(Query, Target, Scratch);\r
+       Path.assign(Scratch.Start);\r
+       return true;\r
+       }\r
+\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &/*AP*/,\r
+  const AlnHeuristics &AH, HSPFinder &/*HF*/, float /*MinFractId*/, float &/*HSPId*/, PathData &PD)\r
+       {\r
+       PD.Clear();\r
+       string Path;\r
+       bool Found = GlobalAlign(Query, Target, Path);\r
+       if (!Found)\r
+               return false;\r
+       unsigned n = SIZE(Path);\r
+       PD.Alloc(n+1);\r
+       memcpy(PD.Front, Path.c_str(), n);\r
+       PD.Start = PD.Front;\r
+       PD.Start[n] = 0;\r
+       return true;\r
+       }\r
+\r
+//#endif // UCHIMES\r
diff --git a/help.h b/help.h
new file mode 100644 (file)
index 0000000..9d7a89f
--- /dev/null
+++ b/help.h
@@ -0,0 +1,127 @@
+"\n"
+"Usage\n"
+"-----\n"
+"\n"
+"uchime --input query.fasta [--db db.fasta] [--uchimeout results.uchime]\n"
+"    [--uchimealns results.alns]\n"
+"\n"
+"Options\n"
+"-------\n"
+"\n"
+"--input filename\n"
+"    Query sequences in FASTA format.\n"
+"    If the --db option is not specified, uchime uses de novo\n"
+"    detection. In de novo mode, relative abundance must be given\n"
+"    by a string /ab=xxx/ somewhere in the label, where xxx is a\n"
+"    floating-point number, e.g. >F00QGH67HG/ab=1.2/.\n"
+"\n"
+"--db filename\n"
+"    Reference database in FASTA format.\n"
+"    Optional, if not specified uchime uses de novo mode.\n"
+"\n"
+"    ***WARNING*** The database is searched ONLY on the plus strand.\n"
+"    You MUST include reverse-complemented sequences in the database\n"
+"    if you want both strands to be searched.\n"
+"\n"
+"--abskew x\n"
+"    Minimum abundance skew. Default 1.9. De novo mode only.\n"
+"    Abundance skew is:\n"
+"        min [ abund(parent1), abund(parent2) ] / abund(query).\n"
+"\n"
+"--uchimeout filename\n"
+"    Output in tabbed format with one record per query sequence.\n"
+"    First field is score (h), second field is query label.\n"
+"    For details, see manual.\n"
+"\n"
+"--uchimealns filename\n"
+"    Multiple alignments of query sequences to parents in human-\n"
+"    readable format. Alignments show columns with differences\n"
+"    that support or contradict a chimeric model.\n"
+"\n"
+"--minh h\n"
+"    Minimum score to report chimera. Default 0.3. Values from 0.1\n"
+"    to 5 might be reasonable. Lower values increase sensitivity\n"
+"    but may report more false positives. If you decrease --xn,\n"
+"    you may need to increase --minh, and vice versa.\n"
+"\n"
+"--mindiv div\n"
+"    Minimum divergence ratio, default 0.5. Div ratio is 100%% - \n"
+"    %%identity between query sequence and the closest candidate for\n"
+"    being a parent. If you don't care about very close chimeras,\n"
+"    then you could increase --mindiv to, say, 1.0 or 2.0, and\n"
+"    also decrease --minh, say to 0.1, to increase sensitivity.\n"
+"    How well this works will depend on your data. Best is to\n"
+"    tune parameters on a good benchmark.\n"
+"\n"
+"--xn beta\n"
+"    Weight of a no vote, also called the beta parameter. Default 8.0.\n"
+"    Decreasing this weight to around 3 or 4 may give better\n"
+"    performance on denoised data.\n"
+"\n"
+"--dn n\n"
+"    Pseudo-count prior on number of no votes. Default 1.4. Probably\n"
+"    no good reason to change this unless you can retune to a good\n"
+"    benchmark for your data. Reasonable values are probably in the\n"
+"    range from 0.2 to 2.\n"
+"\n"
+"--xa w\n"
+"    Weight of an abstain vote. Default 1. So far, results do not\n"
+"    seem to be very sensitive to this parameter, but if you have\n"
+"    a good training set might be worth trying. Reasonable values\n"
+"    might range from 0.1 to 2.\n"
+"\n"
+"--chunks n\n"
+"    Number of chunks to extract from the query sequence when searching\n"
+"    for parents. Default 4.\n"
+"\n"
+"--[no]ovchunks\n"
+"    [Do not] use overlapping chunks. Default do not.\n"
+"\n"
+"--minchunk n\n"
+"    Minimum length of a chunk. Default 64.\n"
+"\n"
+"--idsmoothwindow w\n"
+"    Length of id smoothing window. Default 32.\n"
+"\n"
+"--minsmoothid f\n"
+"    Minimum fractional identity over smoothed window of candidate parent.\n"
+"    Default 0.95.\n"
+"\n"
+"--maxp n\n"
+"    Maximum number of candidate parents to consider. Default 2. In tests so\n"
+"    far, increasing --maxp gives only a very small improvement in sensitivity\n"
+"    but tends to increase the error rate quite a bit.\n"
+"\n"
+"--[no]skipgaps\n"
+"--[no]skipgaps2\n"
+"    These options control how gapped columns affect counting of diffs.\n"
+"    If --skipgaps is specified, columns containing gaps do not count as diffs.\n"
+"    If --skipgaps2 is specified, if column is immediately adjacent to\n"
+"    a column containing a gap, it is not counted as a diff.\n"
+"    Default is --skipgaps --skipgaps2.\n"
+"\n"
+"--minlen L\n"
+"--maxlen L\n"
+"    Minimum and maximum sequence length. Defaults 10, 10000.\n"
+"    Applies to both query and reference sequences.\n"
+"\n"
+"--ucl\n"
+"    Use local-X alignments. Default is global-X. On tests so far, global-X\n"
+"    is always better; this option is retained because it just might work\n"
+"    well on some future type of data.\n"
+"\n"
+"--queryfract f\n"
+"    Minimum fraction of the query sequence that must be covered by a local-X\n"
+"    alignment. Default 0.5. Applies only when --ucl is specified.\n"
+"\n"
+"--quiet\n"
+"    Do not display progress messages on stderr.\n"
+"\n"
+"--log filename\n"
+"    Write miscellaneous information to the log file. Mostly of interest\n"
+"    to me (the algorithm developer). Use --verbose to get more info.\n"
+"\n"
+"--self\n"
+"    In reference database mode, exclude a reference sequence if it has\n"
+"    the same label as the query. This is useful for benchmarking by using\n"
+"    the ref db as a query to test for false positives.\n"
diff --git a/hsp.h b/hsp.h
new file mode 100644 (file)
index 0000000..339256f
--- /dev/null
+++ b/hsp.h
@@ -0,0 +1,114 @@
+#ifndef hsp_h\r
+#define hsp_h  1\r
+\r
+struct HSPData\r
+       {\r
+       unsigned Loi;\r
+       unsigned Loj;\r
+       unsigned Leni;\r
+       unsigned Lenj;\r
+       float Score;\r
+       unsigned User;\r
+\r
+       unsigned GetLength() const\r
+               {\r
+               if (Leni != Lenj)\r
+                       Die("HSP::GetLength(): Leni %u, Lenj %u, Loi %u, Loj %u, Score %.1f",\r
+                         Leni, Lenj, Loi, Loj, Score);\r
+\r
+               return Leni;\r
+               }\r
+\r
+       unsigned GetHii() const\r
+               {\r
+               assert(Leni > 0);\r
+               return Loi + Leni - 1;\r
+               }\r
+\r
+       unsigned GetHij() const\r
+               {\r
+               assert(Lenj > 0);\r
+               return Loj + Lenj - 1;\r
+               }\r
+\r
+       bool LeftA() const\r
+               {\r
+               return Loi == 0;\r
+               }\r
+\r
+       bool LeftB() const\r
+               {\r
+               return Loj == 0;\r
+               }\r
+\r
+       bool RightA(unsigned LA) const\r
+               {\r
+               return Loi + Leni == LA;\r
+               }\r
+\r
+       bool RightB(unsigned LB) const\r
+               {\r
+               return Loj + Lenj == LB;\r
+               }\r
+\r
+       unsigned GetIdCount(const byte *A, const byte *B) const\r
+               {\r
+               unsigned Count = 0;\r
+               unsigned K = GetLength();\r
+               for (unsigned k = 0; k < K; ++k)\r
+                       {\r
+                       byte a = A[Loi+k];\r
+                       byte b = B[Loj+k];\r
+                       if (toupper(a) == toupper(b))\r
+                               Count++;\r
+                       }\r
+               return Count;\r
+               }\r
+\r
+       double OverlapFract(const HSPData &HSP) const\r
+               {\r
+               if (Leni == 0 || Lenj == 0)\r
+                       return 0.0;\r
+\r
+               unsigned MaxLoi = max(Loi, HSP.Loi);\r
+               unsigned MaxLoj = max(Loj, HSP.Loj);\r
+               unsigned MinHii = min(GetHii(), HSP.GetHii());\r
+               unsigned MinHij = min(GetHij(), HSP.GetHij());\r
+\r
+               unsigned Ovi = (MinHii < MaxLoi) ? 0 : MinHii - MaxLoi;\r
+               unsigned Ovj = (MinHij < MaxLoj) ? 0 : MinHij - MaxLoj;\r
+\r
+               asserta(Ovi <= Leni && Ovj <= Lenj);\r
+               return double(Ovi*Ovj)/double(Leni*Lenj);\r
+               }\r
+\r
+       bool operator<(const HSPData &rhs) const\r
+               {\r
+               return Loi < rhs.Loi;\r
+               }\r
+\r
+       void LogMe() const\r
+               {\r
+               Log("Loi=%u Loj=%u Li=%u Lj=%u Score=%.1f\n", Loi, Loj, Leni, Lenj, Score);\r
+               }\r
+\r
+       void LogMe2() const\r
+               {\r
+               Log("(%u-%u,%u-%u/%.1f)", Loi, GetHii(), Loj, GetHij(), Score);\r
+               }\r
+       };\r
+\r
+// Bendpoint\r
+struct BPData\r
+       {\r
+       unsigned Pos;\r
+       bool IsLo;\r
+       unsigned Index;\r
+\r
+       void LogMe() const\r
+               {\r
+               Log("BP%s Pos %u Ix %u", (IsLo ? "lo" : "hi"), Pos, Index);\r
+               }\r
+       };\r
+\r
+#endif // hsp_h\r
diff --git a/hspfinder.h b/hspfinder.h
new file mode 100644 (file)
index 0000000..2b8e9d8
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef hspfinder_h
+#define hspfinder_h
+
+#include "seq.h"
+
+// Stub class: both setters accept a sequence and do nothing with it.
+class HSPFinder
+       {
+public:
+       // No-op; the argument is ignored.
+       void SetA(const SeqData &/*SD*/) {}
+       // No-op; the argument is ignored.
+       void SetB(const SeqData &/*SD*/) {}
+       };
+
+#endif // hspfinder_h
diff --git a/make3way.cpp b/make3way.cpp
new file mode 100644 (file)
index 0000000..ce88f86
--- /dev/null
@@ -0,0 +1,173 @@
+#include "myutils.h"\r
+#include "sfasta.h"\r
+#include "path.h"\r
+#include "dp.h"\r
+\r
+void Make3Way(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
+  const string &PathQA, const string &PathQB,\r
+  string &Q3, string &A3, string &B3)\r
+       {\r
+       Q3.clear();\r
+       A3.clear();\r
+       B3.clear();\r
+\r
+#if    DEBUG\r
+       {\r
+       unsigned QLen = 0;\r
+       unsigned ALen = 0;\r
+       for (unsigned i = 0; i < SIZE(PathQA); ++i)\r
+               {\r
+               char c = PathQA[i];\r
+               if (c == 'M' || c == 'D')\r
+                       ++QLen;\r
+               if (c == 'M' || c == 'I')\r
+                       ++ALen;\r
+               }\r
+       asserta(QLen == QSD.L);\r
+       asserta(ALen == ASD.L);\r
+       }\r
+       {\r
+       unsigned QLen = 0;\r
+       unsigned BLen = 0;\r
+       for (unsigned i = 0; i < SIZE(PathQB); ++i)\r
+               {\r
+               char c = PathQB[i];\r
+               if (c == 'M' || c == 'D')\r
+                       ++QLen;\r
+               if (c == 'M' || c == 'I')\r
+                       ++BLen;\r
+               }\r
+       asserta(QLen == QSD.L);\r
+       asserta(BLen == BSD.L);\r
+       }\r
+#endif\r
+\r
+       const byte *Q = QSD.Seq;\r
+       const byte *A = ASD.Seq;\r
+       const byte *B = BSD.Seq;\r
+\r
+       unsigned LQ = QSD.L;\r
+       unsigned LA = ASD.L;\r
+       unsigned LB = BSD.L;\r
+\r
+       vector<unsigned> InsertCountsA(LQ+1, 0);\r
+       unsigned QPos = 0;\r
+       for (unsigned i = 0; i < SIZE(PathQA); ++i)\r
+               {\r
+               char c = PathQA[i];\r
+               if (c == 'M' || c == 'D')\r
+                       ++QPos;\r
+               else\r
+                       {\r
+                       asserta(c == 'I');\r
+                       asserta(QPos <= LQ);\r
+                       ++(InsertCountsA[QPos]);\r
+                       }\r
+               }\r
+\r
+       vector<unsigned> InsertCountsB(LQ+1, 0);\r
+       QPos = 0;\r
+       for (unsigned i = 0; i < SIZE(PathQB); ++i)\r
+               {\r
+               char c = PathQB[i];\r
+               if (c == 'M' || c == 'D')\r
+                       ++QPos;\r
+               else\r
+                       {\r
+                       asserta(c == 'I');\r
+                       asserta(QPos <= LQ);\r
+                       ++(InsertCountsB[QPos]);\r
+                       }\r
+               }\r
+\r
+       vector<unsigned> InsertCounts;\r
+       for (unsigned i = 0; i <= LQ; ++i)\r
+               {\r
+               unsigned is = max(InsertCountsA[i], InsertCountsB[i]);\r
+               InsertCounts.push_back(is);\r
+               }\r
+\r
+       for (unsigned i = 0; i < LQ; ++i)\r
+               {\r
+               for (unsigned k = 0; k < InsertCounts[i]; ++k)\r
+                       Q3.push_back('-');\r
+               asserta(i < LQ);\r
+               Q3.push_back(toupper(Q[i]));\r
+               }\r
+       for (unsigned k = 0; k < InsertCounts[LQ]; ++k)\r
+               Q3.push_back('-');\r
+\r
+// A\r
+       QPos = 0;\r
+       unsigned APos = 0;\r
+       unsigned is = 0;\r
+       for (unsigned i = 0; i < SIZE(PathQA); ++i)\r
+               {\r
+               char c = PathQA[i];\r
+               if (c == 'M' || c == 'D')\r
+                       {\r
+                       unsigned isq = InsertCounts[QPos];\r
+                       asserta(is <= isq);\r
+                       for (unsigned i = 0; i < InsertCounts[QPos]-is; ++i)\r
+                               A3.push_back('-');\r
+                       is = 0;\r
+                       ++QPos;\r
+                       }\r
+               if (c == 'M')\r
+                       {\r
+                       asserta(APos < LA);\r
+                       A3.push_back(toupper(A[APos++]));\r
+                       }\r
+               else if (c == 'D')\r
+                       A3.push_back('-');\r
+               else if (c == 'I')\r
+                       {\r
+                       ++is;\r
+                       asserta(APos < LA);\r
+                       A3.push_back(toupper(A[APos++]));\r
+                       }\r
+               }\r
+       asserta(is <= InsertCounts[LQ]);\r
+       for (unsigned k = 0; k < InsertCounts[LQ]-is; ++k)\r
+               A3.push_back('-');\r
+       asserta(QPos == LQ);\r
+       asserta(APos == LA);\r
+\r
+// B\r
+       QPos = 0;\r
+       unsigned BPos = 0;\r
+       is = 0;\r
+       for (unsigned i = 0; i < SIZE(PathQB); ++i)\r
+               {\r
+               char c = PathQB[i];\r
+               if (c == 'M' || c == 'D')\r
+                       {\r
+                       asserta(is <= InsertCounts[QPos]);\r
+                       for (unsigned i = 0; i < InsertCounts[QPos]-is; ++i)\r
+                               B3.push_back('-');\r
+                       is = 0;\r
+                       ++QPos;\r
+                       }\r
+               if (c == 'M')\r
+                       {\r
+                       asserta(BPos < LB);\r
+                       B3.push_back(toupper(B[BPos++]));\r
+                       }\r
+               else if (c == 'D')\r
+                       B3.push_back('-');\r
+               else if (c == 'I')\r
+                       {\r
+                       ++is;\r
+                       asserta(BPos < LB);\r
+                       B3.push_back(toupper(B[BPos++]));\r
+                       }\r
+               }\r
+       asserta(is <= InsertCounts[LQ]);\r
+       for (unsigned k = 0; k < InsertCounts[LQ]-is; ++k)\r
+               B3.push_back('-');\r
+       asserta(APos == LA);\r
+       asserta(BPos == LB);\r
+\r
+       asserta(SIZE(Q3) == SIZE(A3));\r
+       asserta(SIZE(Q3) == SIZE(B3));\r
+       }\r
index c2ef698db8ed4d9c0769fcd4295a27a886c65bfc..3205ae794165a5b8c045c473403d8a66b1b54f90 100644 (file)
@@ -1154,7 +1154,43 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                exit(1);
        }
 }
-
+/**************************************************************************************************/
+int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
+       try{
+               
+               vector<unsigned long int> filePos = divideFile(filename, proc);
+               
+               for (int i = 0; i < (filePos.size()-1); i++) {
+                       
+                       //read file chunk
+                       ifstream in;
+                       openInputFile(filename, in);
+                       in.seekg(filePos[i]);
+                       unsigned long int size = filePos[(i+1)] - filePos[i];
+                       char* chunk = new char[size];
+                       in.read(chunk, size);
+                       in.close();
+                       
+                       //open new file
+                       string fileChunkName = filename + "." + toString(i) + ".tmp";
+                       ofstream out; 
+                       openOutputFile(fileChunkName, out);
+                       
+                       out << chunk << endl;
+                       out.close();
+                       delete[] chunk;
+                       
+                       //save name
+                       files.push_back(fileChunkName);
+               }
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "divideFile");
+               exit(1);
+       }
+}
 /***********************************************************************/
 
 bool MothurOut::isTrue(string f){
index f6f505dce6409db716561b061a2e66eda3efae5a..8ac84007de59225ccffb9642a4d0dd023818e40a 100644 (file)
@@ -44,6 +44,7 @@ class MothurOut {
                //functions from mothur.h
                //file operations
                vector<unsigned long int> divideFile(string, int&);
+               int divideFile(string, int&, vector<string>&);
                vector<unsigned long int> setFilePosEachLine(string, int&);
                vector<unsigned long int> setFilePosFasta(string, int&);
                string sortFile(string, string);
diff --git a/mx.cpp b/mx.cpp
new file mode 100644 (file)
index 0000000..48c347e
--- /dev/null
+++ b/mx.cpp
@@ -0,0 +1,294 @@
+#include "myutils.h"\r
+#include "mx.h"\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+\r
+// Forward declaration only; nothing in this file defines or calls it.\r
+char ProbToChar(float p);\r
+\r
+// Storage for MxBase's static members.\r
+// Registry of live matrices; created lazily by OnCtor().\r
+list<MxBase *> *MxBase::m_Matrices = 0;\r
+// Number of calls to the full MxBase::Alloc().\r
+unsigned MxBase::m_AllocCount;\r
+// Number of Alloc() calls made while the matrix had no allocated rows.\r
+unsigned MxBase::m_ZeroAllocCount;\r
+// Number of Alloc() calls that had to grow an existing allocation.\r
+unsigned MxBase::m_GrowAllocCount;\r
+// Running total of GetBytes() over reallocations done in Alloc().\r
+double MxBase::m_TotalBytes;\r
+// Largest value m_TotalBytes has reached.\r
+double MxBase::m_MaxBytes;\r
+\r
+// Parses s as a number and returns its natural logarithm formatted by\r
+// TypeToStr<float>(); the result lives in that function's static buffer.\r
+static const char *LogizeStr(const char *s)\r
+       {\r
+       const double Value = atof(s);\r
+       const float LogValue = float(log(Value));\r
+       return TypeToStr<float>(LogValue);\r
+       }\r
+\r
+// Parses s as a number and returns e raised to that value, formatted by\r
+// TypeToStr<float>(); the result lives in that function's static buffer.\r
+static const char *ExpizeStr(const char *s)\r
+       {\r
+       const double Value = atof(s);\r
+       const float ExpValue = float(exp(Value));\r
+       return TypeToStr<float>(ExpValue);\r
+       }\r
+\r
+// Adds a newly constructed matrix to the front of the global registry,\r
+// creating the registry on first use.\r
+void MxBase::OnCtor(MxBase *Mx)\r
+       {\r
+       if (!m_Matrices)\r
+               m_Matrices = new list<MxBase *>;\r
+       asserta(m_Matrices != 0);\r
+       m_Matrices->push_front(Mx);\r
+       }\r
+\r
+void MxBase::OnDtor(MxBase *Mx)\r
+       {\r
+       if (m_Matrices == 0)\r
+               {\r
+               Warning("MxBase::OnDtor, m_Matrices = 0");\r
+               return;\r
+               }\r
+       for (list<MxBase*>::iterator p = m_Matrices->begin();\r
+         p != m_Matrices->end(); ++p)\r
+               {\r
+               if (*p == Mx)\r
+                       {\r
+                       m_Matrices->erase(p);\r
+                       if (m_Matrices->empty())\r
+                               delete m_Matrices;\r
+                       return;\r
+                       }\r
+               }\r
+       Warning("MxBase::OnDtor, not found");\r
+       }\r
+\r
+//float **MxBase::Getf(const string &Name)\r
+//     {\r
+//     Mx<float> *m = (Mx<float> *) Get(Name);\r
+//     asserta(m->GetTypeSize() == sizeof(float));\r
+//     return m->GetData();\r
+//     }\r
+//\r
+//double **MxBase::Getd(const string &Name)\r
+//     {\r
+//     Mx<double> *m = (Mx<double> *) Get(Name);\r
+//     asserta(m->GetTypeSize() == sizeof(double));\r
+//     return m->GetData();\r
+//     }\r
+//\r
+//char **MxBase::Getc(const string &Name)\r
+//     {\r
+//     Mx<char> *m = (Mx<char> *) Get(Name);\r
+//     asserta(m->GetTypeSize() == sizeof(char));\r
+//     return m->GetData();\r
+//     }\r
+\r
+// Convenience overload: allocate a matrix whose rows/columns correspond to\r
+// sequences IdA/IdB in DB. Forwards to the full Alloc() with no SeqData.\r
+void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+  const SeqDB *DB, unsigned IdA, unsigned IdB)\r
+       {\r
+       Alloc(Name, RowCount, ColCount, DB, IdA, IdB, 0, 0);\r
+       }\r
+\r
+// Convenience overload: allocate a matrix described by two SeqData objects.\r
+// Forwards to the full Alloc() with no database and both ids set to UINT_MAX.\r
+void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+  const SeqData *SA, const SeqData *SB)\r
+       {\r
+       Alloc(Name, RowCount, ColCount, 0, UINT_MAX, UINT_MAX, SA, SB);\r
+       }\r
+\r
+// Makes sure the matrix can hold RowCount x ColCount cells, then records the\r
+// name, logical dimensions and the sequence information for later logging.\r
+//\r
+//   Name          Copied into m_Name, truncated to fit.\r
+//   DB, IdA, IdB  Optional sequence database and the ids of the two sequences;\r
+//                 when DB is non-null both ids must be set and each dimension\r
+//                 must be at least the sequence length plus one.\r
+//   SA, SB        Optional sequence data, stored as given.\r
+//\r
+// Existing storage is reused when it is already large enough. Otherwise it is\r
+// freed (contents are not preserved) and a square block with 16 cells of\r
+// slack is allocated.\r
+void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+  const SeqDB *DB, unsigned IdA, unsigned IdB, const SeqData *SA, const SeqData *SB)\r
+       {\r
+       StartTimer(MxBase_Alloc);\r
+\r
+       ++m_AllocCount;\r
+       if (m_AllocatedRowCount == 0)\r
+               ++m_ZeroAllocCount;\r
+\r
+       if (DB != 0)\r
+               {\r
+               asserta(IdA != UINT_MAX);\r
+               asserta(IdB != UINT_MAX);\r
+               asserta(RowCount >= DB->GetSeqLength(IdA) + 1);\r
+               asserta(ColCount >= DB->GetSeqLength(IdB) + 1);\r
+               }\r
+       if (RowCount > m_AllocatedRowCount || ColCount > m_AllocatedColCount)\r
+               {\r
+               if (m_AllocatedRowCount > 0)\r
+                       {\r
+                       // The byte count logged here is that of the old allocation.\r
+                       if (opt_logmemgrows)\r
+                               Log("MxBase::Alloc grow %s %u x %u -> %u x %u, %s bytes\n",\r
+                                 Name, m_AllocatedRowCount, m_AllocatedColCount,\r
+                                 RowCount, ColCount,\r
+                                 IntToStr(GetBytes()));\r
+                       ++m_GrowAllocCount;\r
+                       }\r
+\r
+               // Remove the old allocation from the running total before freeing it.\r
+               m_TotalBytes -= GetBytes();\r
+\r
+               // NOTE(review): the timer macros appear to attribute FreeData() and\r
+               // AllocData() to their own timers rather than MxBase_Alloc -- confirm\r
+               // against timing.h.\r
+               PauseTimer(MxBase_Alloc);\r
+               StartTimer(MxBase_FreeData);\r
+               FreeData();\r
+               EndTimer(MxBase_FreeData);\r
+               StartTimer(MxBase_Alloc);\r
+\r
+               // Allocate a square block big enough for either dimension, with slack\r
+               // so that slightly larger requests do not reallocate again.\r
+               unsigned N = max(RowCount + 16, m_AllocatedRowCount);\r
+               unsigned M = max(ColCount + 16, m_AllocatedColCount);\r
+               N = max(N, M);\r
+\r
+               PauseTimer(MxBase_Alloc);\r
+               StartTimer(MxBase_AllocData);\r
+               AllocData(N, N);\r
+               EndTimer(MxBase_AllocData);\r
+               StartTimer(MxBase_Alloc);\r
+\r
+               // Add the new allocation and track the high-water mark.\r
+               m_TotalBytes += GetBytes();\r
+               if (m_TotalBytes > m_MaxBytes)\r
+                       m_MaxBytes = m_TotalBytes;\r
+               }\r
+       \r
+       // Bounded copy of the name; the last byte is always the terminator.\r
+       unsigned n = sizeof(m_Name)-1;\r
+       strncpy(m_Name, Name, n);\r
+       m_Name[n] = 0;\r
+       m_RowCount = RowCount;\r
+       m_ColCount = ColCount;\r
+       m_SeqDB = DB;\r
+       m_IdA = IdA;\r
+       m_IdB = IdB;\r
+       m_SA = SA;\r
+       m_SB = SB;\r
+\r
+       EndTimer(MxBase_Alloc);\r
+       }\r
+\r
+// Writes a human-readable dump of this matrix to the log.\r
+//\r
+//   WithData  When false, or when the matrix has no rows or columns, only the\r
+//             header line (name, dimensions, sequence labels) is written.\r
+//   Opts      Bitmask. OPT_EXP / OPT_LOG prefix the header and, in the plain\r
+//             cell dump, re-render each cell through ExpizeStr() / LogizeStr().\r
+//             OPT_ZERO_BASED lines sequence letters up with index 0 instead\r
+//             of index 1.\r
+//\r
+// If m_Alpha is set the matrix is dumped as a C-style table indexed by\r
+// character code. If m_Alpha2 is set the table is indexed by position in the\r
+// alphabet. Otherwise all m_RowCount x m_ColCount cells are written, with the\r
+// sequence letters as row/column headings when they are available.\r
+void MxBase::LogMe(bool WithData, int Opts) const\r
+       {\r
+       Log("\n");\r
+       if (Opts & OPT_EXP)\r
+               Log("Exp ");\r
+       else if (Opts & OPT_LOG)\r
+               Log("Log ");\r
+       bool ZeroBased = ((Opts & OPT_ZERO_BASED) != 0);\r
+       Log("%s(%p) Rows %u/%u, Cols %u/%u",\r
+         m_Name, this,\r
+         m_RowCount, m_AllocatedRowCount,\r
+         m_ColCount, m_AllocatedColCount);\r
+       if (m_SeqDB != 0 && m_IdA != UINT_MAX)\r
+               Log(", A=%s", m_SeqDB->GetLabel(m_IdA));\r
+       else if (m_SA != 0)\r
+               Log(", A=%s", m_SA->Label);\r
+       if (m_SeqDB != 0 && m_IdB != UINT_MAX)\r
+               Log(", B=%s", m_SeqDB->GetLabel(m_IdB));\r
+       else if (m_SB != 0)\r
+               Log(", B=%s", m_SB->Label);\r
+       Log("\n");\r
+       if (!WithData || m_RowCount == 0 || m_ColCount == 0)\r
+               return;\r
+\r
+       // Column headings are padded to the width of cell (0,0).\r
+       // printf's '*' field width takes an int, so keep the width as int.\r
+       const char *z = GetAsStr(0, 0);\r
+       int Width = (int) strlen(z);\r
+\r
+       // Column numbers are shown modulo 10^Width so they fit in one cell.\r
+       // Mod == 0 means 10^Width does not fit in an unsigned; every column\r
+       // number then already fits in Width digits and is printed unchanged.\r
+       unsigned Mod = 1;\r
+       for (int i = 0; i < Width; ++i)\r
+               {\r
+               if (Mod > UINT_MAX/10)\r
+                       {\r
+                       Mod = 0;\r
+                       break;\r
+                       }\r
+               Mod *= 10;\r
+               }\r
+\r
+       if (m_Alpha[0] != 0)\r
+               {\r
+               // Table indexed by character code: cell (Alpha[i], Alpha[j]).\r
+               Log("// Alphabet=%s\n", m_Alpha);\r
+               Log("//      ");\r
+               unsigned n = strlen(m_Alpha);\r
+               for (unsigned j = 0; j < n; ++j)\r
+                       Log(" %*c", Width, m_Alpha[j]);\r
+               Log("\n");\r
+               for (unsigned i = 0; i < n; ++i)\r
+                       {\r
+                       Log("/* %c */ {", m_Alpha[i]);\r
+                       unsigned ci = m_Alpha[i];\r
+                       for (unsigned j = 0; j < n; ++j)\r
+                               {\r
+                               unsigned cj = m_Alpha[j];\r
+                               Log("%s,", GetAsStr(ci, cj));\r
+                               }\r
+                       Log("},  // %c\n", m_Alpha[i]);\r
+                       }\r
+               return;\r
+               }\r
+       else if (m_Alpha2[0] != 0)\r
+               {\r
+               // Table indexed by position in the alphabet: cell (i, j).\r
+               unsigned n = strlen(m_Alpha2);\r
+               Log("// Alphabet=%s\n", m_Alpha2);\r
+               Log("//      ");\r
+               for (unsigned j = 0; j < n; ++j)\r
+                       Log(" %*c", Width, m_Alpha2[j]);\r
+               Log("\n");\r
+               for (unsigned i = 0; i < n; ++i)\r
+                       {\r
+                       Log("/* %c */ {", m_Alpha2[i]);\r
+                       for (unsigned j = 0; j < n; ++j)\r
+                               Log("%s,", GetAsStr(i, j));\r
+                       Log("},  // %c\n", m_Alpha2[i]);\r
+                       }\r
+               return;\r
+               }\r
+\r
+       // Sequence letters for the row (A) and column (B) headings, if known.\r
+       const byte *A = 0;\r
+       const byte *B = 0;\r
+       if (m_SeqDB != 0 && m_IdA != UINT_MAX)\r
+               A = m_SeqDB->GetSeq(m_IdA);\r
+       else if (m_SA != 0)\r
+               A = m_SA->Seq;\r
+       if (m_SeqDB != 0 && m_IdB != UINT_MAX)\r
+               B = m_SeqDB->GetSeq(m_IdB);\r
+       else if (m_SB != 0)\r
+               B = m_SB->Seq;\r
+\r
+       if (B != 0)\r
+               {\r
+               if (A != 0)\r
+                       Log("  ");\r
+               Log("%5.5s", "");\r
+               if (ZeroBased)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               Log("%*c", Width, B[j]);\r
+               else\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               Log("%*c", Width, j == 0 ? ' ' : B[j-1]);\r
+               Log("\n");\r
+               }\r
+\r
+       // Column index line.\r
+       if (A != 0)\r
+               Log("  ");\r
+       Log("%5.5s", "");\r
+       for (unsigned j = 0; j < m_ColCount; ++j)\r
+               Log("%*u", Width, Mod == 0 ? j : j%Mod);\r
+       Log("\n");\r
+\r
+       for (unsigned i = 0; i < m_RowCount; ++i)\r
+               {\r
+               if (A != 0)\r
+                       {\r
+                       if (ZeroBased)\r
+                               Log("%c ", A[i]);\r
+                       else\r
+                               Log("%c ", i == 0 ? ' ' : A[i-1]);\r
+                       }\r
+               Log("%4u ", i);\r
+\r
+               for (unsigned j = 0; j < m_ColCount; ++j)\r
+                       {\r
+                       const char *s = GetAsStr(i, j);\r
+                       if (Opts & OPT_LOG)\r
+                               s = LogizeStr(s);\r
+                       else if (Opts & OPT_EXP)\r
+                               s = ExpizeStr(s);\r
+                       Log("%s", s);\r
+                       }\r
+               Log("\n");\r
+               }\r
+       }\r
+// NOTE(review): not referenced anywhere else in this file.\r
+static unsigned g_MatrixFileCount;\r
+\r
+// Logs the allocation statistics gathered by MxBase::Alloc(): call counts,\r
+// the current running byte total and the peak byte total.\r
+void MxBase::LogCounts()\r
+       {\r
+       Log("\n");\r
+       Log("MxBase::LogCounts()\n");\r
+       Log("      What           N\n");\r
+       Log("----------  ----------\n");\r
+       Log("    Allocs  %10u\n", m_AllocCount);\r
+       Log("ZeroAllocs  %10u\n", m_ZeroAllocCount);\r
+       Log("     Grows  %10u\n", m_GrowAllocCount);\r
+       Log("     Bytes  %10.10s\n", MemBytesToStr(m_TotalBytes));\r
+       Log(" Max bytes  %10.10s\n", MemBytesToStr(m_MaxBytes));\r
+       }\r
diff --git a/mx.h b/mx.h
new file mode 100644 (file)
index 0000000..1438900
--- /dev/null
+++ b/mx.h
@@ -0,0 +1,454 @@
+#ifndef mx_h\r
+#define mx_h\r
+\r
+#include <list>\r
+#include <limits.h>\r
+#include <math.h>\r
+#include "timing.h"\r
+#include "myutils.h"\r
+\r
+// Option bits for MxBase::LogMe().\r
+const int OPT_LOG = 0x01;\r
+const int OPT_EXP = 0x02;\r
+const int OPT_ZERO_BASED = 0x04;\r
+// Sentinel cell values. TypeToStr<float>() prints values below\r
+// MINUS_INFINITY/2 as "*" and values equal to UNINIT as "?".\r
+const float MINUS_INFINITY = -9e9f;\r
+const float UNINIT = -8e8f;\r
+\r
+struct SeqData;\r
+\r
+// Formats a matrix cell as text. Only the explicit specialisations below are\r
+// meant to be used; instantiating the primary template calls Die().\r
+template<class T> const char *TypeToStr(T t)\r
+       {\r
+       Die("Unspecialised TypeToStr() called");\r
+       ureturn(0);\r
+       }\r
+\r
+// Formats an unsigned short right-aligned in 12 columns. The returned pointer\r
+// refers to a static buffer that the next call overwrites.\r
+template<> inline const char *TypeToStr<unsigned short>(unsigned short f)\r
+       {\r
+       static char Buf[16];\r
+       sprintf(Buf, "%12u", f);\r
+       return Buf;\r
+       }\r
+\r
+// Formats a short right-aligned in 12 columns. The returned pointer refers to\r
+// a static buffer that the next call overwrites.\r
+template<> inline const char *TypeToStr<short>(short f)\r
+       {\r
+       static char Buf[16];\r
+       sprintf(Buf, "%12d", f);\r
+       return Buf;\r
+       }\r
+\r
+// Formats an int right-aligned in at least 5 columns. The returned pointer\r
+// refers to a static buffer that the next call overwrites.\r
+template<> inline const char *TypeToStr<int>(int f)\r
+       {\r
+       static char Buf[16];\r
+       sprintf(Buf, "%5d", f);\r
+       return Buf;\r
+       }\r
+\r
+template<> inline const char *TypeToStr<float>(float f)\r
+       {\r
+       static char s[16];\r
+\r
+       if (f == UNINIT)\r
+               sprintf(s, "%12.12s", "?");\r
+       else if (f < MINUS_INFINITY/2)\r
+               sprintf(s, "%12.12s", "*");\r
+       else if (f == 0.0f)\r
+               sprintf(s, "%12.12s", ".");\r
+       else if (f >= -1e5 && f <= 1e5)\r
+               sprintf(s, "%12.5f", f);\r
+       else\r
+               sprintf(s, "%12.4g", f);\r
+       return s;\r
+       }\r
+\r
+template<> inline const char *TypeToStr<double>(double f)\r
+       {\r
+       static char s[16];\r
+\r
+       if (f < -1e9)\r
+               sprintf(s, "%12.12s", "*");\r
+       else if (f == 0.0f)\r
+               sprintf(s, "%12.12s", ".");\r
+       else if (f >= -1e-5 && f <= 1e5)\r
+               sprintf(s, "%12.5f", f);\r
+       else\r
+               sprintf(s, "%12.4g", f);\r
+       return s;\r
+       }\r
+\r
+// Formats f with TypeToStr<float>() into the caller's string, so the result\r
+// does not depend on the shared static buffer. Returns s.c_str().\r
+static inline const char *FloatToStr(float f, string &s)\r
+       {\r
+       s.assign(TypeToStr<float>(f));\r
+       return s.c_str();\r
+       }\r
+\r
+// Returns c as a one-character string held in a static buffer.\r
+template<> inline const char *TypeToStr<char>(char c)\r
+       {\r
+       static char Buf[2] = { 0, 0 };\r
+       Buf[0] = c;\r
+       return Buf;\r
+       }\r
+\r
+// Returns c as a one-character string held in a static buffer.\r
+template<> inline const char *TypeToStr<byte>(byte c)\r
+       {\r
+       static char Buf[2] = { 0, 0 };\r
+       Buf[0] = c;\r
+       return Buf;\r
+       }\r
+\r
+// Returns "T" or "F" in a static buffer.\r
+template<> inline const char *TypeToStr<bool>(bool tof)\r
+       {\r
+       static char Buf[2] = { 0, 0 };\r
+       if (tof)\r
+               Buf[0] = 'T';\r
+       else\r
+               Buf[0] = 'F';\r
+       return Buf;\r
+       }\r
+\r
+struct SeqDB;\r
+\r
+// Type-independent part of a 2-D matrix: name, logical and allocated\r
+// dimensions, optional sequence information used for logging, and a global\r
+// registry of all live matrices plus allocation statistics. Storage itself\r
+// is provided by the derived class through AllocData() / FreeData().\r
+struct MxBase\r
+       {\r
+// Copying is disabled (declared, never defined).\r
+private:\r
+       MxBase(const MxBase &rhs);\r
+       MxBase &operator=(const MxBase &rhs);\r
+\r
+public:\r
+       char m_Name[32];\r
+       // Alphabets used by LogMe(): with m_Alpha cells are indexed by character\r
+       // code, with m_Alpha2 by position in the alphabet.\r
+       char m_Alpha[32];\r
+       char m_Alpha2[32];\r
+       // Logical size requested by the last Alloc().\r
+       unsigned m_RowCount;\r
+       unsigned m_ColCount;\r
+       // Size of the storage actually allocated.\r
+       unsigned m_AllocatedRowCount;\r
+       unsigned m_AllocatedColCount;\r
+       // Optional sequence information, used for labels in LogMe().\r
+       const SeqDB *m_SeqDB;\r
+       unsigned m_IdA;\r
+       unsigned m_IdB;\r
+       const SeqData *m_SA;\r
+       const SeqData *m_SB;\r
+\r
+       static list<MxBase *> *m_Matrices;\r
+       //static MxBase *Get(const string &Name);\r
+       //static float **Getf(const string &Name);\r
+       //static double **Getd(const string &Name);\r
+       //static char **Getc(const string &Name);\r
+\r
+       static unsigned m_AllocCount;\r
+       static unsigned m_ZeroAllocCount;\r
+       static unsigned m_GrowAllocCount;\r
+       static double m_TotalBytes;\r
+       static double m_MaxBytes;\r
+\r
+       static void OnCtor(MxBase *Mx);\r
+       static void OnDtor(MxBase *Mx);\r
+\r
+       // Starts out empty and registers itself. Every member that LogMe() or\r
+       // AllocData() may read before the first Alloc() is initialised here.\r
+       MxBase()\r
+               {\r
+               m_Name[0] = 0;\r
+               m_Alpha[0] = 0;\r
+               m_Alpha2[0] = 0;\r
+               m_AllocatedRowCount = 0;\r
+               m_AllocatedColCount = 0;\r
+               m_RowCount = 0;\r
+               m_ColCount = 0;\r
+               m_IdA = UINT_MAX;\r
+               m_IdB = UINT_MAX;\r
+               m_SeqDB = 0;\r
+               m_SA = 0;\r
+               m_SB = 0;\r
+               OnCtor(this);\r
+               }\r
+       virtual ~MxBase()\r
+               {\r
+               OnDtor(this);\r
+               }\r
+\r
+       // Size in bytes of one cell.\r
+       virtual unsigned GetTypeSize() const = 0;\r
+       // Size in bytes of the current allocation.\r
+       virtual unsigned GetBytes() const = 0;\r
+\r
+       // Frees the storage and resets dimensions and sequence info.\r
+       // m_SeqDB, the name and the alphabets are left untouched.\r
+       void Clear()\r
+               {\r
+               FreeData();\r
+               m_AllocatedRowCount = 0;\r
+               m_AllocatedColCount = 0;\r
+               m_RowCount = 0;\r
+               m_ColCount = 0;\r
+               m_IdA = UINT_MAX;\r
+               m_IdB = UINT_MAX;\r
+               m_SA = 0;\r
+               m_SB = 0;\r
+               }\r
+\r
+       bool Empty() const\r
+               {\r
+               return m_RowCount == 0;\r
+               }\r
+\r
+       virtual void AllocData(unsigned RowCount, unsigned ColCount) = 0;\r
+       virtual void FreeData() = 0;\r
+       // Text form of cell (i, j).\r
+       virtual const char *GetAsStr(unsigned i, unsigned j) const = 0;\r
+\r
+       // Stores a copy of Alpha, truncated to fit in m_Alpha.\r
+       void SetAlpha(const char *Alpha)\r
+               {\r
+               // Leave room for the terminator: the last valid index is\r
+               // sizeof(m_Alpha)-1, so writing m_Alpha[sizeof(m_Alpha)] would\r
+               // land one byte past the array.\r
+               unsigned n = sizeof(m_Alpha) - 1;\r
+               strncpy(m_Alpha, Alpha, n);\r
+               m_Alpha[n] = 0;\r
+               }\r
+\r
+       void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+         const SeqDB *DB, unsigned IdA, unsigned IdB,\r
+         const SeqData *SA, const SeqData *SB);\r
+\r
+       void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+         const SeqDB *DB = 0, unsigned IdA = UINT_MAX, unsigned IdB = UINT_MAX);\r
+\r
+       void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+         const SeqData *SA, const SeqData *SB);\r
+\r
+       // Logs one line per registered matrix that has storage allocated\r
+       // (allocated rows, allocated columns, cell size, MB, name) and a total.\r
+       static void LogAll()\r
+               {\r
+               Log("\n");\r
+               if (m_Matrices == 0)\r
+                       {\r
+                       Log("MxBase::m_Matrices=0\n");\r
+                       return;\r
+                       }\r
+               Log("\n");\r
+               Log("AllRows  AllCols    Sz        MB  Name\n");\r
+               Log("-------  -------  ----  --------  ----\n");\r
+               double TotalMB = 0;\r
+               for (list<MxBase *>::const_iterator p = m_Matrices->begin();\r
+                 p != m_Matrices->end(); ++p)\r
+                       {\r
+                       const MxBase *Mx = *p;\r
+                       if (Mx == 0)\r
+                               continue;\r
+                       //if (Mx->m_RowCount != 0 || ShowEmpty)\r
+                       //      Mx->LogMe(WithData);\r
+                       unsigned ar = Mx->m_AllocatedRowCount;\r
+                       if (ar == 0)\r
+                               continue;\r
+                       unsigned ac = Mx->m_AllocatedColCount;\r
+                       unsigned sz = Mx->GetTypeSize();\r
+                       // Computed in double so large matrices do not overflow.\r
+                       double MB = (double) ar*(double) ac*(double) sz/1e6;\r
+                       TotalMB += MB;\r
+                       Log("%7u  %7u  %4u  %8.2f  %s\n", ar, ac, sz, MB, Mx->m_Name);\r
+                       }\r
+               Log("                        --------\n");\r
+               Log("%7.7s  %7.7s  %4.4s  %8.2f\n", "", "", "", TotalMB);\r
+               }\r
+\r
+       void LogMe(bool WithData = true, int Opts = 0) const;\r
+       static void LogCounts();\r
+       };\r
+\r
+// Dense matrix of T stored as an array of row pointers. Dimensions, naming\r
+// and bookkeeping come from MxBase; this class provides the storage and the\r
+// typed accessors.\r
+template<class T> struct Mx : public MxBase\r
+       {\r
+// Disable unimplemented stuff\r
+private:\r
+       Mx(Mx &rhs);\r
+       Mx &operator=(Mx &rhs);\r
+       // const Mx &operator=(const Mx &rhs) const;\r
+\r
+public:\r
+       // m_Data[i][j] is cell (i, j); 0 while nothing is allocated.\r
+       T **m_Data;\r
+\r
+       Mx()\r
+               {\r
+               m_Data = 0;\r
+               }\r
+       \r
+       ~Mx()\r
+               {\r
+               FreeData();\r
+               }\r
+\r
+       // Allocates RowCount rows of ColCount cells each and records the\r
+       // allocated size. Does not free any previous allocation; callers go\r
+       // through FreeData() first (see MxBase::Alloc).\r
+       virtual void AllocData(unsigned RowCount, unsigned ColCount)\r
+               {\r
+               // NOTE(review): GetBytes() is evaluated before the allocated\r
+               // counts are updated, so this logs the previous size.\r
+               if (opt_logmemgrows)\r
+                       Log("MxBase::AllocData(%u,%u) %s bytes, Name=%s\n",\r
+                         RowCount, ColCount, IntToStr(GetBytes()), m_Name);\r
+               // m_Data = myalloc<T *>(RowCount);\r
+               m_Data = MYALLOC(T *, RowCount, Mx);\r
+               for (unsigned i = 0; i < RowCount; ++i)\r
+                       // m_Data[i] = myalloc<T>(ColCount);\r
+                       m_Data[i] = MYALLOC(T, ColCount, Mx);\r
+               AddBytes("Mx_AllocData", RowCount*sizeof(T *) + RowCount*ColCount*sizeof(T));\r
+\r
+               m_AllocatedRowCount = RowCount;\r
+               m_AllocatedColCount = ColCount;\r
+               }\r
+\r
+       // Releases every row and the row-pointer array, then resets both the\r
+       // logical and the allocated dimensions to zero.\r
+       virtual void FreeData()\r
+               {\r
+               for (unsigned i = 0; i < m_AllocatedRowCount; ++i)\r
+                       MYFREE(m_Data[i], m_AllocatedColCount, Mx);\r
+               MYFREE(m_Data, m_AllocatedRowCount, Mx);\r
+               SubBytes("Mx_AllocData",\r
+                 m_AllocatedRowCount*sizeof(T *) + m_AllocatedRowCount*m_AllocatedColCount*sizeof(T));\r
+\r
+               m_Data = 0;\r
+               m_RowCount = 0;\r
+               m_ColCount = 0;\r
+               m_AllocatedRowCount = 0;\r
+               m_AllocatedColCount = 0;\r
+               }\r
+\r
+       T **GetData()\r
+               {\r
+               return (T **) m_Data;\r
+               }\r
+\r
+       // Bounds-checked (via assert) read of cell (i, j).\r
+       T Get(unsigned i, unsigned j) const\r
+               {\r
+               assert(i < m_RowCount);\r
+               assert(j < m_ColCount);\r
+               return m_Data[i][j];\r
+               }\r
+\r
+       // Bounds-checked (via assert) write of cell (i, j). Declared const\r
+       // because only the pointed-to storage changes, not the object.\r
+       void Put(unsigned i, unsigned j, T x) const\r
+               {\r
+               assert(i < m_RowCount);\r
+               assert(j < m_ColCount);\r
+               m_Data[i][j] = x;\r
+               }\r
+\r
+       // For a square matrix, fills Avgs with the average of each row's\r
+       // off-diagonal cells and returns the mean of those averages\r
+       // (T(0) for an empty matrix).\r
+       T GetOffDiagAvgs(vector<T> &Avgs) const\r
+               {\r
+               if (m_RowCount != m_ColCount)\r
+                       Die("GetOffDiagAvgs, not symmetrical");\r
+               Avgs.clear();\r
+               T Total = T(0);\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       {\r
+                       T Sum = T(0);\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               {\r
+                               if (j == i)\r
+                                       continue;\r
+                               Sum += m_Data[i][j];\r
+                               }\r
+                       // NOTE(review): a 1x1 matrix divides by zero here; callers\r
+                       // presumably never pass one -- confirm.\r
+                       T Avg = Sum/(m_RowCount-1);\r
+                       Total += Avg;\r
+                       Avgs.push_back(Avg);\r
+                       }\r
+               return m_RowCount == 0 ? T(0) : Total/m_RowCount;\r
+               }\r
+\r
+       unsigned GetTypeSize() const\r
+               {\r
+               return sizeof(T);\r
+               }\r
+\r
+       // Bytes held by the current allocation: all cells plus the row pointers.\r
+       virtual unsigned GetBytes() const\r
+               {\r
+               return m_AllocatedRowCount*m_AllocatedColCount*GetTypeSize() +\r
+                 m_AllocatedRowCount*sizeof(T *);\r
+               }\r
+\r
+       const char *GetAsStr(unsigned i, unsigned j) const\r
+               {\r
+               return TypeToStr<T>(Get(i, j));\r
+               }\r
+\r
+       const T *const *const GetData() const\r
+               {\r
+               return (const T *const *) m_Data;\r
+               }\r
+\r
+       // Resizes this matrix to rhs's logical size and copies every cell, the\r
+       // sequence database and the two ids. The matrix is renamed "Copy" and\r
+       // m_SA / m_SB are reset to 0 by the Alloc() overload used here.\r
+       void Copy(const Mx<T> &rhs)\r
+               {\r
+               Alloc("Copy", rhs.m_RowCount, rhs.m_ColCount, rhs.m_SeqDB, rhs.m_IdA, rhs.m_IdB);\r
+               const T * const *Data = rhs.GetData();\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               m_Data[i][j] = Data[i][j];\r
+               }\r
+\r
+       // Sets every cell in the logical area to v.\r
+       void Assign(T v)\r
+               {\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               m_Data[i][j] = v;\r
+               }\r
+\r
+       // Returns true if rhs has the same logical size and every pair of cells\r
+       // compares equal under feq() after conversion to float; two cells that\r
+       // are both below -1e10 also count as equal. On the first mismatch a\r
+       // warning is issued and false is returned. Bwd selects the scan\r
+       // direction and therefore which mismatch is reported first.\r
+       bool Eq(const Mx &rhs, bool Bwd = false) const\r
+               {\r
+               if (rhs.m_ColCount != m_ColCount)\r
+                       return false;\r
+               if (rhs.m_RowCount != m_RowCount)\r
+                       return false;\r
+               const T * const*d = rhs.GetData();\r
+               // The backward scan starts at the last valid cell\r
+               // [m_RowCount-1][m_ColCount-1], not one past it.\r
+               int i1 = Bwd ? int(m_RowCount) - 1 : 0;\r
+               int j1 = Bwd ? int(m_ColCount) - 1 : 0;\r
+               int i2 = Bwd ? -1 : int(m_RowCount);\r
+               int j2 = Bwd ? -1 : int(m_ColCount);\r
+               for (int i = i1; i != i2; Bwd ? --i : ++i)\r
+                       for (int j = j1; j != j2; Bwd ? --j : ++j)\r
+                               {\r
+                               float x = m_Data[i][j];\r
+                               float y = d[i][j];\r
+                               if (x < -1e10 && y < -1e10)\r
+                                       continue;\r
+                               if (!feq(x, y))\r
+                                       {\r
+                                       Warning("%s[%d][%d] = %g, %s = %g",\r
+                                         m_Name, i, j, x, rhs.m_Name, y);\r
+                                       return false;\r
+                                       }\r
+                               }\r
+               return true;\r
+               }\r
+\r
+       // Like Eq() with a forward scan, but only cells whose Mask entry is true\r
+       // are compared. Returns false if rhs or Mask differ in size from this.\r
+       bool EqMask(const Mx &rhs, const Mx<bool> &Mask) const\r
+               {\r
+               if (rhs.m_ColCount != m_ColCount)\r
+                       return false;\r
+               if (rhs.m_RowCount != m_RowCount)\r
+                       return false;\r
+\r
+               if (Mask.m_ColCount != m_ColCount)\r
+                       return false;\r
+               if (Mask.m_RowCount != m_RowCount)\r
+                       return false;\r
+\r
+               const T * const*d = rhs.GetData();\r
+               // Indices stay int because the warning below formats them with %d.\r
+               const int RowCount = int(m_RowCount);\r
+               const int ColCount = int(m_ColCount);\r
+               for (int i = 0; i != RowCount; ++i)\r
+                       for (int j = 0; j != ColCount; ++j)\r
+                               {\r
+                               if (!Mask.m_Data[i][j])\r
+                                       continue;\r
+                               float x = m_Data[i][j];\r
+                               float y = d[i][j];\r
+                               if (x < -1e10 && y < -1e10)\r
+                                       continue;\r
+                               if (!feq(x, y))\r
+                                       {\r
+                                       Warning("%s[%d][%d] = %g, %s = %g",\r
+                                         m_Name, i, j, x, rhs.m_Name, y);\r
+                                       return false;\r
+                                       }\r
+                               }\r
+               return true;\r
+               }\r
+\r
+       // Same as Assign(); kept as a separate name for existing callers.\r
+       void Init(T v)\r
+               {\r
+               Assign(v);\r
+               }\r
+       };\r
+\r
+void WriteMx(const string &Name, Mx<float> &Mxf);\r
+\r
+template<class T> void ReserveMx(Mx<T> &Mxf, unsigned N = UINT_MAX)\r
+       {\r
+       if (Mxf.m_AllocatedRowCount > 0)\r
+               return;\r
+       extern unsigned g_MaxInputSeqLength;\r
+       if (N == UINT_MAX)\r
+               N = g_MaxInputSeqLength+1;\r
+       Mxf.Alloc("(Reserved)", N, N);\r
+       }\r
+\r
+#endif // mx_h\r
diff --git a/myopts.h b/myopts.h
new file mode 100644 (file)
index 0000000..ba901ea
--- /dev/null
+++ b/myopts.h
@@ -0,0 +1,190 @@
+// Option table written as macro invocations: every entry below calls one of\r
+// STR_OPT, UNS_OPT, INT_OPT, TOG_OPT, FLT_OPT or FLAG_OPT, none of which is\r
+// defined in this file.\r
+// NOTE(review): presumably the including file defines those macros before\r
+// including this header, and the numeric entries read as\r
+// (name, default, min, max) -- confirm against the macro definitions.\r
+#ifndef MY_VERSION\r
+#define MY_VERSION     "4.2"\r
+#endif\r
+\r
+STR_OPT(       input,                                  0)\r
+STR_OPT(       query,                                  0)\r
+STR_OPT(       db,                                             0)\r
+STR_OPT(       sort,                                   0)\r
+STR_OPT(       output,                                 0)\r
+STR_OPT(       uc,                                             0)\r
+STR_OPT(       clstr2uc,                               0)\r
+STR_OPT(       uc2clstr,                               0)\r
+STR_OPT(       uc2fasta,                               0)\r
+STR_OPT(       uc2fastax,                              0)\r
+STR_OPT(       mergesort,                              0)\r
+STR_OPT(       tmpdir,                                 ".")\r
+STR_OPT(       staralign,                              0)\r
+STR_OPT(       sortuc,                                 0)\r
+STR_OPT(       blastout,                               0)\r
+STR_OPT(       blast6out,                              0)\r
+STR_OPT(       fastapairs,                             0)\r
+STR_OPT(       idchar,                                 "|")\r
+STR_OPT(       diffchar,                               " ")\r
+STR_OPT(       uchime,                                 0)\r
+STR_OPT(       gapopen,                                0)\r
+STR_OPT(       gapext,                                 0)\r
+STR_OPT(       uhire,                                  0)\r
+STR_OPT(       ids,                                    "99,98,95,90,85,80,70,50,35")\r
+STR_OPT(       seeds,                                  0)\r
+STR_OPT(       clump,                                  0)\r
+STR_OPT(       clumpout,                               0)\r
+STR_OPT(       clump2fasta,                    0)\r
+STR_OPT(       clumpfasta,                             0)\r
+STR_OPT(       hireout,                                0)\r
+STR_OPT(       mergeclumps,                    0)\r
+STR_OPT(       alpha,                                  0)\r
+STR_OPT(       hspalpha,                               0)\r
+STR_OPT(       probmx,                                 0)\r
+STR_OPT(       matrix,                                 0)\r
+STR_OPT(       tracestate,                             0)\r
+STR_OPT(       chainout,                               0)\r
+STR_OPT(       cluster,                                0)\r
+STR_OPT(       computekl,                              0)\r
+STR_OPT(       userout,                                0)\r
+STR_OPT(       userfields,                             0)\r
+STR_OPT(       seedsout,                               0)\r
+STR_OPT(       chainhits,                              0)\r
+STR_OPT(       findorfs,                               0)\r
+STR_OPT(       strand,                                 0)\r
+STR_OPT(       getseqs,                                0)\r
+STR_OPT(       labels,                                 0)\r
+STR_OPT(       doug,                                   0)\r
+STR_OPT(       makeindex,                              0)\r
+STR_OPT(       indexstats,                             0)\r
+STR_OPT(       uchimeout,                              0)\r
+STR_OPT(       uchimealns,                             0)\r
+STR_OPT(       xframe,                                 0)\r
+STR_OPT(       mkctest,                                0)\r
+STR_OPT(       allpairs,                               0)\r
+STR_OPT(       fastq2fasta,                    0)\r
+STR_OPT(       otusort,                                0)\r
+STR_OPT(       sparsedist,                             0)\r
+STR_OPT(       sparsedistparams,               0)\r
+STR_OPT(       mcc,                                    0)\r
+STR_OPT(       utax,                                   0)\r
+STR_OPT(       simcl,                                  0)\r
+STR_OPT(       absort,                                 0)\r
+STR_OPT(       cc,                                             0)\r
+STR_OPT(       uslink,                                 0)\r
+\r
+UNS_OPT(       band,                                   16,                     0,                      UINT_MAX)\r
+UNS_OPT(       minlen,                                 10,                     1,                      UINT_MAX)\r
+UNS_OPT(       maxlen,                                 10000,          1,                      UINT_MAX)\r
+UNS_OPT(       w,                                              0,                      1,                      UINT_MAX)\r
+UNS_OPT(       k,                                              0,                      1,                      UINT_MAX)\r
+UNS_OPT(       stepwords,                              8,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxaccepts,                             1,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxrejects,                             8,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxtargets,                             0,                      0,                      UINT_MAX)\r
+UNS_OPT(       minhsp,                                 32,                     1,                      UINT_MAX)\r
+UNS_OPT(       bump,                                   50,                     0,                      100)\r
+UNS_OPT(       rowlen,                                 64,                     8,                      UINT_MAX)\r
+UNS_OPT(       idprefix,                               0,                      0,                      UINT_MAX)\r
+UNS_OPT(       idsuffix,                               0,                      0,                      UINT_MAX)\r
+UNS_OPT(       chunks,                                 4,                      2,                      UINT_MAX)\r
+UNS_OPT(       minchunk,                               64,                     2,                      UINT_MAX)\r
+UNS_OPT(       maxclump,                               1000,           1,                      UINT_MAX)\r
+UNS_OPT(       iddef,                                  0,                      0,                      UINT_MAX)\r
+UNS_OPT(       mincodons,                              20,                     1,                      UINT_MAX)\r
+UNS_OPT(       maxovd,                                 8,                      0,                      UINT_MAX)\r
+UNS_OPT(       max2,                                   40,                     0,                      UINT_MAX)\r
+UNS_OPT(       querylen,                               500,            0,                      UINT_MAX)\r
+UNS_OPT(       targetlen,                              500,            0,                      UINT_MAX)\r
+UNS_OPT(       orfstyle,                               (1+2+4),        0,                      UINT_MAX)\r
+UNS_OPT(       dbstep,                                 1,                      1,                      UINT_MAX)\r
+UNS_OPT(       randseed,                               1,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxp,                                   2,                      2,                      UINT_MAX)\r
+UNS_OPT(       idsmoothwindow,                 32,                     1,                      UINT_MAX)\r
+UNS_OPT(       mindiffs,                               3,                      1,                      UINT_MAX)\r
+UNS_OPT(       maxspan1,                               24,                     1,                      UINT_MAX)\r
+UNS_OPT(       maxspan2,                               24,                     1,                      UINT_MAX)\r
+UNS_OPT(       minorfcov,                              16,                     1,                      UINT_MAX)\r
+UNS_OPT(       hashsize,                               4195879,        1,                      UINT_MAX)\r
+UNS_OPT(       maxpoly,                                0,                      0,                      UINT_MAX)\r
+UNS_OPT(       droppct,                                50,                     0,                      100)\r
+UNS_OPT(       secs,                                   10,                     0,                      UINT_MAX)\r
+UNS_OPT(       maxqgap,                                0,                      0,                      UINT_MAX)\r
+UNS_OPT(       maxtgap,                                0,                      0,                      UINT_MAX)\r
+\r
+INT_OPT(       frame,                                  0,                      -3,                     +3)\r
+\r
+TOG_OPT(       trace,                                  false)\r
+TOG_OPT(       logmemgrows,                    false)\r
+TOG_OPT(       trunclabels,                    false)\r
+TOG_OPT(       verbose,                                false)\r
+TOG_OPT(       wordcountreject,                true)\r
+TOG_OPT(       rev,                                    false)\r
+TOG_OPT(       output_rejects,                 false)\r
+TOG_OPT(       blast_termgaps,                 false)\r
+TOG_OPT(       fastalign,                              true)\r
+TOG_OPT(       flushuc,                                false)\r
+TOG_OPT(       stable_sort,                    false)\r
+TOG_OPT(       minus_frames,                   true)\r
+TOG_OPT(       usort,                                  true)\r
+TOG_OPT(       nb,                                             false)\r
+TOG_OPT(       twohit,                                 true)\r
+TOG_OPT(       ssort,                                  false)\r
+TOG_OPT(       log_query,                              false)\r
+TOG_OPT(       log_hothits,                    false)\r
+TOG_OPT(       logwordstats,                   false)\r
+TOG_OPT(       ucl,                                    false)\r
+TOG_OPT(       skipgaps2,                              true)\r
+TOG_OPT(       skipgaps,                               true)\r
+TOG_OPT(       denovo,                                 false)\r
+TOG_OPT(       cartoon_orfs,                   false)\r
+TOG_OPT(       label_ab,                               false)\r
+TOG_OPT(       wordweight,                             false)\r
+TOG_OPT(       isort,                                  false)\r
+TOG_OPT(       selfid,                                 false)\r
+TOG_OPT(       leftjust,                               false)\r
+TOG_OPT(       rightjust,                              false)\r
+\r
+FLT_OPT(       id,                                             0.0,            0.0,            1.0)\r
+FLT_OPT(       weak_id,                                0.0,            0.0,            1.0)\r
+FLT_OPT(       match,                                  1.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       mismatch,                               -2.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       split,                                  1000.0,         1.0,            FLT_MAX)\r
+FLT_OPT(       evalue,                                 10.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       weak_evalue,                    10.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       evalue_g,                               10.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       chain_evalue,                   10.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       xdrop_u,                                16.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       xdrop_g,                                32.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       xdrop_ug,                               16.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       xdrop_nw,                               16.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       ka_gapped_lambda,               0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       ka_ungapped_lambda,             0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       ka_gapped_k,                    0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       ka_ungapped_k,                  0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       ka_dbsize,                              0.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       chain_targetfract,              0.0,            0.0,            1.0)\r
+FLT_OPT(       targetfract,                    0.0,            0.0,            1.0)\r
+FLT_OPT(       queryfract,                             0.0,            0.0,            1.0)\r
+FLT_OPT(       fspenalty,                              16.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       sspenalty,                              20.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       seedt1,                                 13.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       seedt2,                                 11.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       lopen,                                  11.0,           0.0,            FLT_MAX)\r
+FLT_OPT(       lext,                                   1.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       minh,                                   0.3,            0.0,            FLT_MAX)\r
+FLT_OPT(       xn,                                             8.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       dn,                                             1.4,            0.0,            FLT_MAX)\r
+FLT_OPT(       xa,                                             1.0,            0.0,            FLT_MAX)\r
+FLT_OPT(       mindiv,                                 0.5,            0.0,            100.0)\r
+FLT_OPT(       abskew,                                 2,                      0.0,            100.0)\r
+FLT_OPT(       abx,                                    8.0,            0.0,            100.0)\r
+FLT_OPT(       minspanratio1,                  0.7,            0.0,            1.0)\r
+FLT_OPT(       minspanratio2,                  0.7,            0.0,            1.0)\r
+\r
+FLAG_OPT(      usersort)\r
+FLAG_OPT(      exact)\r
+FLAG_OPT(      optimal)\r
+FLAG_OPT(      self)\r
+FLAG_OPT(      ungapped)\r
+FLAG_OPT(      global)\r
+FLAG_OPT(      local)\r
+FLAG_OPT(      xlat)\r
+FLAG_OPT(      realign)\r
+FLAG_OPT(      hash)\r
+FLAG_OPT(      derep)\r
diff --git a/myutils.cpp b/myutils.cpp
new file mode 100755 (executable)
index 0000000..ea983eb
--- /dev/null
@@ -0,0 +1,1844 @@
+#include <time.h>\r
+#include <stdarg.h>\r
+#include <sys/stat.h>\r
+#include <errno.h>\r
+#include <string.h>\r
+#include <ctype.h>\r
+#include <string>\r
+#include <vector>\r
+#include <set>\r
+#include <map>\r
+#include <signal.h>\r
+#include <float.h>\r
+\r
+#ifdef _MSC_VER\r
+#include <crtdbg.h>\r
+#include <process.h>\r
+#include <windows.h>\r
+#include <psapi.h>\r
+#include <io.h>\r
+#else\r
+#include <sys/time.h>\r
+#include <sys/resource.h>\r
+#include <unistd.h>\r
+#include <errno.h>\r
+#include <fcntl.h>\r
+#include <stdlib.h>\r
+#endif\r
+\r
+#include "myutils.h"\r
+\r
+const char *SVN_VERSION =\r
+#include "svnversion.h"\r
+;\r
+\r
+#define        TEST_UTILS                      0\r
+\r
+using namespace std;\r
+\r
+const unsigned MY_IO_BUFSIZ = 32000;\r
+const unsigned MAX_FORMATTED_STRING_LENGTH = 64000;\r
+\r
+static char *g_IOBuffers[256];\r
+static time_t g_StartTime = time(0);\r
+static vector<string> g_Argv;\r
+static double g_PeakMemUseBytes;\r
+\r
+#if    TEST_UTILS\r
+// Manual smoke test for the progress-reporting helpers. Only compiled when\r
+// TEST_UTILS is nonzero. It drives ProgressStep/Progress with long and short\r
+// messages, pausing between them with Sleep.\r
+void TestUtils()\r
+       {\r
+       const int C = 100000000;\r
+       for (int i = 0; i < C; ++i)\r
+               ProgressStep(i, C, "something or other");\r
+\r
+       // Alternate longer and shorter messages on the same line.\r
+       Progress("\n");\r
+       Progress("Longer message\r");\r
+       Sleep(1000);\r
+       Progress("Short\r");\r
+       Sleep(1000);\r
+       Progress("And longer again\r");\r
+       Sleep(1000);\r
+       Progress("Shrt\n");\r
+       Sleep(1000);\r
+       const unsigned N = 10;\r
+       unsigned M = 10;\r
+       for (unsigned i = 0; i < N; ++i)\r
+               {\r
+               ProgressStep(i, N, "Allocating 1MB blocks");\r
+               for (unsigned j = 0; j < M; ++j)\r
+                       {\r
+                       ProgressStep(j, M, "Inner loop"); \r
+                       // Each call allocates 100000 bytes (not the 1MB the message\r
+                       // says); the result is discarded and never freed.\r
+                       malloc(100000);\r
+                       Sleep(500);\r
+                       }\r
+               }\r
+       }\r
+#endif // TEST_UTILS\r
+\r
+static void AllocBuffer(FILE *f)\r
+       {\r
+       int fd = fileno(f);\r
+       if (fd < 0 || fd >= 256)\r
+               return;\r
+       if (g_IOBuffers[fd] == 0)\r
+               g_IOBuffers[fd] = myalloc(char, MY_IO_BUFSIZ);\r
+       setvbuf(f, g_IOBuffers[fd], _IOFBF, MY_IO_BUFSIZ);\r
+       }\r
+\r
+static void FreeBuffer(FILE *f)\r
+       {\r
+       int fd = fileno(f);\r
+       if (fd < 0 || fd >= 256)\r
+               return;\r
+       if (g_IOBuffers[fd] == 0)\r
+               return;\r
+       myfree(g_IOBuffers[fd]);\r
+       g_IOBuffers[fd] = 0;\r
+       }\r
+\r
+// Seconds elapsed since g_StartTime, which is initialised with time(0)\r
+// during static initialisation of this file.\r
+unsigned GetElapsedSecs()\r
+       {\r
+       const time_t Now = time(0);\r
+       return (unsigned) (Now - g_StartTime);\r
+       }\r
+\r
+static unsigned g_NewCalls;\r
+static unsigned g_FreeCalls;\r
+static double g_InitialMemUseBytes;\r
+static double g_TotalAllocBytes;\r
+static double g_TotalFreeBytes;\r
+static double g_NetBytes;\r
+static double g_MaxNetBytes;\r
+\r
+void LogAllocStats()\r
+       {\r
+       Log("\n");\r
+       Log("       Allocs  %u\n", g_NewCalls);\r
+       Log("        Frees  %u\n", g_FreeCalls);\r
+       Log("Initial alloc  %s\n", MemBytesToStr(g_InitialMemUseBytes));\r
+       Log("  Total alloc  %s\n", MemBytesToStr(g_TotalAllocBytes));\r
+       Log("   Total free  %s\n", MemBytesToStr(g_TotalFreeBytes));\r
+       Log("    Net bytes  %s\n", MemBytesToStr(g_NetBytes));\r
+       Log("Max net bytes  %s\n", MemBytesToStr(g_MaxNetBytes));\r
+       Log("   Peak total  %s\n", MemBytesToStr(g_MaxNetBytes + g_InitialMemUseBytes));\r
+       }\r
+\r
+// True if stat() succeeds on FileName, false otherwise (whatever the\r
+// reason for the failure).\r
+bool StdioFileExists(const string &FileName)\r
+       {\r
+       struct stat Info;\r
+       return stat(FileName.c_str(), &Info) == 0;\r
+       }\r
+\r
+// Report a failed assertion through Die.\r
+//   Exp   text of the expression that failed\r
+//   File  source file name\r
+//   Line  source line number\r
+void myassertfail(const char *Exp, const char *File, unsigned Line)\r
+       {\r
+       Die("%s(%u) assert failed: %s", File, Line, Exp);\r
+       }\r
+\r
+// bool wrapper around isatty(): true if isatty(fd) returns nonzero.\r
+bool myisatty(int fd)\r
+       {\r
+       const int Result = isatty(fd);\r
+       return Result != 0;\r
+       }\r
+\r
+#ifdef _MSC_VER\r
+#include <io.h>\r
+// fseeko replacement for MSVC, implemented with _fseeki64.\r
+// Returns 0 on success and -1 on failure.\r
+int fseeko(FILE *stream, off_t offset, int whence)\r
+       {\r
+       // _fseeki64 returns 0 on success and a nonzero value on failure, so\r
+       // every nonzero status is a failure, not only -1.\r
+       const int Status = _fseeki64(stream, offset, whence);\r
+       return (Status != 0) ? -1 : 0;\r
+       }\r
+#define ftello(fm) (off_t) _ftelli64(fm)\r
+#endif\r
+\r
+// Diagnostic dump of a stdio stream to the log: pointer, descriptor,\r
+// EOF/error indicators and the position as reported by several APIs.\r
+// Called by the I/O wrappers in this file just before they Die.\r
+void LogStdioFileState(FILE *f)\r
+       {\r
+       unsigned long tellpos = (unsigned long) ftello(f);\r
+       // fseek_pos is the return status of fseek (0 on success), not a position.\r
+       // NOTE(review): a successful fseek clears the end-of-file indicator, so\r
+       // the feof value logged below may differ from the state on entry.\r
+       long fseek_pos = fseek(f, 0, SEEK_CUR);\r
+       int fd = fileno(f);\r
+       Log("FILE *     %p\n", f);\r
+       Log("fileno     %d\n", fd);\r
+       Log("feof       %d\n", feof(f));\r
+       Log("ferror     %d\n", ferror(f));\r
+       // NOTE(review): tellpos is unsigned long but is printed with %ld.\r
+       Log("ftell      %ld\n", tellpos);\r
+       Log("fseek      %ld\n", fseek_pos);\r
+#if    !defined(_GNU_SOURCE) && !defined(__APPLE_CC__)\r
+       // The cast below requires fpos_t to be an arithmetic type.\r
+       fpos_t fpos;\r
+       int fgetpos_retval = fgetpos(f, &fpos);\r
+       Log("fpos       %ld (retval %d)\n", (long) fpos, fgetpos_retval);\r
+//     Log("eof        %d\n", _eof(fd));\r
+#endif\r
+#ifdef _MSC_VER\r
+       __int64 pos64 = _ftelli64(f);\r
+       Log("_ftelli64  %lld\n", pos64);\r
+#endif\r
+       }\r
+\r
+FILE *OpenStdioFile(const string &FileName)\r
+       {\r
+       const char *Mode = "rb";\r
+       FILE *f = fopen(FileName.c_str(), Mode);\r
+       if (f == 0)\r
+               {\r
+               if (errno == EFBIG)\r
+                       {\r
+                       if (sizeof(off_t) == 4)\r
+                               Die("File too big, off_t is 32 bits, recompile needed");\r
+                       else\r
+                               Die("Cannot open '%s', file too big (off_t=%u bits)",\r
+                                 FileName.c_str(), sizeof(off_t)*8);\r
+                       }\r
+               Die("Cannot open %s, errno=%d %s",\r
+                 FileName.c_str(), errno, strerror(errno));\r
+               }\r
+       AllocBuffer(f);\r
+       return f;\r
+       }\r
+\r
+FILE *CreateStdioFile(const string &FileName)\r
+       {\r
+       FILE *f = fopen(FileName.c_str(), "wb+");\r
+       if (0 == f)\r
+               Die("Cannot create %s, errno=%d %s",\r
+                 FileName.c_str(), errno, strerror(errno));\r
+       AllocBuffer(f);\r
+       return f;\r
+       }\r
+\r
+void SetStdioFilePos(FILE *f, off_t Pos)\r
+       {\r
+       if (0 == f)\r
+               Die("SetStdioFilePos failed, f=NULL");\r
+       int Ok = fseeko(f, Pos, SEEK_SET);\r
+       off_t NewPos = ftello(f);\r
+       if (Ok != 0 || Pos != NewPos)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("SetStdioFilePos(%d) failed, Ok=%d NewPos=%d",\r
+                 (int) Pos, Ok, (int) NewPos);\r
+               }\r
+       }\r
+\r
+void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes)\r
+       {\r
+       if (0 == f)\r
+               Die("ReadStdioFile failed, f=NULL");\r
+       SetStdioFilePos(f, Pos);\r
+       unsigned BytesRead = fread(Buffer, 1, Bytes, f);\r
+       if (BytesRead != Bytes)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d",\r
+                 (int) Bytes, (int) BytesRead, errno);\r
+               }\r
+       }\r
+\r
+void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes)\r
+       {\r
+       if (0 == f)\r
+               Die("ReadStdioFile failed, f=NULL");\r
+       unsigned BytesRead = fread(Buffer, 1, Bytes, f);\r
+       if (BytesRead != Bytes)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d",\r
+                 (int) Bytes, (int) BytesRead, errno);\r
+               }\r
+       }\r
+\r
+// Return values from functions like lseek, ftell, fgetpos are\r
+// "undefined" for files that cannot seek. Attempt to detect\r
+// whether a file can seek by checking for error returns.\r
+bool CanSetStdioFilePos(FILE *f)\r
+       {\r
+// Common special cases\r
+       if (f == stdin || f == stdout || f == stderr)\r
+               return false;\r
+\r
+       fpos_t CurrPos;\r
+       int ok1 = fgetpos(f, &CurrPos);\r
+       if (ok1 < 0)\r
+               return false;\r
+       int ok2 = fseek(f, 0, SEEK_END);\r
+       if (ok2 < 0)\r
+               return false;\r
+       fpos_t EndPos;\r
+       int ok3 = fgetpos(f, &EndPos);\r
+       int ok4 = fsetpos(f, &CurrPos);\r
+       if (!ok3 || !ok4)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+byte *ReadAllStdioFile(FILE *f, unsigned &FileSize)\r
+       {\r
+       const unsigned BUFF_SIZE = 1024*1024;\r
+\r
+       if (CanSetStdioFilePos(f))\r
+               {\r
+               off_t Pos = GetStdioFilePos(f);\r
+               off_t FileSize = GetStdioFileSize(f);\r
+               if (FileSize > UINT_MAX)\r
+                       Die("ReadAllStdioFile: file size > UINT_MAX");\r
+               SetStdioFilePos(f, 0);\r
+               byte *Buffer = myalloc(byte, unsigned(FileSize));\r
+               ReadStdioFile(f, Buffer, unsigned(FileSize));\r
+               SetStdioFilePos(f, Pos);\r
+               FileSize = unsigned(FileSize);\r
+               return Buffer;\r
+               }\r
+\r
+// Can't seek, read one buffer at a time.\r
+       FileSize = 0;\r
+\r
+// Just to initialize so that first call to realloc works.\r
+       byte *Buffer = (byte *) malloc(4);\r
+       if (Buffer == 0)\r
+               Die("ReadAllStdioFile, out of memory");\r
+       for (;;)\r
+               {\r
+               Buffer = (byte *) realloc(Buffer, FileSize + BUFF_SIZE);\r
+               unsigned BytesRead = fread(Buffer + FileSize, 1, BUFF_SIZE, f);\r
+               FileSize += BytesRead;\r
+               if (BytesRead < BUFF_SIZE)\r
+                       {\r
+                       Buffer = (byte *) realloc(Buffer, FileSize);\r
+                       return Buffer;\r
+                       }\r
+               }\r
+       }\r
+\r
+byte *ReadAllStdioFile(const std::string &FileName, off_t &FileSize)\r
+       {\r
+#if    WIN32\r
+       FILE *f = OpenStdioFile(FileName);\r
+       FileSize = GetStdioFileSize(f);\r
+       CloseStdioFile(f);\r
+\r
+       HANDLE h = CreateFile(FileName.c_str(), GENERIC_READ, FILE_SHARE_READ,\r
+         NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);\r
+       if (h == INVALID_HANDLE_VALUE)\r
+               Die("ReadAllStdioFile:Open(%s) failed", FileName.c_str());\r
+\r
+       unsigned uFileSize = (unsigned) FileSize;\r
+       if ((off_t) uFileSize != FileSize)\r
+               Die("File too big (%.1f Gb): %s", double(FileSize)/1e9, FileName.c_str());\r
+\r
+       byte *Buffer = myalloc(byte, uFileSize);\r
+       DWORD BytesRead;\r
+       ReadFile(h, Buffer, uFileSize, &BytesRead, NULL);\r
+       if (FileSize != BytesRead)\r
+               Die("ReadAllStdioFile:Error reading %s, attempted %u got %u",\r
+                 FileName.c_str(), FileSize, (unsigned) BytesRead);\r
+\r
+       CloseHandle(h);\r
+       return Buffer;\r
+#else\r
+       int h = open(FileName.c_str(), O_RDONLY);\r
+       if (h < 0)\r
+               Die("ReadAllStdioFile:Cannot open %s", FileName.c_str());\r
+       FileSize = lseek(h, 0, SEEK_END);\r
+       if (FileSize == (off_t) (-1))\r
+               Die("ReadAllStdioFile:Error seeking %s", FileName.c_str());\r
+       // byte *Buffer = myalloc<byte>(FileSize);\r
+       size_t stBytes = (size_t) FileSize;\r
+       if ((off_t) stBytes != FileSize)\r
+               Die("ReadAllStdioFile: off_t overflow");\r
+       byte *Buffer = (byte *) malloc(stBytes);\r
+       if (Buffer == 0)\r
+               Die("ReadAllStdioFile: failed to allocate %s", MemBytesToStr(stBytes));\r
+       lseek(h, 0, SEEK_SET);\r
+       size_t n = read(h, Buffer, stBytes);\r
+       if (n != FileSize)\r
+               Die("ReadAllStdioFile, Error reading %s, attempted %g got %g",\r
+                 FileName.c_str(), (double) FileSize, (double) n);\r
+       close(h);\r
+       return Buffer;\r
+#endif\r
+       }\r
+\r
+void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes)\r
+       {\r
+       if (0 == f)\r
+               Die("WriteStdioFile failed, f=NULL");\r
+       SetStdioFilePos(f, Pos);\r
+       unsigned BytesWritten = fwrite(Buffer, 1, Bytes, f);\r
+       if (BytesWritten != Bytes)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d",\r
+                 (int) Bytes, (int) BytesWritten, errno);\r
+               }\r
+       }\r
+\r
+void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes)\r
+       {\r
+       if (0 == f)\r
+               Die("WriteStdioFile failed, f=NULL");\r
+       unsigned BytesWritten = fwrite(Buffer, 1, Bytes, f);\r
+       if (BytesWritten != Bytes)\r
+               {\r
+               LogStdioFileState(f);\r
+               Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d",\r
+                 (int) Bytes, (int) BytesWritten, errno);\r
+               }\r
+       }\r
+\r
+// Return false on EOF, true if line successfully read.\r
+bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes)\r
+       {\r
+       if (feof(f))\r
+               return false;\r
+       if ((int) Bytes < 0)\r
+               Die("ReadLineStdioFile: Bytes < 0");\r
+       char *RetVal = fgets(Line, (int) Bytes, f);\r
+       if (NULL == RetVal)\r
+               {\r
+               if (feof(f))\r
+                       return false;\r
+               if (ferror(f))\r
+                       Die("ReadLineStdioFile: errno=%d", errno);\r
+               Die("ReadLineStdioFile: fgets=0, feof=0, ferror=0");\r
+               }\r
+\r
+       if (RetVal != Line)\r
+               Die("ReadLineStdioFile: fgets != Buffer");\r
+       unsigned n = strlen(Line);\r
+       if (n < 1 || Line[n-1] != '\n')\r
+               Die("ReadLineStdioFile: line too long or missing end-of-line");\r
+       if (n > 0 && (Line[n-1] == '\r' || Line[n-1] == '\n'))\r
+               Line[n-1] = 0;\r
+       if (n > 1 && (Line[n-2] == '\r' || Line[n-2] == '\n'))\r
+               Line[n-2] = 0;\r
+       return true;\r
+       }\r
+\r
+// Return false on EOF, true if line successfully read.\r
+bool ReadLineStdioFile(FILE *f, string &Line)\r
+       {\r
+       Line.clear();\r
+       for (;;)\r
+               {\r
+               int c = fgetc(f);\r
+               if (c == -1)\r
+                       {\r
+                       if (feof(f))\r
+                               {\r
+                               if (!Line.empty())\r
+                                       return true;\r
+                               return false;\r
+                               }\r
+                       Die("ReadLineStdioFile, errno=%d", errno);\r
+                       }\r
+               if (c == '\r')\r
+                       continue;\r
+               if (c == '\n')\r
+                       return true;\r
+               Line.push_back((char) c);\r
+               }\r
+       }\r
+\r
+// Copies all of fFrom regardless of current\r
+// file position, appends to fTo.\r
+void AppendStdioFileToFile(FILE *fFrom, FILE *fTo)\r
+       {\r
+       off_t SavedFromPos = GetStdioFilePos(fFrom);\r
+       off_t FileSize = GetStdioFileSize(fFrom);\r
+       const off_t BUFF_SIZE = 1024*1024;\r
+       char *Buffer = myalloc(char, BUFF_SIZE);\r
+       SetStdioFilePos(fFrom, 0);\r
+       off_t BytesRemaining = FileSize;\r
+       while (BytesRemaining > 0)\r
+               {\r
+               off_t BytesToRead = BytesRemaining;\r
+               if (BytesToRead > BUFF_SIZE)\r
+                       BytesToRead = BUFF_SIZE;\r
+               ReadStdioFile(fFrom, Buffer, (unsigned) BytesToRead);\r
+               WriteStdioFile(fTo, Buffer, (unsigned) BytesToRead);\r
+               BytesRemaining -= BytesToRead;\r
+               }\r
+       SetStdioFilePos(fFrom, SavedFromPos);\r
+       }\r
+\r
+void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo)\r
+       {\r
+       int Ok = rename(FileNameFrom.c_str(), FileNameTo.c_str());\r
+       if (Ok != 0)\r
+               Die("RenameStdioFile(%s,%s) failed, errno=%d %s",\r
+                 FileNameFrom.c_str(), FileNameTo.c_str(), errno, strerror(errno));\r
+       }\r
+\r
+void FlushStdioFile(FILE *f)\r
+       {\r
+       int Ok = fflush(f);\r
+       if (Ok != 0)\r
+               Die("fflush(%p)=%d,", f, Ok);\r
+       }\r
+\r
+void CloseStdioFile(FILE *f)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+       int Ok = fclose(f);\r
+       if (Ok != 0)\r
+               Die("fclose(%p)=%d", f, Ok);\r
+       FreeBuffer(f);\r
+       }\r
+\r
+// Current position of f as reported by ftello; calls Die if ftello returns\r
+// a negative value.\r
+off_t GetStdioFilePos(FILE *f)\r
+       {\r
+       const off_t Here = ftello(f);\r
+       if (Here < 0)\r
+               Die("ftello=%d", (int) Here);\r
+       return Here;\r
+       }\r
+\r
+off_t GetStdioFileSize(FILE *f)\r
+       {\r
+       off_t CurrentPos = GetStdioFilePos(f);\r
+       int Ok = fseeko(f, 0, SEEK_END);\r
+       if (Ok < 0)\r
+               Die("fseek in GetFileSize");\r
+\r
+       off_t Length = ftello(f);\r
+       if (Length < 0)\r
+               Die("ftello in GetFileSize");\r
+       SetStdioFilePos(f, CurrentPos);\r
+       return Length;\r
+       }\r
+\r
+// Delete FileName with remove(); calls Die with the errno description on\r
+// failure.\r
+void DeleteStdioFile(const string &FileName)\r
+       {\r
+       const char *Path = FileName.c_str();\r
+       if (remove(Path) != 0)\r
+               Die("remove(%s) failed, errno=%d %s", Path, errno, strerror(errno));\r
+       }\r
+\r
+// Format ArgList according to Format and store the result in Str.\r
+// Output is truncated to fit a static buffer of\r
+// MAX_FORMATTED_STRING_LENGTH bytes, which is shared by all calls.\r
+void myvstrprintf(string &Str, const char *Format, va_list ArgList)\r
+       {\r
+       static char Scratch[MAX_FORMATTED_STRING_LENGTH];\r
+       const unsigned LastIndex = MAX_FORMATTED_STRING_LENGTH - 1;\r
+\r
+       vsnprintf(Scratch, LastIndex, Format, ArgList);\r
+       // Terminate explicitly in the final slot as well.\r
+       Scratch[LastIndex] = '\0';\r
+       Str = Scratch;\r
+       }\r
+\r
+// Variadic front end: packs the arguments into a va_list and forwards to\r
+// the va_list overload of myvstrprintf.\r
+void myvstrprintf(string &Str, const char *Format, ...)\r
+       {\r
+       va_list Args;\r
+       va_start(Args, Format);\r
+       myvstrprintf(Str, Format, Args);\r
+       va_end(Args);\r
+       }\r
+\r
+FILE *g_fLog = 0;\r
+\r
+void SetLogFileName(const string &FileName)\r
+       {\r
+       if (g_fLog != 0)\r
+               CloseStdioFile(g_fLog);\r
+       g_fLog = 0;\r
+       if (FileName.empty())\r
+               return;\r
+       g_fLog = CreateStdioFile(FileName);\r
+       }\r
+\r
+void Log(const char *Format, ...)\r
+       {\r
+       if (g_fLog == 0)\r
+               return;\r
+\r
+       static bool InLog = false;\r
+       if (InLog)\r
+               return;\r
+\r
+       InLog = true;\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       vfprintf(g_fLog, Format, ArgList);\r
+       va_end(ArgList);\r
+       fflush(g_fLog);\r
+       InLog = false;\r
+       }\r
+\r
+// Reports a fatal error and terminates the process with exit(1).\r
+// The formatted message goes to stderr and, through Log(), to the log file,\r
+// preceded by the current time, the command line held in g_Argv and the\r
+// elapsed run time.\r
+void Die(const char *Format, ...)\r
+       {\r
+       // If Die() is entered again while it is already running, exit at once.\r
+       static bool InDie = false;\r
+       if (InDie)\r
+               exit(1);\r
+       InDie = true;\r
+       string Msg;\r
+\r
+       // Switch the log stream to unbuffered mode before writing to it.\r
+       if (g_fLog != 0)\r
+               setbuf(g_fLog, 0);\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       myvstrprintf(Msg, Format, ArgList);\r
+       va_end(ArgList);\r
+\r
+       fprintf(stderr, "\n\n");\r
+       Log("\n");\r
+       time_t t = time(0);\r
+       Log("%s", asctime(localtime(&t)));\r
+       // Echo the command line, space separated, to both stderr and the log.\r
+       for (unsigned i = 0; i < g_Argv.size(); i++)\r
+               {\r
+               fprintf(stderr, (i == 0) ? "%s" : " %s", g_Argv[i].c_str());\r
+               Log((i == 0) ? "%s" : " %s", g_Argv[i].c_str());\r
+               }\r
+       fprintf(stderr, "\n");\r
+       Log("\n");\r
+\r
+       // The elapsed time is written to the log only, not to stderr.\r
+       time_t CurrentTime = time(0);\r
+       unsigned ElapsedSeconds = unsigned(CurrentTime - g_StartTime);\r
+       const char *sstr = SecsToStr(ElapsedSeconds);\r
+       Log("Elapsed time: %s\n", sstr);\r
+\r
+       const char *szStr = Msg.c_str();\r
+       fprintf(stderr, "\n---Fatal error---\n%s\n", szStr);\r
+       Log("\n---Fatal error---\n%s\n", szStr);\r
+\r
+#ifdef _MSC_VER\r
+       // MSVC builds: break into an attached debugger before exiting.\r
+       if (IsDebuggerPresent())\r
+               __debugbreak();\r
+       _CrtSetDbgFlag(0);\r
+#endif\r
+\r
+       exit(1);\r
+       }\r
+\r
+void Warning(const char *Format, ...)\r
+       {\r
+       string Msg;\r
+\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       myvstrprintf(Msg, Format, ArgList);\r
+       va_end(ArgList);\r
+\r
+       const char *szStr = Msg.c_str();\r
+\r
+       fprintf(stderr, "\nWARNING: %s\n", szStr);\r
+       if (g_fLog != stdout)\r
+               {\r
+               Log("\nWARNING: %s\n", szStr);\r
+               fflush(g_fLog);\r
+               }\r
+       }\r
+\r
+#ifdef _MSC_VER\r
+double GetMemUseBytes()\r
+       {\r
+       HANDLE hProc = GetCurrentProcess();\r
+       PROCESS_MEMORY_COUNTERS PMC;\r
+       BOOL bOk = GetProcessMemoryInfo(hProc, &PMC, sizeof(PMC));\r
+       if (!bOk)\r
+               return 1000000;\r
+       double Bytes = (double) PMC.WorkingSetSize;\r
+       if (Bytes > g_PeakMemUseBytes)\r
+               g_PeakMemUseBytes = Bytes;\r
+       return Bytes;\r
+       }\r
+#elif  linux || __linux__\r
+double GetMemUseBytes()\r
+       {\r
+       static char statm[64];\r
+       static int PageSize = 1;\r
+       if (0 == statm[0])\r
+               {\r
+               PageSize = sysconf(_SC_PAGESIZE);\r
+               pid_t pid = getpid();\r
+               sprintf(statm, "/proc/%d/statm", (int) pid);\r
+               }\r
+\r
+       int fd = open(statm, O_RDONLY);\r
+       if (-1 == fd)\r
+               return 1000000;\r
+       char Buffer[64];\r
+       int n = read(fd, Buffer, sizeof(Buffer) - 1);\r
+       close(fd);\r
+       fd = -1;\r
+\r
+       if (n <= 0)\r
+               return 1000000;\r
+\r
+       Buffer[n] = 0;\r
+       double Pages = atof(Buffer);\r
+\r
+       double Bytes = Pages*PageSize;\r
+       if (Bytes > g_PeakMemUseBytes)\r
+               g_PeakMemUseBytes = Bytes;\r
+       return Bytes;\r
+       }\r
+#elif defined(__MACH__)\r
+#include <memory.h>\r
+#include <stdlib.h>\r
+#include <stdio.h>\r
+#include <unistd.h>\r
+#include <sys/types.h>\r
+#include <sys/sysctl.h>\r
+#include <sys/socket.h>\r
+#include <sys/gmon.h>\r
+#include <mach/vm_param.h>\r
+#include <netinet/in.h>\r
+#include <netinet/icmp6.h>\r
+#include <sys/vmmeter.h>\r
+#include <sys/proc.h>\r
+#include <mach/task_info.h>\r
+#include <mach/task.h>\r
+#include <mach/mach_init.h>\r
+#include <mach/vm_statistics.h>\r
+\r
+#define DEFAULT_MEM_USE        100000000.0\r
+\r
+double GetMemUseBytes()\r
+       {\r
+       task_t mytask = mach_task_self();\r
+       struct task_basic_info ti;\r
+       memset((void *) &ti, 0, sizeof(ti));\r
+       mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;\r
+       kern_return_t ok = task_info(mytask, TASK_BASIC_INFO, (task_info_t) &ti, &count);\r
+       if (ok == KERN_INVALID_ARGUMENT)\r
+               return DEFAULT_MEM_USE;\r
+\r
+       if (ok != KERN_SUCCESS)\r
+               return DEFAULT_MEM_USE;\r
+\r
+       double Bytes = (double ) ti.resident_size;\r
+       if (Bytes > g_PeakMemUseBytes)\r
+               g_PeakMemUseBytes = Bytes;\r
+       return Bytes;\r
+       }\r
+#else\r
+// Fallback for platforms matched by none of the branches above: memory use\r
+// is not measured and 0 is returned.\r
+double GetMemUseBytes()\r
+       {\r
+       return 0;\r
+       }\r
+#endif\r
+\r
+// Returns g_PeakMemUseBytes, the largest value recorded so far by the\r
+// measuring variants of GetMemUseBytes(). It is only updated when\r
+// GetMemUseBytes() is called.\r
+double GetPeakMemUseBytes()\r
+       {\r
+       return g_PeakMemUseBytes;\r
+       }\r
+\r
+const char *SecsToHHMMSS(int Secs)\r
+       {\r
+       int HH = Secs/3600;\r
+       int MM = (Secs - HH*3600)/60;\r
+       int SS = Secs%60;\r
+       static char Str[16];\r
+       if (HH == 0)\r
+               sprintf(Str, "%02d:%02d", MM, SS);\r
+       else\r
+               sprintf(Str, "%02d:%02d:%02d", HH, MM, SS);\r
+       return Str;\r
+       }\r
+\r
+const char *SecsToStr(double Secs)\r
+       {\r
+       if (Secs >= 10.0)\r
+               return SecsToHHMMSS((int) Secs);\r
+\r
+       static char Str[16];\r
+       if (Secs < 1e-6)\r
+               sprintf(Str, "%.2gs", Secs);\r
+       else if (Secs < 1e-3)\r
+               sprintf(Str, "%.2fms", Secs*1e3);\r
+       else\r
+               sprintf(Str, "%.3fs", Secs);\r
+       return Str;\r
+       }\r
+\r
+const char *MemBytesToStr(double Bytes)\r
+       {\r
+       static char Str[32];\r
+\r
+       if (Bytes < 1e6)\r
+               sprintf(Str, "%.1fkb", Bytes/1e3);\r
+       else if (Bytes < 10e6)\r
+               sprintf(Str, "%.1fMb", Bytes/1e6);\r
+       else if (Bytes < 1e9)\r
+               sprintf(Str, "%.0fMb", Bytes/1e6);\r
+       else if (Bytes < 10e9)\r
+               sprintf(Str, "%.1fGb", Bytes/1e9);\r
+       else if (Bytes < 100e9)\r
+               sprintf(Str, "%.0fGb", Bytes/1e9);\r
+       else\r
+               sprintf(Str, "%.3gb", Bytes);\r
+       return Str;\r
+       }\r
+\r
+const char *IntToStr(unsigned i)\r
+       {\r
+       static char Str[32];\r
+\r
+       double d = (double) i;\r
+       if (i < 10000)\r
+               sprintf(Str, "%u", i);\r
+       else if (i < 1e6)\r
+               sprintf(Str, "%.1fk", d/1e3);\r
+       else if (i < 10e6)\r
+               sprintf(Str, "%.1fM", d/1e6);\r
+       else if (i < 1e9)\r
+               sprintf(Str, "%.0fM", d/1e6);\r
+       else if (i < 10e9)\r
+               sprintf(Str, "%.1fG", d/1e9);\r
+       else if (i < 100e9)\r
+               sprintf(Str, "%.0fG", d/1e9);\r
+       else\r
+               sprintf(Str, "%.3g", d);\r
+       return Str;\r
+       }\r
+\r
+const char *FloatToStr(double d)\r
+       {\r
+       static char Str[32];\r
+\r
+       double a = fabs(d);\r
+       if (a < 0.01)\r
+               sprintf(Str, "%.3g", a);\r
+       else if (a >= 0.01 && a < 1)\r
+               sprintf(Str, "%.3f", a);\r
+       else if (a <= 10 && a >= 1)\r
+               {\r
+               double intpart;\r
+               if (modf(a, &intpart) < 0.05)\r
+                       sprintf(Str, "%.0f", d);\r
+               else\r
+                       sprintf(Str, "%.1f", d);\r
+               }\r
+       else if (a > 10 && a < 10000)\r
+               sprintf(Str, "%.0f", d);\r
+       else if (a < 1e6)\r
+               sprintf(Str, "%.1fk", d/1e3);\r
+       else if (a < 10e6)\r
+               sprintf(Str, "%.1fM", d/1e6);\r
+       else if (a < 1e9)\r
+               sprintf(Str, "%.0fM", d/1e6);\r
+       else if (a < 10e9)\r
+               sprintf(Str, "%.1fG", d/1e9);\r
+       else if (a < 100e9)\r
+               sprintf(Str, "%.0fG", d/1e9);\r
+       else\r
+               sprintf(Str, "%.3g", d);\r
+       return Str;\r
+       }\r
+\r
+// Values of the built-in command-line options. MyCmdLine() registers each\r
+// of them under the name that follows the opt_ prefix.\r
+bool opt_quiet = false;\r
+bool opt_version = false;\r
+bool opt_logopts = false;\r
+bool opt_compilerinfo = false;\r
+bool opt_help = false;\r
+string opt_log = "";\r
+\r
+// One flag per built-in option; SetOpt() sets the flag to true when it\r
+// assigns the option's value.\r
+bool optset_quiet = false;\r
+bool optset_version = false;\r
+bool optset_logopts = false;\r
+bool optset_compilerinfo = false;\r
+bool optset_help = false;\r
+bool optset_log = false;\r
+\r
+// State shared by the progress functions below.\r
+// g_ProgressDesc/Index/Count hold the description and position set by\r
+// ProgressStep() and read by GetProgressLevelStr().\r
+static string g_CurrentProgressLine;\r
+static string g_ProgressDesc;\r
+static unsigned g_ProgressIndex;\r
+static unsigned g_ProgressCount;\r
+\r
+// Progress() tracks the length of the line being written and of the line\r
+// before it; ProgressStep() uses the interval, call count and timestamp to\r
+// decide when to emit an update.\r
+static unsigned g_CurrProgressLineLength;\r
+static unsigned g_LastProgressLineLength;\r
+static unsigned g_CountsInterval;\r
+static unsigned g_StepCalls;\r
+static time_t g_TimeLastOutputStep;\r
+\r
+static string &GetProgressPrefixStr(string &s)\r
+       {\r
+       double Bytes = GetMemUseBytes();\r
+       unsigned Secs = GetElapsedSecs();\r
+       s = string(SecsToHHMMSS(Secs));\r
+       if (Bytes > 0)\r
+               {\r
+               s.push_back(' ');\r
+               char Str[32];\r
+               sprintf(Str, "%5.5s", MemBytesToStr(Bytes));\r
+               s += string(Str);\r
+               }\r
+       s.push_back(' ');\r
+       return s;\r
+       }\r
+\r
+void ProgressLog(const char *Format, ...)\r
+       {\r
+       string Str;\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       myvstrprintf(Str, Format, ArgList);\r
+       va_end(ArgList);\r
+\r
+       Log("%s", Str.c_str());\r
+       Progress("%s", Str.c_str());\r
+       }\r
+\r
+// printf-style progress output to stderr; does nothing when opt_quiet is\r
+// set. Each output line starts with the prefix from GetProgressPrefixStr().\r
+// When a line ends with '\r' or '\n' and is shorter than the previous one,\r
+// it is padded with spaces to the previous length first.\r
+void Progress(const char *Format, ...)\r
+       {\r
+       if (opt_quiet)\r
+               return;\r
+\r
+       string Str;\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       myvstrprintf(Str, Format, ArgList);\r
+       va_end(ArgList);\r
+\r
+// Disabled debugging aid: logs the message with \r and \n made visible.\r
+#if    0\r
+       Log("Progress(");\r
+       for (unsigned i = 0; i < Str.size(); ++i)\r
+               {\r
+               char c = Str[i];\r
+               if (c == '\r')\r
+                       Log("\\r");\r
+               else if (c == '\n')\r
+                       Log("\\n");\r
+               else\r
+                       Log("%c", c);\r
+               }\r
+       Log(")\n");\r
+#endif //0\r
+\r
+       for (unsigned i = 0; i < Str.size(); ++i)\r
+               {\r
+               // At the start of a line, emit the time/memory prefix first.\r
+               if (g_CurrProgressLineLength == 0)\r
+                       {\r
+                       string s;\r
+                       GetProgressPrefixStr(s);\r
+                       for (unsigned j = 0; j < s.size(); ++j)\r
+                               {\r
+                               fputc(s[j], stderr);\r
+                               ++g_CurrProgressLineLength;\r
+                               }\r
+                       }\r
+\r
+               char c = Str[i];\r
+               if (c == '\n' || c == '\r')\r
+                       {\r
+                       // Pad with spaces up to the length of the previous line.\r
+                       for (unsigned j = g_CurrProgressLineLength; j < g_LastProgressLineLength; ++j)\r
+                               fputc(' ', stderr);\r
+                       // After '\n' there is no previous line to pad against;\r
+                       // after '\r' the line just written becomes the previous one.\r
+                       if (c == '\n')\r
+                               g_LastProgressLineLength = 0;\r
+                       else\r
+                               g_LastProgressLineLength = g_CurrProgressLineLength;\r
+                       g_CurrProgressLineLength = 0;\r
+                       fputc(c, stderr);\r
+                       }\r
+               else\r
+                       {\r
+                       fputc(c, stderr);\r
+                       ++g_CurrProgressLineLength;\r
+                       }\r
+               }\r
+       }\r
+\r
+// Writes the end-of-run summary to the log: finish time, elapsed time and\r
+// the peak memory recorded in g_PeakMemUseBytes.\r
+void ProgressExit()\r
+       {\r
+       time_t Now = time(0);\r
+       struct tm *t = localtime(&Now);\r
+       const char *s = asctime(t);\r
+       unsigned Secs = GetElapsedSecs();\r
+\r
+       Log("\n");\r
+       Log("Finished %s", s); // there is a newline in s\r
+       Log("Elapsed time %s\n", SecsToHHMMSS((int) Secs));\r
+       Log("Max memory %s\n", MemBytesToStr(g_PeakMemUseBytes));\r
+#if    WIN32 && DEBUG\r
+// Skip exit(), which can be very slow in DEBUG build\r
+// VERY DANGEROUS practice, because it skips global destructors.\r
+// But if you know the rules, you can break 'em, right?\r
+       ExitProcess(0);\r
+#endif\r
+       }\r
+\r
+const char *PctStr(double x, double y)\r
+       {\r
+       if (y == 0)\r
+               {\r
+               if (x == 0)\r
+                       return "100%";\r
+               else\r
+                       return "inf%";\r
+               }\r
+       static char Str[16];\r
+       double p = x*100.0/y;\r
+       sprintf(Str, "%5.1f%%", p);\r
+       return Str;\r
+       }\r
+\r
+string &GetProgressLevelStr(string &s)\r
+       {\r
+       unsigned Index = g_ProgressIndex;\r
+       unsigned Count = g_ProgressCount;\r
+       if (Count == UINT_MAX)\r
+               {\r
+               if (Index == UINT_MAX)\r
+                       s = "100%";\r
+               else\r
+                       {\r
+                       char Tmp[16];\r
+                       sprintf(Tmp, "%u", Index); \r
+                       s = Tmp;\r
+                       }\r
+               }\r
+       else\r
+               s = string(PctStr(Index+1, Count));\r
+       s += string(" ") + g_ProgressDesc;\r
+       return s;\r
+       }\r
+\r
+// Reports step i of N of a long-running loop on the progress line.\r
+//   i       zero-based step index; 0 starts a new progress sequence and\r
+//           UINT_MAX marks the final step.\r
+//   N       total number of steps. i >= N is fatal unless i is UINT_MAX.\r
+//   Format  printf-style description shown after the level.\r
+// Does nothing when opt_quiet is set. Intermediate steps are throttled: an\r
+// update is skipped unless the call count is a multiple of g_CountsInterval\r
+// and the clock second has changed since the last update. The last step is\r
+// always written and ends the line.\r
+void ProgressStep(unsigned i, unsigned N, const char *Format, ...)\r
+       {\r
+       if (opt_quiet)\r
+               return;\r
+\r
+       if (i == 0)\r
+               {\r
+               // Start of a new sequence: reset description and throttle state.\r
+               string Str;\r
+               va_list ArgList;\r
+               va_start(ArgList, Format);\r
+               myvstrprintf(Str, Format, ArgList);\r
+               va_end(ArgList);\r
+               g_ProgressDesc = Str;\r
+               g_ProgressIndex = 0;\r
+               g_ProgressCount = N;\r
+               g_CountsInterval = 1;\r
+               g_StepCalls = 0;\r
+               g_TimeLastOutputStep = 0;\r
+               if (g_CurrProgressLineLength > 0)\r
+                       Progress("\n");\r
+               }\r
+\r
+       if (i >= N && i != UINT_MAX)\r
+               Die("ProgressStep(%u,%u)", i, N);\r
+       bool IsLastStep = (i == UINT_MAX || i + 1 == N);\r
+       if (!IsLastStep)\r
+               {\r
+               // g_CountsInterval is still zero if no call with i == 0 has been\r
+               // made yet; avoid a modulo by zero below.\r
+               if (g_CountsInterval == 0)\r
+                       g_CountsInterval = 1;\r
+\r
+               ++g_StepCalls;\r
+               if (g_StepCalls%g_CountsInterval != 0)\r
+                       return;\r
+\r
+               time_t Now = time(0);\r
+               if (Now == g_TimeLastOutputStep)\r
+                       {\r
+                       // Called again within the same second: widen the interval.\r
+                       // NOTE(review): starting from 1, (1*3)/2 is still 1, so this\r
+                       // branch cannot grow an interval of 1 - confirm intent.\r
+                       if (g_CountsInterval < 128)\r
+                               g_CountsInterval = (g_CountsInterval*3)/2;\r
+                       else\r
+                               g_CountsInterval += 64;\r
+                       return;\r
+                       }\r
+               else\r
+                       {\r
+                       // More than a second since the last update: narrow it.\r
+                       time_t Secs = Now - g_TimeLastOutputStep;\r
+                       if (Secs > 1)\r
+                               g_CountsInterval = unsigned(g_CountsInterval/(Secs*8));\r
+                       }\r
+\r
+               if (g_CountsInterval < 1)\r
+                       g_CountsInterval = 1;\r
+\r
+               g_TimeLastOutputStep = Now;\r
+               }\r
+\r
+       g_ProgressIndex = i;\r
+\r
+       if (i > 0)\r
+               {\r
+               va_list ArgList;\r
+               va_start(ArgList, Format);\r
+               myvstrprintf(g_ProgressDesc, Format, ArgList);\r
+               // Every va_start must be paired with va_end before returning.\r
+               va_end(ArgList);\r
+               }\r
+\r
+       string LevelStr;\r
+       GetProgressLevelStr(LevelStr);\r
+       Progress(" %s\r", LevelStr.c_str());\r
+\r
+       if (IsLastStep)\r
+               {\r
+               g_CountsInterval = 1;\r
+               fputc('\n', stderr);\r
+               }\r
+       }\r
+\r
+// Kind of value a command-line option carries; selects how the option is\r
+// described by Help(), assigned by SetOpt() and printed by LogOpts().\r
+enum OptType\r
+       {\r
+       // bool, defaults to false (DefineFlagOpt).\r
+       OT_Flag,\r
+       // bool with a caller-supplied default; also matched by its "no" form.\r
+       OT_Tog,\r
+       // int.\r
+       OT_Int,\r
+       // unsigned.\r
+       OT_Uns,\r
+       // string.\r
+       OT_Str,\r
+       // double.\r
+       OT_Float,\r
+       // int chosen by name from OptInfo::EnumValues.\r
+       OT_Enum\r
+       };\r
+\r
+struct OptInfo\r
+       {\r
+       void *Value;\r
+       bool *OptSet;\r
+       string LongName;\r
+       OptType Type;\r
+       int iMin;\r
+       int iMax;\r
+       unsigned uMin;\r
+       unsigned uMax;\r
+       double dMin;\r
+       double dMax;\r
+       map<string, unsigned> EnumValues;\r
+\r
+       bool bDefault;\r
+       int iDefault;\r
+       unsigned uDefault;\r
+       double dDefault;\r
+       string strDefault;\r
+\r
+       string Help;\r
+\r
+       bool operator<(const OptInfo &rhs) const\r
+               {\r
+               return LongName < rhs.LongName;\r
+               }\r
+       };\r
+\r
+static set<OptInfo> g_Opts;\r
+\r
+void Help()\r
+       {\r
+       printf("\n");\r
+\r
+       void Usage();\r
+       Usage();\r
+\r
+       for (set<OptInfo>::const_iterator p = g_Opts.begin(); p != g_Opts.end(); ++p)\r
+               {\r
+               const OptInfo &Opt = *p;\r
+\r
+               printf("\n");\r
+               string LongName = Opt.LongName.c_str();\r
+               if (Opt.Type == OT_Tog)\r
+                       LongName = string("[no]") + LongName;\r
+               printf("  --%s ", LongName.c_str());\r
+\r
+               switch (Opt.Type)\r
+                       {\r
+               case OT_Flag:\r
+                       break;\r
+               case OT_Tog:\r
+                       break;\r
+               case OT_Int:\r
+                       printf("<int>");\r
+                       break;\r
+               case OT_Uns:\r
+                       printf("<uint>");\r
+                       break;\r
+               case OT_Str:\r
+                       printf("<str>");\r
+                       break;\r
+               case OT_Float:\r
+                       printf("<float>");\r
+                       break;\r
+               case OT_Enum:\r
+                       printf("<enum>");\r
+                       break;\r
+               default:\r
+                       printf("??type");\r
+                       break;\r
+                       }\r
+\r
+               printf("  ");\r
+               const string &s = Opt.Help;\r
+               for (string::const_iterator q = s.begin(); q != s.end(); ++q)\r
+                       {\r
+                       char c = *q;\r
+                       if (c == '\n')\r
+                               printf("\n   ");\r
+                       else\r
+                               printf("%c", c);\r
+                       }\r
+               printf("\n");\r
+               }\r
+       printf("\n");\r
+       exit(0);\r
+       }\r
+\r
+void CmdLineErr(const char *Format, ...)\r
+       {\r
+       va_list ArgList;\r
+       va_start(ArgList, Format);\r
+       string Str;\r
+       myvstrprintf(Str, Format, ArgList);\r
+       va_end(ArgList);\r
+       fprintf(stderr, "\n");\r
+       fprintf(stderr, "Invalid command line\n");\r
+       fprintf(stderr, "%s\n", Str.c_str());\r
+       fprintf(stderr, "For list of command-line options use --help.\n");\r
+       fprintf(stderr, "\n");\r
+       exit(1);\r
+       }\r
+\r
+static set<OptInfo>::iterator GetOptInfo(const string &LongName,\r
+  bool ErrIfNotFound)\r
+       {\r
+       for (set<OptInfo>::iterator p = g_Opts.begin();\r
+         p != g_Opts.end(); ++p)\r
+               {\r
+               const OptInfo &Opt = *p;\r
+               if (Opt.LongName == LongName)\r
+                       return p;\r
+               if (Opt.Type == OT_Tog && "no" + Opt.LongName == LongName)\r
+                       return p;\r
+               }\r
+       if (ErrIfNotFound)\r
+               CmdLineErr("Option --%s is invalid", LongName.c_str());\r
+       return g_Opts.end();\r
+       }\r
+\r
+// Registers Opt in g_Opts. Registering a name that GetOptInfo() already\r
+// finds is fatal (reported via Die).\r
+static void AddOpt(const OptInfo &Opt)\r
+       {\r
+       const bool AlreadyDefined = (GetOptInfo(Opt.LongName, false) != g_Opts.end());\r
+       if (AlreadyDefined)\r
+               Die("Option --%s defined twice", Opt.LongName.c_str());\r
+       g_Opts.insert(Opt);\r
+       }\r
+\r
+#ifdef _MSC_VER\r
+#pragma warning(disable: 4505) // unreferenced local function\r
+#endif\r
+\r
+static void DefineFlagOpt(const string &LongName, const string &Help,\r
+  void *Value, bool *OptSet)\r
+       {\r
+       *(bool *) Value = false;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.bDefault = false;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Flag;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineTogOpt(const string &LongName, bool Default, const string &Help,\r
+  void *Value, bool *OptSet)\r
+       {\r
+       *(bool *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.bDefault = Default;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Tog;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineIntOpt(const string &LongName, int Default, int Min, int Max,\r
+  const string &Help, void *Value, bool *OptSet)\r
+       {\r
+       *(int *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.iDefault = Default;\r
+       Opt.iMin = Min;\r
+       Opt.iMax = Max;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Int;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineUnsOpt(const string &LongName, unsigned Default, unsigned Min,\r
+  unsigned Max, const string &Help, void *Value, bool *OptSet)\r
+       {\r
+       *(unsigned *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.uDefault = Default;\r
+       Opt.uMin = Min;\r
+       Opt.uMax = Max;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Uns;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineFloatOpt(const string &LongName, double Default, double Min,\r
+  double Max, const string &Help, void *Value, bool *OptSet)\r
+       {\r
+       *(double *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.dDefault = Default;\r
+       Opt.dMin = Min;\r
+       Opt.dMax = Max;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Float;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+static void DefineStrOpt(const string &LongName, const char *Default,\r
+  const string &Help, void *Value, bool *OptSet)\r
+       {\r
+       *(string *) Value = (Default == 0 ? "" : string(Default));\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.OptSet = OptSet;\r
+       Opt.LongName = LongName;\r
+       Opt.strDefault = (Default == 0 ? "" : string(Default));\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Str;\r
+       AddOpt(Opt);\r
+       }\r
+\r
+// Parses an enum specification of the form "name=value|name=value|..."\r
+// into EnumValues, replacing its previous contents. White space is ignored\r
+// everywhere. A repeated name, or an entry with an empty name or value, is\r
+// fatal (reported via Die). Values are converted with atoi().\r
+static void ParseEnumValues(const string &Values, map<string, unsigned> &EnumValues)\r
+       {\r
+       EnumValues.clear();\r
+       \r
+       string Name;\r
+       string Value;\r
+       // True once the '=' of the current entry has been seen.\r
+       bool Eq = false;\r
+       for (string::const_iterator p = Values.begin(); ; ++p)\r
+               {\r
+               // The end of the string is treated as a final '|' so that the\r
+               // last entry is stored by the same code as the others.\r
+               char c = (p == Values.end() ? '|' : *p);\r
+               if (isspace(c))\r
+                       ;\r
+               else if (c == '|')\r
+                       {\r
+                       if (EnumValues.find(Name) != EnumValues.end())\r
+                               Die("Invalid enum values, '%s' defined twice: '%s'",\r
+                                 Name.c_str(), Values.c_str());\r
+                       if (Name.empty() || Value.empty())\r
+                               Die("Invalid enum values, empty name or value: '%s'",\r
+                                 Values.c_str());\r
+\r
+                       EnumValues[Name] = atoi(Value.c_str());\r
+                       Name.clear();\r
+                       Value.clear();\r
+                       Eq = false;\r
+                       }\r
+               else if (c == '=')\r
+                       Eq = true;\r
+               else if (Eq)\r
+                       Value.push_back(c);\r
+               else\r
+                       Name.push_back(c);\r
+               // Leave before the loop header would increment past end().\r
+               if (p == Values.end())\r
+                       return;\r
+               }\r
+       }\r
+\r
+// Registers an enum option. Value must point to an int, which is set to\r
+// Default here; Values is parsed by ParseEnumValues().\r
+// NOTE(review): ShortName is not used, and Opt.OptSet is never assigned\r
+// here although SetOpt() writes through it - confirm before defining any\r
+// enum option.\r
+static void DefineEnumOpt(const string &LongName, const string &ShortName,\r
+  int Default, const string &Values, const string &Help, void *Value)\r
+       {\r
+       *(int *) Value = Default;\r
+\r
+       OptInfo Opt;\r
+       Opt.Value = Value;\r
+       Opt.LongName = LongName;\r
+       Opt.iDefault = Default;\r
+       Opt.Help = Help;\r
+       Opt.Type = OT_Enum;\r
+       ParseEnumValues(Values, Opt.EnumValues);\r
+       AddOpt(Opt);\r
+       }\r
+// Each *_OPT macro below expands to the definition of two globals for one\r
+// option: opt_<name>, holding the value, and optset_<name>.\r
+// Presumably myopts.h lists the options through these macros, so including\r
+// it here defines the variables - TODO confirm against myopts.h.\r
+#undef FLAG_OPT\r
+#undef TOG_OPT\r
+#undef INT_OPT\r
+#undef UNS_OPT\r
+#undef FLT_OPT\r
+#undef STR_OPT\r
+#undef ENUM_OPT\r
+#define FLAG_OPT(LongName)                                                     bool opt_##LongName; bool optset_##LongName;\r
+#define TOG_OPT(LongName, Default)                                     bool opt_##LongName; bool optset_##LongName;\r
+#define INT_OPT(LongName, Default, Min, Max)           int opt_##LongName; bool optset_##LongName;\r
+#define UNS_OPT(LongName, Default, Min, Max)           unsigned opt_##LongName; bool optset_##LongName;\r
+#define FLT_OPT(LongName, Default, Min, Max)           double opt_##LongName; bool optset_##LongName;\r
+#define STR_OPT(LongName, Default)                                     string opt_##LongName; bool optset_##LongName;\r
+#define ENUM_OPT(LongName, Values, Default)                    int opt_##LongName; bool optset_##LongName;\r
+#include "myopts.h"\r
+\r
+static int EnumStrToInt(const OptInfo &Opt, const string &Value)\r
+       {\r
+       const map<string, unsigned> &e = Opt.EnumValues;\r
+       string s;\r
+       for (map<string, unsigned>::const_iterator p = e.begin(); p != e.end(); ++p)\r
+               {\r
+               if (Value == p->first)\r
+                       return p->second;\r
+               s += " " + p->first;\r
+               }\r
+       CmdLineErr("--%s %s not recognized, valid are: %s",\r
+         Opt.LongName.c_str(), Value.c_str(), s.c_str());\r
+       ureturn(-1);\r
+       }\r
+\r
+static void SetOpt(OptInfo &Opt, const string &Value)\r
+       {\r
+       *Opt.OptSet = true;\r
+       switch (Opt.Type)\r
+               {\r
+       case OT_Int:\r
+               {\r
+               *(int *) Opt.Value = atoi(Value.c_str());\r
+               break;\r
+               }\r
+       case OT_Uns:\r
+               {\r
+               unsigned uValue = 0;\r
+               int n = sscanf(Value.c_str(), "%u", &uValue);\r
+               if (n != 1)\r
+                       CmdLineErr("Invalid value '%s' for --%s",\r
+                         Value.c_str(), Opt.LongName.c_str());\r
+               *(unsigned *) Opt.Value = uValue;\r
+               break;\r
+               }\r
+       case OT_Float:\r
+               {\r
+               *(double *) Opt.Value = atof(Value.c_str());\r
+               break;\r
+               }\r
+       case OT_Str:\r
+               {\r
+               *(string *) Opt.Value = Value;\r
+               break;\r
+               }\r
+       case OT_Enum:\r
+               {\r
+               *(int *) Opt.Value = EnumStrToInt(Opt, Value);\r
+               break;\r
+               }\r
+       default:\r
+               asserta(false);\r
+               }\r
+       }\r
+\r
+void LogOpts()\r
+       {\r
+       for (set<OptInfo>::const_iterator p = g_Opts.begin(); p != g_Opts.end(); ++p)\r
+               {\r
+               const OptInfo &Opt = *p;\r
+               Log("%s = ", Opt.LongName.c_str());\r
+               switch (Opt.Type)\r
+                       {\r
+               case OT_Flag:\r
+                       Log("%s", (*(bool *) Opt.Value) ? "yes" : "no");\r
+                       break;\r
+               case OT_Tog:\r
+                       Log("%s", (*(bool *) Opt.Value) ? "on" : "off");\r
+                       break;\r
+               case OT_Int:\r
+                       Log("%d", *(int *) Opt.Value);\r
+                       break;\r
+               case OT_Uns:\r
+                       Log("%u", *(unsigned *) Opt.Value);\r
+                       break;\r
+               case OT_Float:\r
+                       {\r
+                       double Value = *(double *) Opt.Value;\r
+                       if (Value == FLT_MAX)\r
+                               Log("*");\r
+                       else\r
+                               Log("%g", Value);\r
+                       break;\r
+                       }\r
+               case OT_Str:\r
+                       Log("%s", (*(string *) Opt.Value).c_str());\r
+                       break;\r
+               case OT_Enum:\r
+                       Log("%d", *(int *) Opt.Value);\r
+                       break;\r
+               default:\r
+                       asserta(false);\r
+                       }\r
+               Log("\n");\r
+               }\r
+       }\r
+\r
+// Prints build information to stdout - the value of _FILE_OFFSET_BITS (or a\r
+// note that it is undefined) and the sizes of several basic types - and\r
+// then exits with status 0.\r
+static void CompilerInfo()\r
+       {\r
+#ifdef _FILE_OFFSET_BITS\r
+    printf("_FILE_OFFSET_BITS=%d\n", _FILE_OFFSET_BITS);\r
+#else\r
+    printf("_FILE_OFFSET_BITS not defined\n");\r
+#endif\r
+\r
+// x(t) prints "sizeof(t) = N" for the type t.\r
+#define x(t)   printf("sizeof(" #t ") = %d\n", (int) sizeof(t));\r
+       x(int)\r
+       x(long)\r
+       x(float)\r
+       x(double)\r
+       x(void *)\r
+       x(off_t)\r
+#undef x\r
+       exit(0);\r
+       }\r
+\r
+void Split(const string &Str, vector<string> &Fields, char Sep)\r
+       {\r
+       Fields.clear();\r
+       const unsigned Length = (unsigned) Str.size();\r
+       string s;\r
+       for (unsigned i = 0; i < Length; ++i)\r
+               {\r
+               char c = Str[i];\r
+               if ((Sep == 0 && isspace(c)) || c == Sep)\r
+                       {\r
+                       if (!s.empty() || Sep != 0)\r
+                               Fields.push_back(s);\r
+                       s.clear();\r
+                       }\r
+               else\r
+                       s.push_back(c);\r
+               }\r
+       if (!s.empty())\r
+               Fields.push_back(s);\r
+       }\r
+\r
+static void GetArgsFromFile(const string &FileName, vector<string> &Args)\r
+       {\r
+       Args.clear();\r
+\r
+       FILE *f = OpenStdioFile(FileName);\r
+       string Line;\r
+       while (ReadLineStdioFile(f, Line))\r
+               {\r
+               size_t n = Line.find('#');\r
+               if (n != string::npos)\r
+                       Line = Line.substr(0, n);\r
+               vector<string> Fields;\r
+               Split(Line, Fields);\r
+               Args.insert(Args.end(), Fields.begin(), Fields.end());\r
+               }\r
+       CloseStdioFile(f);\r
+       }\r
+\r
+void MyCmdLine(int argc, char **argv)\r
+       {\r
+       static unsigned RecurseDepth = 0;\r
+       ++RecurseDepth;\r
+\r
+       DefineFlagOpt("compilerinfo", "Write info about compiler types and #defines to stdout.",\r
+         (void *) &opt_compilerinfo, &optset_compilerinfo);\r
+       DefineFlagOpt("quiet", "Turn off progress messages.", (void *) &opt_quiet, &optset_quiet);\r
+       DefineFlagOpt("version", "Show version and exit.", (void *) &opt_version, &optset_version);\r
+       DefineFlagOpt("logopts", "Log options.", (void *) &opt_logopts, &optset_logopts);\r
+       DefineFlagOpt("help", "Display command-line options.", (void *) &opt_help, &optset_help);\r
+       DefineStrOpt("log", "", "Log file name.", (void *) &opt_log, &optset_log);\r
+\r
+#undef FLAG_OPT\r
+#undef TOG_OPT\r
+#undef INT_OPT\r
+#undef UNS_OPT\r
+#undef FLT_OPT\r
+#undef STR_OPT\r
+#undef ENUM_OPT\r
+#define FLAG_OPT(LongName)                                             DefineFlagOpt(#LongName, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define TOG_OPT(LongName, Default)                             DefineTogOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define INT_OPT(LongName, Default, Min, Max)   DefineIntOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define UNS_OPT(LongName, Default, Min, Max)   DefineUnsOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define FLT_OPT(LongName, Default, Min, Max)   DefineFloatOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define STR_OPT(LongName, Default)                             DefineStrOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#define ENUM_OPT(LongName, Values, Default)            DefineEnumOpt(#LongName, Values, Default, "help", (void *) &opt_##LongName, &optset_##LongName);\r
+#include "myopts.h"\r
+\r
+       if (RecurseDepth == 0)\r
+               g_Argv.clear();\r
+\r
+       for (int i = 0; i < argc; ++i) \r
+               g_Argv.push_back(string(argv[i]));\r
+       \r
+\r
+       int i = 1;\r
+       for (;;)\r
+               {\r
+               if (i >= argc)\r
+                       break;\r
+               const string &Arg = g_Argv[i];\r
+               \r
+               if (Arg.empty())\r
+                       continue;\r
+               else if (Arg == "file:" && i + 1 < argc)\r
+                       {\r
+                       const string &FileName = g_Argv[i+1];\r
+                       vector<string> Args;\r
+                       GetArgsFromFile(FileName, Args);\r
+                       for (vector<string>::const_iterator p = Args.begin();\r
+                         p != Args.end(); ++p)\r
+                               {\r
+                               g_Argv.push_back(*p);\r
+                               ++argc;\r
+                               }\r
+                       i += 2;\r
+                       continue;\r
+                       }\r
+               else if (Arg.size() > 1 && Arg[0] == '-')\r
+                       {\r
+                       string LongName = (Arg.size() > 2 && Arg[1] == '-' ? Arg.substr(2) : Arg.substr(1));\r
+                       OptInfo Opt = *GetOptInfo(LongName, true);\r
+                       *Opt.OptSet = true;\r
+                       if (Opt.Type == OT_Flag)\r
+                               {\r
+                               g_Opts.erase(Opt);\r
+                               *(bool *) Opt.Value = true;\r
+                               g_Opts.insert(Opt);\r
+                               ++i;\r
+                               continue;\r
+                               }\r
+                       else if (Opt.Type == OT_Tog)\r
+                               {\r
+                               g_Opts.erase(Opt);\r
+                               if (string("no") + Opt.LongName == LongName)\r
+                                       *(bool *) Opt.Value = false;\r
+                               else\r
+                                       {\r
+                                       asserta(Opt.LongName == LongName);\r
+                                       *(bool *) Opt.Value = true;\r
+                                       }\r
+                               g_Opts.insert(Opt);\r
+                               ++i;\r
+                               continue;\r
+                               }\r
+\r
+                       ++i;\r
+                       if (i >= argc)\r
+                               CmdLineErr("Missing value for option --%s", LongName.c_str());\r
+\r
+                       string Value = g_Argv[i];\r
+                       SetOpt(Opt, Value);\r
+\r
+                       ++i;\r
+                       continue;\r
+                       }\r
+               else\r
+                       CmdLineErr("Expected -option_name or --option_name, got '%s'", Arg.c_str());\r
+               }\r
+\r
+       --RecurseDepth;\r
+       if (RecurseDepth > 0)\r
+               return;\r
+\r
+       if (opt_help)\r
+               Help();\r
+\r
+       if (opt_compilerinfo)\r
+               CompilerInfo();\r
+\r
+       SetLogFileName(opt_log);\r
+\r
+       if (opt_log != "")\r
+               {\r
+               for (int i = 0; i < argc; ++i)\r
+                       Log("%s%s", i == 0 ? "" : " ", g_Argv[i].c_str());\r
+               Log("\n");\r
+               time_t Now = time(0);\r
+               struct tm *t = localtime(&Now);\r
+               const char *s = asctime(t);\r
+               Log("Started %s", s); // there is a newline in s\r
+               Log("Version " MY_VERSION ".%s\n", SVN_VERSION);\r
+               Log("\n");\r
+               }\r
+\r
+       if (opt_logopts)\r
+               LogOpts();\r
+       }\r
+\r
+// Return x as a percentage of y (100*x/y), or 0 when y is zero.\r
+double Pct(double x, double y)\r
+       {\r
+       const bool ZeroDenominator = (y == 0.0);\r
+       return ZeroDenominator ? 0.0 : (x*100.0)/y;\r
+       }\r
+\r
+void GetCmdLine(string &s)\r
+       {\r
+       s.clear();\r
+       for (unsigned i = 0; i < SIZE(g_Argv); ++i)\r
+               {\r
+               if (i > 0)\r
+                       s += " ";\r
+               s += g_Argv[i];\r
+               }\r
+       }\r
+\r
+// Return a copy of the NUL-terminated string s in memory obtained via myalloc().\r
+char *mystrsave(const char *s)\r
+       {\r
+       const unsigned Bytes = unsigned(strlen(s)) + 1; // +1 for the terminating NUL\r
+       char *Copy = myalloc(char, Bytes);\r
+       memcpy(Copy, s, Bytes);\r
+       return Copy;\r
+       }\r
+\r
+void Logu(unsigned u, unsigned w, unsigned prefixspaces)\r
+       {\r
+       for (unsigned i = 0; i < prefixspaces; ++i)\r
+               Log(" ");\r
+       if (u == UINT_MAX)\r
+               Log("%*.*s", w, w, "*");\r
+       else\r
+               Log("%*u", w, u);\r
+       }\r
+\r
+void Logf(float x, unsigned w, unsigned prefixspaces)\r
+       {\r
+       for (unsigned i = 0; i < prefixspaces; ++i)\r
+               Log(" ");\r
+       if (x == FLT_MAX)\r
+               Log("%*.*s", w, w, "*");\r
+       else\r
+               Log("%*.2f", w, x);\r
+       }\r
+\r
+// Current state of the simple generator; replaced by SLCG_srand().\r
+static uint32 g_SLCG_state = 1;\r
+\r
+// Numerical values used by Microsoft C, according to wikipedia:\r
+// http://en.wikipedia.org/wiki/Linear_congruential_generator\r
+static uint32 g_SLCG_a = 214013;\r
+static uint32 g_SLCG_c = 2531011;\r
+\r
+// Simple Linear Congruential Generator\r
+// Bad properties; used just to initialize the better generator.\r
+// Advances the state as state = state*a + c (unsigned arithmetic, so the\r
+// result wraps) and returns the new state.\r
+static uint32 SLCG_rand()\r
+       {\r
+       g_SLCG_state = g_SLCG_state*g_SLCG_a + g_SLCG_c;\r
+       return g_SLCG_state;\r
+       }\r
+\r
+// Seed the simple generator, then discard its first 10 outputs.\r
+static void SLCG_srand(uint32 Seed)\r
+       {\r
+       g_SLCG_state = Seed;\r
+       int Discard = 10;\r
+       while (Discard-- > 0)\r
+               SLCG_rand();\r
+       }\r
+\r
+/***\r
+A multiply-with-carry random number generator, see:\r
+http://en.wikipedia.org/wiki/Multiply-with-carry\r
+\r
+The particular multipliers used here were found on\r
+the web where they are attributed to George Marsaglia.\r
+***/\r
+\r
+static bool g_InitRandDone = false;\r
+// Generator state: g_X[0..3] are the four most recent outputs (newest first),\r
+// g_X[4] is the carry.\r
+static uint32 g_X[5];\r
+\r
+// Return the next 32-bit value, initializing the generator on first use.\r
+uint32 RandInt32()\r
+       {\r
+       InitRand();\r
+\r
+       // 64-bit weighted sum of the previous four outputs plus the carry.\r
+       uint64 Sum = 2111111111*(uint64) g_X[3] + 1492*(uint64) g_X[2] +\r
+         1776*(uint64) g_X[1] + 5115*(uint64) g_X[0] + g_X[4];\r
+       // Shift the history down by one slot.\r
+       g_X[3] = g_X[2];\r
+       g_X[2] = g_X[1];\r
+       g_X[1] = g_X[0];\r
+       // High 32 bits become the new carry, low 32 bits the new output.\r
+       g_X[4] = (uint32) (Sum >> 32);\r
+       g_X[0] = (uint32) Sum;\r
+       return g_X[0];\r
+       }\r
+\r
+// Thin wrapper returning the next RandInt32() value as a plain unsigned.\r
+unsigned randu32()\r
+       {\r
+       const uint32 r = RandInt32();\r
+       return (unsigned) r;\r
+       }\r
+\r
+// One-time initialization of the random number generator; later calls return\r
+// immediately. The seed is opt_randseed when that option was set, otherwise\r
+// time(0)*getpid() truncated to unsigned. The seed is written to the log.\r
+void InitRand()\r
+       {\r
+       if (g_InitRandDone)\r
+               return;\r
+// Do this first to avoid recursion\r
+// (RandInt32(), called below, itself calls InitRand()).\r
+       g_InitRandDone = true;\r
+\r
+       unsigned Seed = (optset_randseed ? opt_randseed : (unsigned) (time(0)*getpid()));\r
+       Log("RandSeed=%u\n", Seed);\r
+       SLCG_srand(Seed);\r
+\r
+       // Fill the multiply-with-carry state from the simple generator.\r
+       for (unsigned i = 0; i < 5; i++)\r
+               g_X[i] = SLCG_rand();\r
+\r
+       // Discard the first 100 outputs.\r
+       for (unsigned i = 0; i < 100; i++)\r
+               RandInt32();\r
+       }\r
+\r
+// MUST COME AT END BECAUSE OF #undef\r
+// With RCE_MALLOC enabled, myutils.h defines mymalloc/myfree/myfree2 as macros;\r
+// they are undefined here so that real functions with these names can be\r
+// defined. Each one forwards to the rce_* implementation.\r
+#if    RCE_MALLOC\r
+#undef mymalloc\r
+#undef myfree\r
+#undef myfree2\r
+// Allocate bytes via rce_malloc(), passing on the caller's file and line.\r
+void *mymalloc(unsigned bytes, const char *FileName, int Line)\r
+       {\r
+       void *rce_malloc(unsigned bytes, const char *FileName, int Line);\r
+       return rce_malloc(bytes, FileName, Line);\r
+       }\r
+\r
+// Release p via rce_free().\r
+void myfree(void *p, const char *FileName, int Line)\r
+       {\r
+       void rce_free(void *p, const char *FileName, int Line);\r
+       rce_free(p, FileName, Line);\r
+       }\r
+\r
+// Same as myfree(); the bytes argument is not used in this configuration.\r
+void myfree2(void *p, unsigned bytes, const char *FileName, int Line)\r
+       {\r
+       void rce_free(void *p, const char *FileName, int Line);\r
+       rce_free(p, FileName, Line);\r
+       }\r
+\r
+#else // RCE_MALLOC\r
+// Allocate bytes with malloc() while maintaining the allocation counters\r
+// (g_NewCalls, g_TotalAllocBytes, g_NetBytes, g_MaxNetBytes).\r
+// Does not return null: if malloc() fails, the allocations are logged and\r
+// Die() is called.\r
+void *mymalloc(unsigned bytes)\r
+       {\r
+       ++g_NewCalls;\r
+       // Record the baseline memory use the first time through.\r
+       if (g_InitialMemUseBytes == 0)\r
+               g_InitialMemUseBytes = GetMemUseBytes();\r
+\r
+       g_TotalAllocBytes += bytes;\r
+       g_NetBytes += bytes;\r
+       if (g_NetBytes > g_MaxNetBytes)\r
+               {\r
+               // Only re-sample memory use when the high-water mark jumps by\r
+               // more than 10,000,000 bytes.\r
+               if (g_NetBytes > g_MaxNetBytes + 10000000)\r
+                       GetMemUseBytes();//to force update of peak\r
+               g_MaxNetBytes = g_NetBytes;\r
+               }\r
+       void *p = malloc(bytes);\r
+       //void *p = _malloc_dbg(bytes, _NORMAL_BLOCK, __FILE__, __LINE__);\r
+       if (0 == p)\r
+               {\r
+               double b = GetMemUseBytes();\r
+               // Report on stderr first, then dump the allocations and die.\r
+               fprintf(stderr, "\nOut of memory mymalloc(%u), curr %.3g bytes",\r
+                 (unsigned) bytes, b);\r
+               void LogAllocs();\r
+               LogAllocs();\r
+#if DEBUG && defined(_MSC_VER)\r
+               asserta(_CrtCheckMemory());\r
+#endif\r
+               Die("Out of memory, mymalloc(%u), curr %.3g bytes\n",\r
+                 (unsigned) bytes, b);\r
+               }\r
+       return p;\r
+       }\r
+\r
+// Release memory obtained from mymalloc(); a null pointer is ignored.\r
+// No allocation counters are updated here (compare myfree2()).\r
+void myfree(void *p)\r
+       {\r
+       if (p != 0)\r
+               free(p);\r
+       }\r
+\r
+// Release p and record that bytes were returned. The counters are updated even\r
+// when p is null.\r
+void myfree2(void *p, unsigned bytes)\r
+       {\r
+       g_NetBytes -= bytes;\r
+       g_TotalFreeBytes += bytes;\r
+       ++g_FreeCalls;\r
+\r
+       if (p != 0)\r
+               free(p);\r
+       }\r
+#endif\r
diff --git a/myutils.h b/myutils.h
new file mode 100644 (file)
index 0000000..b63ad3c
--- /dev/null
+++ b/myutils.h
@@ -0,0 +1,274 @@
+#ifndef myutils_h\r
+#define myutils_h\r
+\r
+#define RCE_MALLOC     0\r
+
+#include <stdio.h>\r
+#include <sys/types.h>\r
+#include <string>\r
+#include <string.h>\r
+#include <memory.h>\r
+#include <vector>\r
+#include <math.h>\r
+#include <stdarg.h>\r
+#include <cstdlib>\r
+#include <climits>\r
+\r
+#ifndef _MSC_VER\r
+#include <inttypes.h>\r
+#endif\r
+\r
+using namespace std;\r
+\r
+#ifdef _MSC_VER\r
+#include <crtdbg.h>\r
+#pragma warning(disable: 4996) // deprecated functions\r
+#define _CRT_SECURE_NO_DEPRECATE       1\r
+#endif\r
+\r
+#if defined(_DEBUG) && !defined(DEBUG)\r
+#define DEBUG  1\r
+#endif\r
+\r
+#if defined(DEBUG) && !defined(_DEBUG)\r
+#define _DEBUG 1\r
+#endif\r
+\r
+#ifndef NDEBUG\r
+#define        DEBUG   1\r
+#define        _DEBUG  1\r
+#endif\r
+\r
+// Short names for basic types used throughout the code.\r
+typedef unsigned char byte;\r
+typedef unsigned short uint16;\r
+typedef unsigned uint32;\r
+typedef int int32;\r
+typedef double float32; // NOTE(review): despite its name this is a double\r
+typedef signed char int8;\r
+typedef unsigned char uint8;\r
+\r
+// 64-bit integer typedefs and the matching printf length/conversion strings\r
+// (used without the leading '%'), chosen per compiler / target:\r
+//   _MSC_VER, __x86_64__, or anything else.\r
+// NOTE(review): SIZE_T_PRINTF is "u" in the _MSC_VER and fallback branches,\r
+// which assumes size_t has the size of unsigned there - TODO confirm for\r
+// 64-bit builds.\r
+#ifdef _MSC_VER\r
+\r
+typedef __int64 int64;\r
+typedef unsigned __int64 uint64;\r
+\r
+#define INT64_PRINTF           "lld"\r
+#define UINT64_PRINTF          "llu"\r
+\r
+#define SIZE_T_PRINTF          "u"\r
+#define OFF64_T_PRINTF         "lld"\r
+\r
+#define INT64_PRINTFX          "llx"\r
+#define UINT64_PRINTFX         "llx"\r
+\r
+#define SIZE_T_PRINTFX         "x"\r
+#define OFF64_T_PRINTFX                "llx"\r
+\r
+#elif defined(__x86_64__)\r
+\r
+typedef long int64;\r
+typedef unsigned long uint64;\r
+\r
+#define INT64_PRINTF           "ld"\r
+#define UINT64_PRINTF          "lu"\r
+\r
+#define SIZE_T_PRINTF          "lu"\r
+#define OFF64_T_PRINTF         "ld"\r
+\r
+#define INT64_PRINTFX          "lx"\r
+#define UINT64_PRINTFX         "lx"\r
+\r
+#define SIZE_T_PRINTFX         "lx"\r
+#define OFF64_T_PRINTFX                "lx"\r
+\r
+#else\r
+\r
+typedef long long int64;\r
+typedef unsigned long long uint64;\r
+\r
+#define INT64_PRINTF           "lld"\r
+#define UINT64_PRINTF          "llu"\r
+\r
+#define SIZE_T_PRINTF          "u"\r
+#define OFF64_T_PRINTF         "lld"\r
+\r
+#define INT64_PRINTFX          "llx"\r
+#define UINT64_PRINTFX         "llx"\r
+\r
+#define SIZE_T_PRINTFX         "x"\r
+#define OFF64_T_PRINTFX                "llx"\r
+#endif\r
+\r
+#define d64            INT64_PRINTF\r
+#define        u64             UINT64_PRINTF\r
+#define        x64             UINT64_PRINTFX\r
+\r
+// const uint64 UINT64_MAX                     = (~((uint64) 0));\r
+\r
+// Assertion support.\r
+//   assert / myassert  call myassertfail() when exp is false; compiled out\r
+//                      (expand to a no-op) when NDEBUG is defined.\r
+//   asserta            same check, but always active regardless of NDEBUG.\r
+// Any assert macro defined earlier (e.g. by <assert.h>) is replaced.\r
+void myassertfail(const char *Exp, const char *File, unsigned Line);\r
+#undef  assert\r
+#ifdef  NDEBUG\r
+#define assert(exp)     ((void)0)\r
+#define myassert(exp)     ((void)0)\r
+#else\r
+#define assert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )\r
+#define myassert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )\r
+#endif\r
+#define asserta(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )\r
+\r
+// Expands to a plain return statement.\r
+#define ureturn(x)     return (x)\r
+\r
+// Takes the address of v and converts it to void *; presumably used to silence\r
+// unused-variable warnings - TODO confirm.\r
+#define NotUsed(v)     ((void *) &v)\r
+\r
+// pom=plus or minus, tof=true or false\r
+static inline char pom(bool Plus)      { return Plus ? '+' : '-'; }\r
+static inline char tof(bool x)         { return x ? 'T' : 'F'; }\r
+static inline char yon(bool x)         { return x ? 'Y' : 'N'; }\r
+unsigned GetElapsedSecs();\r
+\r
+// Allocation interface. RCE_MALLOC (defined as 0 at the top of this header)\r
+// selects between the rce_* allocator, which receives the caller's file and\r
+// line, and the plain functions implemented in myutils.cpp.\r
+#if    RCE_MALLOC\r
+\r
+void *rce_malloc(unsigned bytes, const char *FileName, int Line);\r
+void rce_free(void *p, const char *FileName, int LineNr);\r
+void rce_chkmem();\r
+\r
+void rce_dumpmem_(const char *FileName, int LineNr);\r
+#define rce_dumpmem()          rce_dumpmem_(__FILE__, __LINE__)\r
+\r
+void rce_assertvalidptr_(void *p, const char *FileName, int LineNr);\r
+#define rce_assertvalidptr(p)  rce_assertvalidptr_(p, __FILE__, __LINE__)\r
+\r
+void rce_dumpptr_(void *p, const char *FileName, int LineNr);\r
+#define rce_dumpptr(p) rce_dumpptr_(p, __FILE__, __LINE__)\r
+\r
+#define mymalloc(n)            rce_malloc((n), __FILE__, __LINE__)\r
+#define myfree(p)              rce_free(p, __FILE__, __LINE__)\r
+#define myfree2(p,n)   rce_free(p, __FILE__, __LINE__)\r
+#define myalloc(t, n)  (t *) rce_malloc((n)*sizeof(t), __FILE__, __LINE__)\r
+\r
+#else // RCE_MALLOC\r
+void *mymalloc(unsigned bytes);\r
+void myfree2(void *p, unsigned Bytes);\r
+void myfree(void *p);\r
+#define rce_chkmem()   /* empty */\r
+// myalloc(t, n): allocate an array of n objects of type t. The byte count is\r
+// passed to mymalloc() as unsigned.\r
+#define myalloc(t, n)  (t *) mymalloc((n)*sizeof(t))\r
+#endif // RCE_MALLOC\r
+\r
+#define SIZE(c)        unsigned((c).size())\r
+\r
+bool myisatty(int fd);\r
+\r
+#ifdef _MSC_VER\r
+#define off_t  __int64\r
+#endif\r
+\r
+FILE *OpenStdioFile(const string &FileName);\r
+FILE *CreateStdioFile(const string &FileName);\r
+bool CanSetStdioFilePos(FILE *f);\r
+void CloseStdioFile(FILE *f);\r
+void SetStdioFilePos(FILE *f, off_t Pos);\r
+void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes);\r
+void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes);\r
+void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes);\r
+void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes);\r
+bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes);\r
+bool ReadLineStdioFile(FILE *f, string &Line);\r
+byte *ReadAllStdioFile(FILE *f, off_t &FileSize);\r
+byte *ReadAllStdioFile(const string &FileName, off_t &FileSize);\r
+void AppendStdioFileToFile(FILE *fFrom, FILE *fTo);\r
+void FlushStdioFile(FILE *f);\r
+bool StdioFileExists(const string &FileName);\r
+off_t GetStdioFilePos(FILE *f);\r
+off_t GetStdioFileSize(FILE *f);\r
+void LogStdioFileState(FILE *f);\r
+void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo);\r
+void DeleteStdioFile(const string &FileName);\r
+\r
+void myvstrprintf(string &Str, const char *szFormat, va_list ArgList);\r
+void myvstrprintf(string &Str, const char *szFormat, ...);\r
+\r
+void SetLogFileName(const string &FileName);\r
+void Log(const char *szFormat, ...);\r
+\r
+void Die(const char *szFormat, ...);\r
+void Warning(const char *szFormat, ...);\r
+\r
+void ProgressStep(unsigned i, unsigned N, const char *Format, ...);\r
+void Progress(const char *szFormat, ...);\r
+void Progress(const string &Str);\r
+void ProgressLog(const char *szFormat, ...);\r
+void ProgressExit();\r
+\r
+char *mystrsave(const char *s);\r
+\r
+double GetPeakMemUseBytes();\r
+\r
+// Are two floats equal to within epsilon?\r
+const double epsilon = 0.01;\r
+inline bool feq(double x, double y, double epsilon)\r
+       {\r
+       if (fabs(x) > 10000)\r
+               epsilon = fabs(x)/10000;\r
+       if (fabs(x - y) > epsilon)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+inline bool feq(double x, double y)\r
+       {\r
+       if (x < -1e6 && y < -1e6)\r
+               return true;\r
+       double e = epsilon;\r
+       if (fabs(x) > 10000)\r
+               e = fabs(x)/10000;\r
+       if (fabs(x - y) > e)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+// Assert approximate equality (see feq); asserteq follows assert() and is\r
+// compiled out under NDEBUG, assertaeq is always active.\r
+#define asserteq(x, y) assert(feq(x, y))\r
+#define assertaeq(x, y)        asserta(feq(x, y))\r
+\r
+// Set the first n elements of array/pointer a to all-zero bytes. Arguments are\r
+// parenthesized so that expressions such as zero(p, n+1) clear (n+1) elements\r
+// rather than n + sizeof(p[0]) bytes.\r
+#define        zero(a, n)      memset((a), 0, (n)*sizeof((a)[0]))\r
+\r
+void InitRand();\r
+unsigned randu32();\r
+void Split(const string &Str, vector<string> &Fields, char Sep = 0);\r
+double Pct(double x, double y);\r
+double GetMemUseBytes();\r
+const char *MemBytesToStr(double Bytes);\r
+const char *IntToStr(unsigned i);\r
+const char *FloatToStr(double d);\r
+const char *SecsToStr(double Secs);\r
+void Logu(unsigned u, unsigned w, unsigned prefixspaces = 2);\r
+void Logf(float x, unsigned w, unsigned prefixspaces = 2);\r
+const char *SecsToHHMMSS(int Secs);\r
+\r
+void MyCmdLine(int argc, char **argv);\r
+void CmdLineErr(const char *Format, ...);\r
+void Help();\r
+void GetCmdLine(string &s);\r
+\r
+// Declare, for every option listed in myopts.h, the variable holding its value\r
+// (opt_<name>) and the flag recording whether it was set (optset_<name>).\r
+// Only LongName is used by these expansions.\r
+// NOTE(review): ENUM_OPT names its 2nd/3rd parameters (Default, Values) here\r
+// but (Values, Default) in MyCmdLine(); harmless in this header since neither\r
+// is used, but worth making consistent.\r
+#define FLAG_OPT(LongName)                                             extern bool opt_##LongName; extern bool optset_##LongName;\r
+#define TOG_OPT(LongName, Default)                             extern bool opt_##LongName; extern bool optset_##LongName;\r
+#define INT_OPT(LongName, Default, Min, Max)   extern int opt_##LongName; extern bool optset_##LongName;\r
+#define UNS_OPT(LongName, Default, Min, Max)   extern unsigned opt_##LongName; extern bool optset_##LongName;\r
+#define FLT_OPT(LongName, Default, Min, Max)   extern double opt_##LongName; extern bool optset_##LongName;\r
+#define STR_OPT(LongName, Default)                             extern string opt_##LongName; extern bool optset_##LongName;\r
+#define ENUM_OPT(LongName, Default, Values)            extern int opt_##LongName; extern bool optset_##LongName;\r
+#include "myopts.h"\r
+#undef FLAG_OPT\r
+#undef TOG_OPT\r
+#undef INT_OPT\r
+#undef UNS_OPT\r
+#undef FLT_OPT\r
+#undef STR_OPT\r
+#undef ENUM_OPT\r
+\r
+// Globals defined outside this header.\r
+extern const char *SVN_VERSION;\r
+extern const char *SVN_MODS;\r
+extern bool opt_quiet;
+extern bool opt_version;
+extern FILE *g_fLog;
+\r
+#endif // myutils_h\r
diff --git a/orf.h b/orf.h
new file mode 100644 (file)
index 0000000..90b29d1
--- /dev/null
+++ b/orf.h
@@ -0,0 +1,37 @@
+#ifndef orf_h\r
+#define orf_h\r
+\r
+#include "alpha.h"\r
+\r
+// Description of one open reading frame (ORF): a nucleotide sequence, its\r
+// amino-acid sequence and the coordinates relating the two. The member\r
+// functions are defined outside this header.\r
+struct ORFData\r
+       {\r
+       const byte *NucSeq;             // nucleotide letters\r
+       const byte *AminoSeq;   // amino-acid letters\r
+       int Frame;                              // reading frame; presumably signed to encode strand - TODO confirm\r
+       unsigned NucL;                  // length of NucSeq\r
+       unsigned AminoL;                // length of AminoSeq\r
+       unsigned NucLo;                 // presumably first nucleotide position of the ORF - TODO confirm\r
+       unsigned NucHi;                 // presumably last nucleotide position of the ORF - TODO confirm\r
+       ORFData *Next;                  // presumably links ORFs into a list - TODO confirm\r
+\r
+       // Conversions between nucleotide and amino-acid coordinates.\r
+       unsigned GetNucPosFirstBase() const;\r
+       unsigned GetAAPos(unsigned NucPos) const;\r
+       unsigned GetCodex(unsigned NucPos) const;\r
+       unsigned GetNucLo(unsigned AALo, unsigned AAHi) const;\r
+       unsigned GetNucHi(unsigned AALo, unsigned AAHi) const;\r
+       unsigned GetAALo(unsigned NucLo, unsigned NucHi) const;\r
+       unsigned GetAAHi(unsigned NucLo, unsigned NucHi) const;\r
+       unsigned GetNucPosFirstBaseInCodon(unsigned AAPos) const;\r
+       unsigned GetNucPosLastBaseInCodon(unsigned AAPos) const;\r
+       unsigned RoundToCodonLo(unsigned NucPos) const;\r
+       unsigned RoundToCodonHi(unsigned NucPos) const;\r
+       // Logging helpers.\r
+       void LogMe() const;\r
+       void LogMe2() const;\r
+       };\r
+\r
+const byte ORFEND = '.';\r
+\r
+void GetORFs(const byte *NucSeq, unsigned NucL, vector<ORFData> &ORFs,\r
+  unsigned ORFStyle, int FindFrame, int Sign);\r
+\r
+#endif // orf_h\r
diff --git a/out.h b/out.h
new file mode 100644 (file)
index 0000000..4ca50c7
--- /dev/null
+++ b/out.h
@@ -0,0 +1,134 @@
+#ifndef out_h\r
+#define out_h\r
+\r
+#include "seq.h"\r
+#include "hsp.h"\r
+#include "orf.h"\r
+#include "path.h"\r
+#include <float.h>\r
+\r
+// One alignment between two sequences, together with its scores.\r
+struct AlnData\r
+       {\r
+/***\r
+SA.Seq and SB.Seq align.\r
+Reverse strand stuff for nucleotides is handled like this:\r
+       SA.RevComp must be false.\r
+       If SB.RevComp is true, then SA.Seq is r.c.'d relative to the sequence in\r
+       the input file (query or db). If so, coordinates in HSP refer to SB.Seq\r
+       so are also r.c.'d relative to the original sequence.\r
+NOTE(review): "then SA.Seq is r.c.'d" above probably means SB.Seq, given that\r
+SA.RevComp must be false - confirm.\r
+***/\r
+       SeqData SA;\r
+       SeqData SB;\r
+       HSPData HSP;\r
+       const char *Path;\r
+       char IdDesc[256];\r
+\r
+       float FractId;\r
+       float RawScore;\r
+       float BitScore;\r
+       float Evalue;\r
+\r
+       // Write "AD: ", the HSP and the two sequence labels to the log.\r
+       void LogMe() const\r
+               {\r
+               Log("AD: ");\r
+               HSP.LogMe();\r
+               Log(" %s,%s\n", SA.Label, SB.Label);\r
+               }\r
+       };\r
+\r
+bool OnDerepHit(const SeqData &SA, const SeqData &SB);\r
+\r
+bool OnLocalUngappedHit(const SeqData &SA, const SeqData &SB,\r
+  const HSPData &HSP, float &Evalue, float &FractId);\r
+\r
+bool OnLocalGappedHit(const SeqData &SA, const SeqData &SB,\r
+  const HSPData &HSP, const PathData &PD, float &Evalue, float &FractId);\r
+\r
+bool OnGlobalHit(const SeqData &SA, const SeqData &SB, const PathData &PD,\r
+  float &FractId);\r
+\r
+void OnReject(const SeqData &SA, const SeqData &SB, double FractId,\r
+  const char *Path);\r
+\r
+void OnNotMatched(const char *Label, unsigned L);\r
+void OnNewCluster(unsigned ClusterIndex, const char *Label, unsigned L);\r
+void OnNewLibCluster(unsigned ClusterIndex, const char *Label, unsigned L);\r
+void OnLibCluster(unsigned ClusterIndex, unsigned Size, double AvgId,\r
+  const char *Label);\r
+void OnNewCluster(unsigned ClusterIndex, unsigned Size, double AvgId,\r
+  const char *Label);\r
+void OnChainCov(const SeqData &NucleoSD, const SeqData &TargetSD,\r
+  float Score, float ChainCov);\r
+\r
+void SetUserFieldIndexes(const string &s);\r
+\r
+void BlastOut(FILE *f, const AlnData &AD);\r
+void Blast6Out(FILE *f, const AlnData &AD);\r
+void FastaPairOut(FILE *f, const AlnData &AD);\r
+void UserOut(FILE *f, const AlnData &AD);\r
+\r
+void BlastOutORF(FILE *f, const AlnData &AD);\r
+\r
+void OpenOutputFiles();\r
+void CloseOutputFiles();\r
+void SetLibSeedCount(unsigned DBSeqCount);\r
+const char *UserFieldIndexToStr(unsigned i);\r
+\r
+extern float **g_SubstMx;\r
+\r
+static char g_IdChar = '|';\r
+static char g_DiffChar = ' ';\r
+\r
+static inline char GetSymN(byte Letter1, byte Letter2)\r
+       {\r
+       Letter1 = toupper(Letter1);\r
+       Letter2 = toupper(Letter2);\r
+       if (Letter1 == Letter2)\r
+               return g_IdChar;\r
+       return g_DiffChar;\r
+       }\r
+\r
+static inline char GetSymA(byte Letter1, byte Letter2)\r
+       {\r
+       Letter1 = toupper(Letter1);\r
+       Letter2 = toupper(Letter2);\r
+       if (Letter1 == Letter2)\r
+               return '|';\r
+\r
+       float Score = g_SubstMx[Letter1][Letter2];\r
+       if (Score >= 2.0f)\r
+               return ':';\r
+       if (Score > 0.0f)\r
+               return '.';\r
+       return ' ';\r
+       }\r
+\r
+// Annotation symbol for a letter pair; Nucleo selects the nucleotide\r
+// (GetSymN) or amino-acid (GetSymA) rules.\r
+static inline char GetSym(byte Letter1, byte Letter2, bool Nucleo)\r
+       {\r
+       return Nucleo ? GetSymN(Letter1, Letter2) : GetSymA(Letter1, Letter2);\r
+       }\r
+\r
+static unsigned GetNDig(unsigned n)\r
+       {\r
+       if (n < 10)\r
+               return 1;\r
+       if (n < 100)\r
+               return 2;\r
+       if (n < 1000)\r
+               return 3;\r
+       if (n < 10000)\r
+               return 4;\r
+       if (n < 100000)\r
+               return 5;\r
+       if (n < 1000000)\r
+               return 6;\r
+       return 10;\r
+       }\r
+\r
+extern unsigned *g_UserFieldIndexes;\r
+extern unsigned g_UserFieldCount;\r
+\r
+#endif // out_h\r
diff --git a/path.cpp b/path.cpp
new file mode 100644 (file)
index 0000000..9340344
--- /dev/null
+++ b/path.cpp
@@ -0,0 +1,151 @@
+#include "myutils.h"\r
+#include "path.h"\r
+#include "timing.h"\r
+\r
+#define TRACE  0\r
+\r
+// Value stored in every PathBuffer and checked by AllocBuffer() as a sanity test.\r
+const unsigned PathMagic = 0x9A783A16;\r
+\r
+// One entry of the buffer pool used for path strings.\r
+struct PathBuffer\r
+       {\r
+       unsigned Magic;         // always PathMagic\r
+       char *Buffer;           // the memory itself, or 0 if not allocated yet\r
+       unsigned Size;          // size of Buffer in bytes\r
+       bool InUse;                     // true while handed out by AllocBuffer()\r
+       };\r
+\r
+// The pool: g_PathBufferSize pointers to pool entries.\r
+static PathBuffer **g_PathBuffers;\r
+static unsigned g_PathBufferSize;\r
+\r
+// Return a buffer of at least Size bytes from the pool, marking it in use.\r
+// Returns 0 when Size is 0. Buffers are never returned to the system here;\r
+// FreeBuffer() only marks them available again.\r
+static char *AllocBuffer(unsigned Size)\r
+       {\r
+       if (Size == 0)\r
+               return 0;\r
+\r
+// Is there a free buffer that is big enough?\r
+       for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+               {\r
+               PathBuffer *PB = g_PathBuffers[i];\r
+               asserta(PB->Magic == PathMagic);\r
+               if (!PB->InUse)\r
+                       {\r
+                       if (PB->Size >= Size)\r
+                               {\r
+                               PB->InUse = true;\r
+                               return PB->Buffer;\r
+                               }\r
+                       // A pool entry without memory yet: allocate it now, with\r
+                       // 1024 bytes of headroom. (A free entry whose buffer is too\r
+                       // small is simply skipped.)\r
+                       if (PB->Buffer == 0)\r
+                               {\r
+                               unsigned Size2 = Size + 1024;\r
+                               PB->Buffer = MYALLOC(char, Size2, Path);\r
+                               PB->Size = Size2;\r
+                               PB->InUse = true;\r
+                               return PB->Buffer;\r
+                               }\r
+                       }\r
+               }\r
+\r
+// No available buffer, must expand g_PathBuffers[]\r
+       unsigned NewPathBufferSize = g_PathBufferSize + 1024;\r
+       PathBuffer **NewPathBuffers = MYALLOC(PathBuffer *, NewPathBufferSize, Path);\r
+       \r
+       // Keep the existing entries...\r
+       for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+               NewPathBuffers[i] = g_PathBuffers[i];\r
+\r
+       // ...and append 1024 new, empty ones.\r
+       for (unsigned i = g_PathBufferSize; i < NewPathBufferSize; ++i)\r
+               {\r
+               PathBuffer *PB = MYALLOC(PathBuffer, 1, Path);\r
+               PB->Magic = PathMagic;\r
+               PB->Buffer = 0;\r
+               PB->Size = 0;\r
+               PB->InUse = false;\r
+               NewPathBuffers[i] = PB;\r
+               }\r
+\r
+       // The first of the new entries serves this request.\r
+       PathBuffer *PB = NewPathBuffers[g_PathBufferSize];\r
+\r
+       MYFREE(g_PathBuffers, g_PathBufferSize, Path);\r
+       g_PathBuffers = NewPathBuffers;\r
+       g_PathBufferSize = NewPathBufferSize;\r
+\r
+       asserta(!PB->InUse && PB->Buffer == 0);\r
+\r
+       unsigned Size2 = Size + 1024;\r
+       PB->Buffer = MYALLOC(char, Size2, Path);\r
+       PB->Size = Size2;\r
+       PB->InUse = true;\r
+       return PB->Buffer;\r
+       }\r
+\r
+static void FreeBuffer(char *Buffer)\r
+       {\r
+       if (Buffer == 0)\r
+               return;\r
+\r
+       for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+               {\r
+               PathBuffer *PB = g_PathBuffers[i];\r
+               if (PB->Buffer == Buffer)\r
+                       {\r
+                       asserta(PB->InUse);\r
+                       PB->InUse = false;\r
+                       return;\r
+                       }\r
+               }\r
+\r
+       Die("FreeBuffer, not found");\r
+       }\r
+\r
+// Make room for a path of up to MaxLen characters. Nothing happens when\r
+// MaxLen < Bytes; otherwise the old buffer (if any) goes back to the pool and a\r
+// new one of MaxLen + 1 bytes is taken. The path itself is left empty\r
+// (Start == 0).\r
+void PathData::Alloc(unsigned MaxLen)\r
+       {\r
+       if (MaxLen < Bytes)\r
+               return;\r
+\r
+       StartTimer(PathAlloc);\r
+       if (Bytes > 0)\r
+               {\r
+               FreeBuffer(Front);\r
+               }\r
+\r
+       Bytes = MaxLen + 1;\r
+       Front = AllocBuffer(Bytes);\r
+       // Back addresses the last byte of the Bytes-sized region.\r
+       Back = Front + Bytes - 1;\r
+       Start = 0;\r
+       EndTimer(PathAlloc);\r
+       }\r
+\r
+// Return the buffer to the pool and reset this object to the unallocated state.\r
+// Safe to call repeatedly (FreeBuffer ignores a null pointer).\r
+void PathData::Free()\r
+       {\r
+       FreeBuffer(Front);\r
+       Front = 0;\r
+       Start = 0;\r
+       Back = 0;\r
+       // Bytes must be reset as well: Alloc() returns early when\r
+       // MaxLen < Bytes, so a stale size would leave Front null after a later\r
+       // Alloc() and FromStr()/Copy() would then write through a null pointer.\r
+       Bytes = 0;\r
+       }\r
+\r
+// Make this path a copy of rhs. An empty or unallocated rhs yields an empty\r
+// path here.\r
+void PathData::Copy(const PathData &rhs)\r
+       {\r
+       // With no buffer or no path in rhs there is nothing to copy; strcpy from\r
+       // a null pointer and pointer arithmetic on a null Start are both invalid.\r
+       if (rhs.Front == 0 || rhs.Start == 0)\r
+               {\r
+               Start = 0;\r
+               return;\r
+               }\r
+\r
+       Alloc(rhs.Bytes);\r
+       // NOTE(review): strcpy stops at the first NUL after rhs.Front; this\r
+       // assumes the bytes between rhs.Front and rhs.Start contain none -\r
+       // confirm for paths that do not start at Front.\r
+       strcpy(Front, rhs.Front);\r
+       // Keep Start at the same offset from Front as in rhs.\r
+       Start = Front + (rhs.Start - rhs.Front);\r
+       }\r
+\r
+// Set this path to a copy of the NUL-terminated string PathStr (must not be\r
+// null). Afterwards the path starts at Front.\r
+void PathData::FromStr(const char *PathStr)\r
+       {\r
+       asserta(PathStr != 0);\r
+       const size_t Len = strlen(PathStr);\r
+       Alloc((unsigned) Len + 1);\r
+       memcpy(Front, PathStr, Len + 1); // includes the terminating NUL\r
+       Start = Front;\r
+       }\r
+\r
+void LogPathStats()\r
+       {\r
+       Log("\n");\r
+       unsigned Bytes = 0;\r
+       for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+               {\r
+               const PathBuffer *PB = g_PathBuffers[i];\r
+               Bytes += PB->Size;\r
+               }\r
+       Log("%u paths allocated, total memory %u bytes\n", g_PathBufferSize, Bytes);\r
+       }\r
diff --git a/path.h b/path.h
new file mode 100644 (file)
index 0000000..f63be7e
--- /dev/null
+++ b/path.h
@@ -0,0 +1,63 @@
+#ifndef path_h\r
+#define path_h\r
+\r
+// An alignment path held as a NUL-terminated string inside a buffer that is\r
+// obtained through Alloc() and released through Free() (both in path.cpp).\r
+struct PathData\r
+       {\r
+private:\r
+       // Declared private so that objects cannot be copied or assigned from\r
+       // outside the class; use Copy() instead.\r
+       PathData(PathData &);\r
+       PathData &operator=(PathData &);\r
+\r
+public:\r
+       char *Start;            // first character of the path, 0 when the path is empty\r
+       char *Front;            // first byte of the buffer, 0 when none is allocated\r
+       char *Back;                     // last byte of the region set up by Alloc()\r
+       unsigned Bytes;         // size recorded by Alloc()\r
+\r
+public:\r
+       PathData()\r
+               {\r
+               Clear(true);\r
+               }\r
+       ~PathData()\r
+               {\r
+               Free();\r
+               }\r
+       void Free();\r
+       void Alloc(unsigned MaxLen);\r
+       // ctor == true: just zero all members (nothing to release yet).\r
+       // ctor == false: empty the path and release the buffer via Free().\r
+       void Clear(bool ctor = false)\r
+               {\r
+               Start = 0;\r
+               if (ctor)\r
+                       {\r
+                       Front = 0;\r
+                       Back = 0;\r
+                       Bytes = 0;\r
+                       }\r
+               else\r
+                       Free();\r
+               }\r
+       void Copy(const PathData &rhs);\r
+       void FromStr(const char *PathStr);\r
+       // Reverse the path string in place; the path must not be empty.\r
+       void Reverse()\r
+               {\r
+               asserta(Start != 0);\r
+               unsigned L = (unsigned) strlen(Start);\r
+               for (unsigned k = 0; k < L/2; ++k)\r
+                       {\r
+                       char c = Start[k];\r
+                       Start[k] = Start[L-k-1];\r
+                       Start[L-k-1] = c;\r
+                       }\r
+               }\r
+       // Mark the path empty without releasing the buffer.\r
+       void SetEmpty()\r
+               {\r
+               Start = 0;\r
+               }\r
+\r
+       bool IsEmpty() const\r
+               {\r
+               return Start == 0;\r
+               }\r
+       };\r
+\r
+#endif // path_h\r
diff --git a/searchchime.cpp b/searchchime.cpp
new file mode 100644 (file)
index 0000000..c00a9c4
--- /dev/null
@@ -0,0 +1,304 @@
+#include "myutils.h"\r
+#include "ultra.h"\r
+#include "chime.h"\r
+#include "uc.h"\r
+#include "dp.h"\r
+#include <set>\r
+#include <algorithm>\r
+\r
+#define TRACE  0\r
+\r
+extern FILE *g_fUChime;\r
+\r
+void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,\r
+  vector<unsigned> &Parents);\r
+\r
+void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
+  const string &PathQA, const string &PathQB, ChimeHit2 &Hit);\r
+\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo);\r
+\r
+// Fill IdVec with one value per query position: the number of identical\r
+// (case-insensitive) aligned letters in a trailing window ending at that\r
+// position.\r
+//   QSD, PSD  query and parent sequences\r
+//   Path      alignment path; 'M' consumes a letter of both sequences,\r
+//             'D' consumes a query letter only, 'I' a parent letter only\r
+//   IdVec     output, cleared first; ends up with QSD.L entries\r
+//   d         window parameter\r
+// If the query is not longer than d, IdVec is QSD.L zeros.\r
+static void GetSmoothedIdVec(const SeqData &QSD, const SeqData &PSD, const string &Path,\r
+  vector<unsigned> &IdVec, unsigned d)\r
+       {\r
+       IdVec.clear();\r
+       const unsigned ColCount = SIZE(Path);\r
+\r
+       const byte *Q = QSD.Seq;\r
+       const byte *P = PSD.Seq;\r
+\r
+       const unsigned QL = QSD.L;\r
+       // PL is not used below.\r
+       const unsigned PL = PSD.L;\r
+\r
+       if (QL <= d)\r
+               {\r
+               IdVec.resize(QSD.L, 0);\r
+               return;\r
+               }\r
+\r
+       unsigned QPos = 0;\r
+       unsigned PPos = 0;\r
+\r
+       // SameVec[q] is true when query position q is aligned ('M') to an\r
+       // identical parent letter; positions in 'D' columns stay false.\r
+       vector<bool> SameVec;\r
+       SameVec.reserve(QL);\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char c = Path[Col];\r
+\r
+               bool Same = false;\r
+               if (c == 'M')\r
+                       {\r
+                       byte q = Q[QPos];\r
+                       byte p = P[PPos];\r
+                       Same = (toupper(q) == toupper(p));\r
+                       }\r
+\r
+               if (c == 'M' || c == 'D')\r
+                       {\r
+                       ++QPos;\r
+                       SameVec.push_back(Same);\r
+                       }\r
+\r
+               if (c == 'M' || c == 'I')\r
+                       ++PPos;\r
+               }\r
+\r
+       // The path must consume exactly the whole query.\r
+       asserta(SIZE(SameVec) == QL);\r
+\r
+       // First d positions: running count of identities seen so far.\r
+       unsigned n = 0;\r
+       for (unsigned QPos = 0; QPos < d; ++QPos)\r
+               {\r
+               if (SameVec[QPos])\r
+                       ++n;\r
+               IdVec.push_back(n);\r
+               }\r
+\r
+       // Remaining positions: the stored value counts identities in\r
+       // [QPos-d, QPos] (d+1 positions); the oldest position is dropped only\r
+       // after the value has been stored.\r
+       for (unsigned QPos = d; QPos < QL; ++QPos)\r
+               {\r
+               if (SameVec[QPos])\r
+                       ++n;\r
+               IdVec.push_back(n);\r
+               if (SameVec[QPos-d])\r
+                       --n;\r
+               }\r
+       asserta(SIZE(IdVec) == QL);\r
+\r
+#if    TRACE\r
+       {\r
+       // Debug dump: one line per 'M' column with the smoothed value.\r
+       Log("\n");\r
+       Log("GetSmoothedIdVec\n");\r
+       unsigned QPos = 0;\r
+       unsigned PPos = 0;\r
+       Log("Q P  Same       Id\n");\r
+       Log("- -  ----  -------\n");\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char c = Path[Col];\r
+\r
+               bool Same = false;\r
+               if (c == 'M')\r
+                       {\r
+                       byte q = Q[QPos];\r
+                       byte p = P[PPos];\r
+                       Same = (toupper(q) == toupper(p));\r
+                       Log("%c %c  %4c  %7d\n", q, p, tof(Same), IdVec[QPos]);\r
+                       }\r
+\r
+               if (c == 'M' || c == 'D')\r
+                       ++QPos;\r
+               if (c == 'M' || c == 'I')\r
+                       ++PPos;\r
+               }\r
+       }\r
+#endif\r
+       }\r
+\r
+// Search for a chimeric model of query QSD.\r
+// Steps visible here:\r
+//   1. get candidate parents from U and globally align each to the query;\r
+//   2. greedily pick up to opt_maxp "best parents" by how many query\r
+//      positions they reach the per-position maximum smoothed identity on;\r
+//   3. run AlignChime on every pair of best parents.\r
+// Hit receives the highest-scoring pair model (it is cleared first and only\r
+// QLabel is set if no pair is evaluated). Returns true if at least one pair\r
+// model passes Accept().\r
+bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
+  const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
+  float MinFractId, ChimeHit2 &Hit)\r
+       {\r
+       Hit.Clear();\r
+       Hit.QLabel = QSD.Label;\r
+\r
+       if (opt_verbose)\r
+               {\r
+               Log("\n");\r
+               Log("SearchChime()\n");\r
+               Log("Query>%s\n", QSD.Label);\r
+               }\r
+\r
+       vector<unsigned> Parents;\r
+       GetCandidateParents(U, QSD, QAb, Parents);\r
+\r
+       // A chimeric model needs two parents.\r
+       unsigned ParentCount = SIZE(Parents);\r
+       if (ParentCount <= 1)\r
+               {\r
+               if (opt_verbose)\r
+                       Log("%u candidate parents, done.\n", ParentCount);\r
+               return false;\r
+               }\r
+\r
+       if (opt_fastalign)\r
+               HF.SetA(QSD);\r
+       // NOTE(review): ptrHF is null when opt_fastalign is off, yet it is\r
+       // dereferenced unconditionally in the GlobalAlign call below. Confirm\r
+       // GlobalAlign never touches that argument in this mode.\r
+       HSPFinder *ptrHF = (opt_fastalign ? &HF : 0);\r
+\r
+       // The chunk variables below are computed but not used later in this\r
+       // function.\r
+       unsigned ChunkLength;\r
+       vector<unsigned> ChunkLos;\r
+       GetChunkInfo(QSD.L, ChunkLength, ChunkLos);\r
+       const unsigned ChunkCount = SIZE(ChunkLos);\r
+\r
+       vector<unsigned> ChunkIndexToBestId(ChunkCount, 0);\r
+       vector<unsigned> ChunkIndexToBestParentIndex(ChunkCount, UINT_MAX);\r
+\r
+       // PSDs and Paths are indexed by ParentIndex; an empty path marks a\r
+       // parent that was skipped. MaxIdVec[q] is the best smoothed identity\r
+       // any parent reaches at query position q.\r
+       vector<SeqData> PSDs;\r
+       vector<string> Paths;\r
+       double TopPctId = 0.0;\r
+       unsigned TopParentIndex = UINT_MAX;\r
+       unsigned QL = QSD.L;\r
+       vector<unsigned> MaxIdVec(QL, 0);\r
+       for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
+               {\r
+               unsigned ParentSeqIndex = Parents[ParentIndex];\r
+\r
+               SeqData PSD;\r
+               //PSD.Label = U.GetSeedLabel(ParentSeqIndex);\r
+               //PSD.Seq = U.GetSeedSeq(ParentSeqIndex);\r
+               //PSD.L = U.GetSeedLength(ParentSeqIndex);\r
+               //PSD.Index = ParentSeqIndex;\r
+               U.GetSeqData(ParentSeqIndex, PSD);\r
+               PSDs.push_back(PSD);\r
+\r
+               if (opt_fastalign)\r
+                       HF.SetB(PSD);\r
+\r
+               PathData PD;\r
+\r
+               float HSPId;\r
+               bool Found = GlobalAlign(QSD, PSD, AP, AH, *ptrHF, MinFractId, HSPId, PD);\r
+               if (!Found)\r
+                       {\r
+                       // Keep Paths aligned with PSDs.\r
+                       Paths.push_back("");                            \r
+                       continue;\r
+                       }\r
+\r
+               double PctId = 100.0*GetFractIdGivenPath(QSD.Seq, PSD.Seq, PD.Start, true);\r
+               // With opt_selfid set, a 100% identical parent is skipped.\r
+               if (opt_selfid && PctId == 100.0)\r
+                       {\r
+                       Paths.push_back("");                            \r
+                       continue;\r
+                       }\r
+\r
+               if (PctId > TopPctId)\r
+                       {\r
+                       TopParentIndex = ParentIndex;\r
+                       TopPctId = PctId;\r
+                       if (TopPctId >= 100.0 - opt_mindiv)\r
+                               {\r
+                               // NOTE(review): the early return is inside the\r
+                               // opt_verbose block, so it only happens in verbose\r
+                               // mode. Looks like a misplaced brace -- confirm the\r
+                               // intent before changing, since it affects output.\r
+                               if (opt_verbose)\r
+                                       {\r
+                                       Log("  %.1f%%  >%s\n", TopPctId, PSD.Label);\r
+                                       Log("  Top hit exceeds ctl threshold, done.\n");\r
+                                       return false;\r
+                                       }\r
+                               }\r
+                       }\r
+\r
+               string Path = PD.Start;\r
+               Paths.push_back(Path);\r
+\r
+               vector<unsigned> IdVec;\r
+               GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
+\r
+               for (unsigned QPos = 0; QPos < QL; ++QPos)\r
+                       if (IdVec[QPos] > MaxIdVec[QPos])\r
+                               MaxIdVec[QPos] = IdVec[QPos];\r
+               }\r
+\r
+       // Greedy selection: each round picks the parent that matches MaxIdVec\r
+       // at the most positions, then retires those positions.\r
+       vector<unsigned> BestParents;\r
+       for (unsigned k = 0; k < opt_maxp; ++k)\r
+               {\r
+               unsigned BestParent = UINT_MAX;\r
+               unsigned BestCov = 0;\r
+               for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
+                       {\r
+                       const SeqData &PSD = PSDs[ParentIndex];\r
+                       const string &Path = Paths[ParentIndex];\r
+                       if (Path == "")\r
+                               continue;\r
+\r
+                       // The smoothed vector is recomputed for every parent in\r
+                       // every round.\r
+                       vector<unsigned> IdVec;\r
+                       GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
+\r
+                       unsigned Cov = 0;\r
+                       for (unsigned QPos = 0; QPos < QL; ++QPos)\r
+                               if (IdVec[QPos] == MaxIdVec[QPos])\r
+                                       ++Cov;\r
+\r
+                       if (Cov > BestCov)\r
+                               {\r
+                               BestParent = ParentIndex;\r
+                               BestCov = Cov;\r
+                               }\r
+                       }\r
+\r
+               // No parent covers any remaining position.\r
+               if (BestParent == UINT_MAX)\r
+                       break;\r
+\r
+               BestParents.push_back(BestParent);\r
+               vector<unsigned> IdVec;\r
+\r
+               const SeqData &PSD = PSDs[BestParent];\r
+               const string &Path = Paths[BestParent];\r
+               GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
+               // Overwrite covered positions with UINT_MAX, presumably so they\r
+               // no longer count toward coverage in later rounds.\r
+               for (unsigned QPos = 0; QPos < QL; ++QPos)\r
+                       if (IdVec[QPos] == MaxIdVec[QPos])\r
+                               MaxIdVec[QPos] = UINT_MAX;\r
+               }\r
+\r
+       unsigned BestParentCount = SIZE(BestParents);\r
+\r
+       if (opt_verbose)\r
+               {\r
+               Log("%u/%u best parents\n", BestParentCount, ParentCount);\r
+               for (unsigned k = 0; k < BestParentCount; ++k)\r
+                       {\r
+                       unsigned i = BestParents[k];\r
+                       Log(" %s\n", PSDs[i].Label);\r
+                       }\r
+               }\r
+\r
+       // Evaluate every unordered pair of best parents.\r
+       bool Found = false;\r
+       for (unsigned k1 = 0; k1 < BestParentCount; ++k1)\r
+               {\r
+               unsigned i1 = BestParents[k1];\r
+               asserta(i1 < ParentCount);\r
+\r
+               const SeqData &PSD1 = PSDs[i1];\r
+               const string &Path1 = Paths[i1];\r
+\r
+               for (unsigned k2 = k1 + 1; k2 < BestParentCount; ++k2)\r
+                       {\r
+                       unsigned i2 = BestParents[k2];\r
+                       asserta(i2 < ParentCount);\r
+                       asserta(i2 != i1);\r
+\r
+                       const SeqData &PSD2 = PSDs[i2];\r
+                       const string &Path2 = Paths[i2];\r
+\r
+                       ChimeHit2 Hit2;\r
+                       AlignChime(QSD, PSD1, PSD2, Path1, Path2, Hit2);\r
+                       Hit2.PctIdQT = TopPctId;\r
+\r
+                       if (Hit2.Accept())\r
+                               Found = true;\r
+\r
+                       // Keep the highest-scoring model whether or not it was\r
+                       // accepted.\r
+                       if (Hit2.Score > Hit.Score)\r
+                               Hit = Hit2;\r
+\r
+                       if (opt_verbose)\r
+                               Hit2.LogMe();\r
+                       }\r
+               }\r
+\r
+       return Found;\r
+       }\r
diff --git a/seq.h b/seq.h
new file mode 100644 (file)
index 0000000..9014641
--- /dev/null
+++ b/seq.h
@@ -0,0 +1,38 @@
+#ifndef seq_h\r
+#define seq_h\r
+\r
+struct ORFData;\r
+\r
+// Lightweight description of one sequence. The struct only holds pointers;\r
+// nothing in it allocates or frees Label or Seq.\r
+struct SeqData\r
+       {\r
+       // Sequence label.\r
+       const char *Label;\r
+       // Sequence letters; length is given by L.\r
+       const byte *Seq;\r
+       // Number of letters in Seq.\r
+       unsigned L;\r
+       // Index of the sequence; UINT_MAX after Clear().\r
+       unsigned Index;\r
+\r
+// RevComp means that SeqData.Seq is reverse-complemented relative\r
+// to the sequence in the input file (query or db). Coordinates in\r
+// a hit (e.g., AlnData) will be relative to SeqData.Seq, so both\r
+// the sequence and the coordinates should be r.c.'d for output.\r
+       bool RevComp;\r
+       // True for nucleotide sequences.\r
+       bool Nucleo;\r
+       // NOTE(review): presumably the ORF this sequence was translated from,\r
+       // null otherwise -- confirm against orf.h.\r
+       const ORFData *ORFParent;\r
+\r
+       SeqData()\r
+               {\r
+               Clear();\r
+               }\r
+\r
+       // Reset every field to its "unset" value.\r
+       void Clear()\r
+               {\r
+               Label = 0;\r
+               Seq = 0;\r
+               L = 0;\r
+               Index = UINT_MAX;\r
+               RevComp = false;\r
+               Nucleo = false;\r
+               ORFParent = 0;\r
+               }\r
+       };\r
+\r
+#endif // seq_h\r
diff --git a/seqdb.cpp b/seqdb.cpp
new file mode 100644 (file)
index 0000000..03de189
--- /dev/null
+++ b/seqdb.cpp
@@ -0,0 +1,289 @@
+#include "myutils.h"\r
+#include "seqdb.h"\r
+#include "alpha.h"\r
+#include "timing.h"\r
+#include "sfasta.h"\r
+#include "seq.h"\r
+\r
+void SeqToFasta(FILE *f, const char *Label, const byte *Seq, unsigned L)\r
+       {\r
+       const unsigned ROWLEN = 80;\r
+       if (Label != 0)\r
+               fprintf(f, ">%s\n", Label);\r
+       unsigned BlockCount = (L + ROWLEN - 1)/ROWLEN;\r
+       for (unsigned BlockIndex = 0; BlockIndex < BlockCount; ++BlockIndex)\r
+               {\r
+               unsigned From = BlockIndex*ROWLEN;\r
+               unsigned To = From + ROWLEN;\r
+               if (To >= L)\r
+                       To = L;\r
+               for (unsigned Pos = From; Pos < To; ++Pos)\r
+                       fputc(Seq[Pos], f);\r
+               fputc('\n', f);\r
+               }\r
+       }\r
+\r
+// Destructor: releases all labels, sequences and index arrays via Clear().\r
+SeqDB::~SeqDB()\r
+       {\r
+       Clear();\r
+       }\r
+\r
+// Constructor: Clear(true) only initializes the members; it skips the\r
+// freeing step because nothing has been allocated yet.\r
+SeqDB::SeqDB()\r
+       {\r
+       Clear(true);\r
+       }\r
+\r
+// Reset the database to the empty state.\r
+// ctor must be true only when called from the constructor: the members are\r
+// then uninitialized and must not be freed or read.\r
+void SeqDB::Clear(bool ctor)\r
+       {\r
+       if (!ctor)\r
+               {\r
+               // Free each stored label and sequence, then the index arrays.\r
+               for (unsigned i = 0; i < m_SeqCount; ++i)\r
+                       {\r
+                       // NOTE(review): AddSeq allocates strlen(Label) + 1 bytes for a\r
+                       // label, but the size passed here is strlen without the\r
+                       // terminator. Verify whether MYFREE uses the size.\r
+                       unsigned n = strlen(m_Labels[i]);\r
+                       MYFREE(m_Labels[i], n, SeqDB);\r
+                       MYFREE(m_Seqs[i], m_SeqLengths[i], SeqDB);\r
+                       }\r
+               MYFREE(m_Labels, m_Size, SeqDB);\r
+               MYFREE(m_Seqs, m_Size, SeqDB);\r
+               MYFREE(m_SeqLengths, m_Size, SeqDB);\r
+               }\r
+\r
+       m_FileName.clear();\r
+       m_SeqCount = 0;\r
+       m_Size = 0;\r
+\r
+       m_Labels = 0;\r
+       m_Seqs = 0;\r
+       m_SeqLengths = 0;\r
+\r
+       m_Aligned = false;\r
+       m_IsNucleo = false;\r
+       // IsNucleo() asserts until SetIsNucleo() or InitEmpty() sets this flag.\r
+       m_IsNucleoSet = false;\r
+       }\r
+\r
+// Discard any current contents and start an empty database whose alphabet\r
+// (nucleotide or not) is given by the caller instead of being sampled.\r
+void SeqDB::InitEmpty(bool Nucleo)\r
+       {\r
+       Clear();\r
+       m_IsNucleo = Nucleo;\r
+       m_IsNucleoSet = true;\r
+       }\r
+\r
+// Replace the database contents with the sequences read from a FASTA file.\r
+//   FileName   file to read; also stored in m_FileName\r
+//   AllowGaps  forwarded to the reader's m_AllowGaps flag\r
+// After loading, the alphabet is determined by SetIsNucleo().\r
+void SeqDB::FromFasta(const string &FileName, bool AllowGaps)\r
+       {\r
+       Clear();\r
+       m_FileName = FileName;\r
+       SFasta SF;\r
+\r
+       SF.Open(FileName);\r
+       // Set after Open().\r
+       SF.m_AllowGaps = AllowGaps;\r
+\r
+       // Progress is reported in tenths of a percent (0..1000).\r
+       ProgressStep(0, 1000, "Reading %s", FileName.c_str());\r
+       for (;;)\r
+               {\r
+               unsigned QueryPctDoneX10 = SF.GetPctDoneX10();\r
+               ProgressStep(QueryPctDoneX10, 1000, "Reading %s", FileName.c_str());\r
+               // A null sequence pointer signals the end of the input.\r
+               const byte *Seq = SF.GetNextSeq();\r
+               if (Seq == 0)\r
+                       break;\r
+\r
+               const char *Label = SF.GetLabel();\r
+               unsigned L = SF.GetSeqLength();\r
+               // AddSeq copies both the label and the letters.\r
+               AddSeq(Label, Seq, L);\r
+               }\r
+       ProgressStep(999, 1000, "Reading %s", FileName.c_str());\r
+\r
+       SetIsNucleo();\r
+\r
+       Progress("%s sequences\n", IntToStr(GetSeqCount()));\r
+       }\r
+\r
+// Write every sequence of the database, in index order, to a newly created\r
+// FASTA file named FileName.\r
+void SeqDB::ToFasta(const string &FileName) const\r
+       {\r
+       FILE *Out = CreateStdioFile(FileName);\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned SeqIndex = 0;\r
+       while (SeqIndex != SeqCount)\r
+               {\r
+               ToFasta(Out, SeqIndex);\r
+               ++SeqIndex;\r
+               }\r
+       CloseStdioFile(Out);\r
+       }\r
+\r
+void SeqDB::SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel) const\r
+       {\r
+       if (WithLabel)\r
+               fprintf(f, ">%s\n", GetLabel(SeqIndex));\r
+\r
+       const unsigned ROWLEN = 80;\r
+\r
+       unsigned L = GetSeqLength(SeqIndex);\r
+       const byte *Seq = GetSeq(SeqIndex);\r
+       unsigned BlockCount = (L + ROWLEN - 1)/ROWLEN;\r
+       for (unsigned BlockIndex = 0; BlockIndex < BlockCount; ++BlockIndex)\r
+               {\r
+               unsigned From = BlockIndex*ROWLEN;\r
+               unsigned To = From + ROWLEN;\r
+               if (To >= L)\r
+                       To = L;\r
+               for (unsigned Pos = From; Pos < To; ++Pos)\r
+                       fputc(Seq[Pos], f);\r
+               fputc('\n', f);\r
+               }\r
+       }\r
+\r
+// Write sequence SeqIndex to f as a complete FASTA record: the ">label"\r
+// header line followed by the wrapped sequence rows.\r
+void SeqDB::ToFasta(FILE *f, unsigned SeqIndex) const\r
+       {\r
+       asserta(SeqIndex < m_SeqCount);\r
+       // WithLabel=true makes SeqToFasta emit the header line itself.\r
+       SeqToFasta(f, SeqIndex, true);\r
+       }\r
+\r
+unsigned SeqDB::GetMaxLabelLength() const\r
+       {\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned MaxL = 0;\r
+       for (unsigned Index = 0; Index < SeqCount; ++Index)\r
+               {\r
+               unsigned L = (unsigned) strlen(m_Labels[Index]);\r
+               if (L > MaxL)\r
+                       MaxL = L;\r
+               }\r
+       return MaxL;\r
+       }\r
+\r
+unsigned SeqDB::GetMaxSeqLength() const\r
+       {\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned MaxL = 0;\r
+       for (unsigned Index = 0; Index < SeqCount; ++Index)\r
+               {\r
+               unsigned L = m_SeqLengths[Index];\r
+               if (L > MaxL)\r
+                       MaxL = L;\r
+               }\r
+       return MaxL;\r
+       }\r
+\r
+// Write a human-readable dump of the database to the log: a summary line,\r
+// then one table row per sequence (index, label, length, letters).\r
+void SeqDB::LogMe() const\r
+       {\r
+       Log("\n");\r
+       const unsigned SeqCount = GetSeqCount();\r
+       Log("SeqDB %u seqs, aligned=%c\n", SeqCount, tof(m_Aligned));\r
+       if (SeqCount == 0)\r
+               return;\r
+\r
+       Log("Index             Label  Length  Seq\n");\r
+       Log("-----  ----------------  ------  ---\n");\r
+       for (unsigned Index = 0; Index < SeqCount; ++Index)\r
+               {\r
+               Log("%5u", Index);\r
+               // Labels are padded or truncated to exactly 16 characters.\r
+               Log("  %16.16s", m_Labels[Index]);\r
+               unsigned L = m_SeqLengths[Index];\r
+               Log("  %6u", L);\r
+               // An explicit width and precision are required because sequences are\r
+               // stored without a terminating NUL (AddSeq copies exactly L bytes).\r
+               // NOTE(review): '*' expects int arguments; L is unsigned.\r
+               Log("  %*.*s", L, L, m_Seqs[Index]);\r
+               Log("\n");\r
+               }\r
+       }\r
+\r
+void SeqDB::GetSeqData(unsigned Id, SeqData &Buffer) const\r
+       {\r
+       asserta(Id < m_SeqCount);\r
+       Buffer.Seq = m_Seqs[Id];\r
+       Buffer.Label = m_Labels[Id];\r
+       Buffer.L = m_SeqLengths[Id];\r
+       Buffer.Index = Id;\r
+       Buffer.ORFParent = 0;\r
+       Buffer.RevComp = false;\r
+       Buffer.Nucleo = IsNucleo();\r
+       }\r
+\r
+void SeqDB::SetIsNucleo()\r
+       {\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned N = 0;\r
+       for (unsigned i = 0; i < 100; ++i)\r
+               {\r
+               unsigned SeqIndex = unsigned(rand()%SeqCount);\r
+               const byte *Seq = GetSeq(SeqIndex);\r
+               unsigned L = GetSeqLength(SeqIndex);\r
+               const unsigned Pos = unsigned(rand()%L);\r
+               byte c = Seq[Pos];\r
+\r
+               if (g_IsNucleoChar[c])\r
+                       ++N;\r
+               }\r
+       m_IsNucleo = (N > 80);\r
+       m_IsNucleoSet = true;\r
+       }\r
+\r
+unsigned SeqDB::GetTotalLength() const\r
+       {\r
+       const unsigned SeqCount = GetSeqCount();\r
+       unsigned TotalLength = 0;\r
+       for (unsigned Id = 0; Id < SeqCount; ++Id)\r
+               TotalLength += GetSeqLength(Id);\r
+       return TotalLength;\r
+       }\r
+\r
+// Append a sequence to the database and return its index.\r
+//   Label  NUL-terminated label; copied\r
+//   Seq    sequence letters; exactly L bytes are copied (no terminator)\r
+//   L      number of letters\r
+// Also keeps m_Aligned up to date: it stays true only while every sequence\r
+// has the same length as the first one.\r
+unsigned SeqDB::AddSeq(const char *Label, const byte *Seq, unsigned L)\r
+       {\r
+       StartTimer(AddSeq);\r
+       if (m_SeqCount >= m_Size)\r
+               {\r
+               // Index arrays are full: grow capacity by 1.5x plus 1024 and move\r
+               // the existing entries (pointers only, not the data they point to).\r
+               unsigned NewSize = unsigned(m_Size*1.5) + 1024;\r
+               char **NewLabels = MYALLOC(char *, NewSize, SeqDB);\r
+               byte **NewSeqs = MYALLOC(byte *, NewSize, SeqDB);\r
+               unsigned *NewSeqLengths = MYALLOC(unsigned, NewSize, SeqDB);\r
+\r
+               for (unsigned i = 0; i < m_SeqCount; ++i)\r
+                       {\r
+                       NewLabels[i] = m_Labels[i];\r
+                       NewSeqs[i] = m_Seqs[i];\r
+                       NewSeqLengths[i] = m_SeqLengths[i];\r
+                       }\r
+\r
+               MYFREE(m_Labels, m_SeqCount, SeqDB);\r
+               MYFREE(m_Seqs, m_SeqCount, SeqDB);\r
+               MYFREE(m_SeqLengths, m_SeqCount, SeqDB);\r
+\r
+               m_Labels = NewLabels;\r
+               m_Seqs = NewSeqs;\r
+               m_SeqLengths = NewSeqLengths;\r
+               m_Size = NewSize;\r
+               }\r
+\r
+       unsigned Index = m_SeqCount++;\r
+       m_Seqs[Index] = MYALLOC(byte, L, SeqDB);\r
+       memcpy(m_Seqs[Index], Seq, L);\r
+\r
+       // n includes the terminating NUL.\r
+       unsigned n = strlen(Label) + 1;\r
+       m_Labels[Index] = MYALLOC(char, n, SeqDB);\r
+       memcpy(m_Labels[Index], Label, n);\r
+\r
+       // m_SeqLengths[0] is read here before m_SeqLengths[Index] is written,\r
+       // which is safe because the comparison only runs for Index > 0.\r
+       if (Index == 0)\r
+               m_Aligned = true;\r
+       else\r
+               m_Aligned = (m_Aligned && L == m_SeqLengths[0]);\r
+\r
+       m_SeqLengths[Index] = L;\r
+\r
+       EndTimer(AddSeq);\r
+       return Index;\r
+       }\r
+\r
+unsigned SeqDB::GetIndex(const char *Label) const\r
+       {\r
+       for (unsigned i = 0; i < m_SeqCount; ++i)\r
+               if (strcmp(Label, m_Labels[i]) == 0)\r
+                       return i;\r
+       Die("SeqDB::GetIndex(%s), not found", Label);\r
+       return UINT_MAX;\r
+       }\r
+\r
+void SeqDB::MakeLabelToIndex(map<string, unsigned> &LabelToIndex)\r
+       {\r
+       LabelToIndex.clear();\r
+       for (unsigned i = 0; i < m_SeqCount; ++i)\r
+               {\r
+               const string &Label = string(GetLabel(i));\r
+               if (LabelToIndex.find(Label) != LabelToIndex.end())\r
+                       Die("Duplicate label: %s", Label.c_str());\r
+               LabelToIndex[Label] = i;\r
+               }\r
+       }\r
diff --git a/seqdb.h b/seqdb.h
new file mode 100644 (file)
index 0000000..fafbdd9
--- /dev/null
+++ b/seqdb.h
@@ -0,0 +1,109 @@
+#ifndef seqdb_h\r
+#define seqdb_h\r
+\r
+#include <vector>\r
+#include <map>\r
+#include "myutils.h"\r
+\r
+struct SeqData;\r
+\r
+using namespace std;\r
+\r
+// In-memory collection of labelled sequences.\r
+// Labels and sequences are stored as parallel arrays indexed by sequence\r
+// index; the database owns copies of both (see AddSeq / Clear).\r
+struct SeqDB\r
+       {\r
+private:\r
+       // Copying is disabled: declared private and not defined here.\r
+       SeqDB(const SeqDB &rhs);\r
+       SeqDB &operator=(const SeqDB &rhs);\r
+\r
+public:\r
+       // File the database was loaded from (set by FromFasta).\r
+       string m_FileName;\r
+       // Parallel arrays with m_SeqCount used entries and room for m_Size.\r
+       char **m_Labels;\r
+       byte **m_Seqs;\r
+       unsigned *m_SeqLengths;\r
+       unsigned m_SeqCount;\r
+       unsigned m_Size;\r
+\r
+       // True while all sequences added so far have the same length.\r
+       bool m_Aligned;\r
+       // Alphabet flag; only meaningful once m_IsNucleoSet is true.\r
+       bool m_IsNucleo;\r
+       bool m_IsNucleoSet;\r
+\r
+public:\r
+       SeqDB();\r
+       ~SeqDB();\r
+       // ctor=true must only be passed by the constructor.\r
+       void Clear(bool ctor = false);\r
+       void InitEmpty(bool Nucleo);\r
+\r
+       // Appends a copy of the label and sequence; returns the new index.\r
+       unsigned AddSeq(const char *Label, const byte *Seq, unsigned L);\r
+\r
+       // Letters of sequence SeqIndex; the index is asserted to be in range.\r
+       byte *GetSeq(unsigned SeqIndex) const\r
+               {\r
+               asserta(SeqIndex < m_SeqCount);\r
+               return m_Seqs[SeqIndex];\r
+               }\r
+\r
+       // Label of sequence SeqIndex; the index is asserted to be in range.\r
+       const char *GetLabel(unsigned SeqIndex) const\r
+               {\r
+               asserta(SeqIndex < m_SeqCount);\r
+               return m_Labels[SeqIndex];\r
+               }\r
+\r
+       // Length of sequence SeqIndex; the index is asserted to be in range.\r
+       unsigned GetSeqLength(unsigned SeqIndex) const\r
+               {\r
+               asserta(SeqIndex < m_SeqCount);\r
+               return m_SeqLengths[SeqIndex];\r
+               }\r
+\r
+       unsigned GetSeqCount() const\r
+               {\r
+               return m_SeqCount;\r
+               }\r
+\r
+       // Number of unordered pairs of distinct sequences: n*(n-1)/2.\r
+       unsigned GetPairCount() const\r
+               {\r
+               unsigned SeqCount = GetSeqCount();\r
+               return (SeqCount*(SeqCount - 1))/2;\r
+               }\r
+\r
+       // Index of an unordered pair, independent of argument order:\r
+       // hi*(hi-1)/2 + lo, where hi is the larger index and lo the smaller.\r
+       unsigned GetPairIndex(unsigned SeqIndex1, unsigned SeqIndex2) const\r
+               {\r
+               if (SeqIndex1 > SeqIndex2)\r
+                       return (SeqIndex1*(SeqIndex1 - 1))/2 + SeqIndex2;\r
+               return (SeqIndex2*(SeqIndex2 - 1))/2 + SeqIndex1;\r
+               }\r
+\r
+       // Number of alignment columns, i.e. the length of the first sequence.\r
+       // Dies if the database is not aligned or is empty.\r
+       unsigned GetColCount() const\r
+               {\r
+               if (!m_Aligned)\r
+                       Die("SeqDB::GetColCount, not aligned");\r
+               if (m_SeqCount == 0)\r
+                       Die("SeqDB::GetColCount, empty");\r
+               return m_SeqLengths[0];\r
+               }\r
+\r
+       // Asserts that the alphabet has been determined first.\r
+       bool IsNucleo() const\r
+               {\r
+               asserta(m_IsNucleoSet);\r
+               return m_IsNucleo;\r
+               }\r
+\r
+       // Fills Buffer with pointers into this database; nothing is copied.\r
+       void GetSeqData(unsigned Id, SeqData &Buffer) const;\r
+\r
+       unsigned GetMaxLabelLength() const;\r
+       unsigned GetMaxSeqLength() const;\r
+       // Determines the alphabet by sampling random letters.\r
+       void SetIsNucleo();\r
+       // Linear search by exact label; dies when the label is not found.\r
+       unsigned GetIndex(const char *Label) const;\r
+       // Dies on duplicate labels.\r
+       void MakeLabelToIndex(map<string, unsigned> &LabelToIndex);\r
+\r
+       void LogMe() const;\r
+       void FromFasta(const string &FileName, bool AllowGaps = false);\r
+\r
+       // ToFasta writes complete records (header line plus sequence rows);\r
+       // SeqToFasta writes the header line only when WithLabel is true.\r
+       void ToFasta(const string &FileName) const;\r
+       void ToFasta(FILE *f, unsigned SeqIndex) const;\r
+       void SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel = false) const;\r
+\r
+       unsigned GetTotalLength() const;\r
+       };\r
+\r
+bool isgap(byte c);\r
+\r
+#endif\r
index 1ca79b1b7c738f10f8a23b1d2fe7ca353e0dcd09..620de95c333cdee1360157f7aa7068f3ce1f1f62 100644 (file)
@@ -15,6 +15,7 @@ vector<string> SeqSummaryCommand::setParameters(){
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
diff --git a/setnucmx.cpp b/setnucmx.cpp
new file mode 100644 (file)
index 0000000..030ff5a
--- /dev/null
@@ -0,0 +1,77 @@
+#include "myutils.h"
+#include "mx.h"
+
+Mx<float> g_SubstMxf;
+float **g_SubstMx;
+
+static const char Alphabet[] = "ACGTU";
+
+void SetNucSubstMx(double Match, double Mismatch)\r
+       {\r
+       static bool Done = false;\r
+       if (Done)\r
+               return;\r
+       Done = true;\r
+\r
+       if (Match <= 0.0)\r
+               Die("Match score should be +ve");\r
+       if (Mismatch >= 0.0)\r
+               Die("Mismatch score should be -ve");\r
+\r
+       unsigned N = unsigned(strlen(Alphabet));\r
+\r
+       g_SubstMxf.Alloc("NUCMX", 256, 256);\r
+       strcpy(g_SubstMxf.m_Alpha, "ACGT");\r
+       g_SubstMxf.Init(0);\r
+       g_SubstMx = g_SubstMxf.GetData();\r
+       for (unsigned i = 0; i < N; ++i)\r
+               {\r
+               for (unsigned j = 0; j < N; ++j)\r
+                       {\r
+                       float v = float(i == j ? Match : Mismatch);\r
+\r
+                       byte ui = (byte) toupper(Alphabet[i]);\r
+                       byte uj = (byte) toupper(Alphabet[j]);\r
+                       byte li = (byte) tolower(ui);\r
+                       byte lj = (byte) tolower(uj);\r
+                       ui = (byte) toupper(ui);\r
+                       uj = (byte) toupper(uj);\r
+\r
+                       g_SubstMx[ui][uj] = v;\r
+                       g_SubstMx[uj][ui] = v;\r
+\r
+                       g_SubstMx[ui][lj] = v;\r
+                       g_SubstMx[uj][li] = v;\r
+\r
+                       g_SubstMx[li][uj] = v;\r
+                       g_SubstMx[lj][ui] = v;\r
+\r
+                       g_SubstMx[li][lj] = v;\r
+                       g_SubstMx[lj][li] = v;\r
+                       }\r
+               }\r
+\r
+       for (unsigned j = 0; j < N; ++j)\r
+               {\r
+               float v = 0.0f;\r
+\r
+               byte ui = (byte) 'N';\r
+               byte uj = (byte) toupper(Alphabet[j]);\r
+               byte li = (byte) 'n';\r
+               byte lj = (byte) tolower(uj);\r
+               ui = (byte) toupper(ui);\r
+               uj = (byte) toupper(uj);\r
+\r
+               g_SubstMx[ui][uj] = v;\r
+               g_SubstMx[uj][ui] = v;\r
+\r
+               g_SubstMx[ui][lj] = v;\r
+               g_SubstMx[uj][li] = v;\r
+\r
+               g_SubstMx[li][uj] = v;\r
+               g_SubstMx[lj][ui] = v;\r
+\r
+               g_SubstMx[li][lj] = v;\r
+               g_SubstMx[lj][li] = v;\r
+               }\r
+       }\r
diff --git a/sfasta.cpp b/sfasta.cpp
new file mode 100644 (file)
index 0000000..5e794c6
--- /dev/null
@@ -0,0 +1,467 @@
+#include "sfasta.h"\r
+#include "orf.h"\r
+#include "alpha.h"\r
+#include "timing.h"\r
+\r
+// True if c is a gap character, i.e. '-' or '.'.\r
+static inline bool isgap(byte c)\r
+       {\r
+       return c == '-' || c == '.';\r
+       }\r
+\r
+const unsigned BufferSize = 16*1024*1024;\r
+\r
+static unsigned GetMaxPoly(const byte *Seq, unsigned L)\r
+       {\r
+       byte CurrChar = Seq[0];\r
+       unsigned Start = 0;\r
+       unsigned MaxLen = 1;\r
+       for (unsigned i = 1; i < L; ++i)\r
+               {\r
+               char c = Seq[i];\r
+               if (c != CurrChar || i+1 == L)\r
+                       {\r
+                       unsigned Len = i - Start;\r
+                       if (Len > MaxLen)\r
+                               MaxLen = Len;\r
+                       CurrChar = c;\r
+                       Start = i;\r
+                       }\r
+               }\r
+       return MaxLen;\r
+       }\r
+\r
+SFasta::SFasta()\r
+       {\r
+       m_FileName = "";\r
+       m_File = 0;\r
+       m_Buffer = 0;\r
+       m_BufferSize = 0;\r
+       m_BufferOffset = 0;\r
+       m_BufferBytes = 0;\r
+       m_FilePos = 0;\r
+       m_FileSize = 0;\r
+       m_Label = 0;\r
+       m_SeqLength = 0;\r
+       m_TooShortCount = 0;\r
+       m_TooLongCount = 0;\r
+       m_ShortestLength = 0;\r
+       m_LongestLength = 0;\r
+       m_IsNucleo = false;\r
+       m_IsNucleoSet = false;\r
+       }\r
+\r
+// Release the buffer and close the file, if any, via Clear().\r
+SFasta::~SFasta()\r
+       {\r
+       Clear();\r
+       }\r
+\r
+void SFasta::Clear()\r
+       {\r
+       MYFREE(m_Buffer, m_BufferSize, SFasta);\r
+       if (m_File != 0)\r
+               CloseStdioFile(m_File);\r
+\r
+       m_FileName = "";\r
+       m_File = 0;\r
+       m_Buffer = 0;\r
+       m_BufferSize = 0;\r
+       m_BufferOffset = 0;\r
+       m_BufferBytes = 0;\r
+       m_FilePos = 0;\r
+       m_FileSize = 0;\r
+       m_Label = 0;\r
+       m_SeqLength = 0;\r
+       m_SeqIndex = UINT_MAX;\r
+       m_AllowGaps = false;\r
+       m_IsNucleo = false;\r
+       m_IsNucleoSet = false;\r
+       m_TooShortCount = 0;\r
+       m_TooLongCount = 0;\r
+       m_ShortestLength = 0;\r
+       m_LongestLength = 0;\r
+       m_TooPolyCount = 0;\r
+       }\r
+\r
+// Write the reader's file, buffer and current-record state to the log.\r
+// Intended for diagnostics; FillCache() calls it just before dying.\r
+void SFasta::LogMe() const\r
+       {\r
+       Log("\n");\r
+       Log("SFasta::LogMe()\n");\r
+       Log("FileName=%s\n", m_FileName.c_str());\r
+// NOTE(review): off_t values are cast to unsigned for %u; if off_t is wider\r
+// than unsigned the logged size/position is truncated for large files.\r
+       Log("FileSize=%u\n", (unsigned) m_FileSize);\r
+       Log("FilePos=%u\n", (unsigned) m_FilePos);\r
+       Log("BufferSize=%u\n", m_BufferSize);\r
+       Log("BufferPos=%u\n", m_BufferOffset);\r
+       Log("BufferBytes=%u\n", m_BufferBytes);\r
+// m_Label is 0 until the first record has been parsed.\r
+       if (m_Label == 0)\r
+               Log("Label=NULL\n");\r
+       else\r
+               Log("Label=%s\n", m_Label);\r
+       Log("SeqLength=%u\n", m_SeqLength);\r
+       }\r
+\r
+const byte *SFasta::GetNextSeq()\r
+       {\r
+       for (;;)\r
+               {\r
+               const byte *Seq = GetNextSeqLo();\r
+               if (Seq == 0)\r
+                       {\r
+                       if (m_TooShortCount > 0)\r
+                               Warning("%u short sequences (--minlen %u, shortest %u) discarded from %s",\r
+                                 m_TooShortCount, opt_minlen, m_ShortestLength, m_FileName.c_str());\r
+                       if (m_TooLongCount > 0)\r
+                               Warning("%u long sequences (--maxlen %u, longest %u) discarded from %s",\r
+                                 m_TooLongCount, opt_maxlen, m_LongestLength, m_FileName.c_str());\r
+                       if (m_TooPolyCount > 0)\r
+                               Warning("%u sequences with long homopolymers discarded (--maxpoly %u)",\r
+                                 m_TooPolyCount, opt_maxpoly);\r
+                       return 0;\r
+                       }\r
+               if (m_SeqLength < opt_minlen)\r
+                       {\r
+                       ++m_TooShortCount;\r
+                       if (m_ShortestLength == 0 || m_SeqLength < m_ShortestLength)\r
+                               m_ShortestLength = m_SeqLength;\r
+                       continue;\r
+                       }\r
+               if (m_SeqLength > opt_maxlen && opt_maxlen != 0)\r
+                       {\r
+                       if (m_LongestLength == 0 || m_SeqLength > m_LongestLength)\r
+                               m_LongestLength = m_SeqLength;\r
+                       ++m_TooLongCount;\r
+                       continue;\r
+                       }\r
+               return Seq;\r
+               }\r
+       }\r
+\r
+// Low-level reader: parse the record starting at m_BufferOffset and return\r
+// a pointer to its sequence letters, or 0 when the file is exhausted.\r
+// The cache is refilled first if the current buffer has been consumed.\r
+// On return:\r
+//   m_Label        NUL-terminated label (points into m_Buffer)\r
+//   m_SeqLength    number of letters kept\r
+//   m_SeqIndex     zero-based index of this record\r
+//   m_BufferOffset position of the next '>' or m_BufferBytes\r
+// The sequence is compacted in place inside m_Buffer and is NOT\r
+// NUL-terminated. No length filtering is done here; see GetNextSeq().\r
+const byte *SFasta::GetNextSeqLo()\r
+       {\r
+// End of cache?\r
+       if (m_BufferOffset == m_BufferBytes)\r
+               {\r
+       // End of file?\r
+               if (m_FilePos == m_FileSize)\r
+                       return 0;\r
+               FillCache();\r
+               }\r
+\r
+       StartTimer(SF_GetNextSeq);\r
+       asserta(m_Buffer[m_BufferOffset] == '>');\r
+       m_Label = (char *) (m_Buffer + m_BufferOffset + 1);\r
+\r
+// Scan to end-of-line. Bytes at and beyond m_BufferBytes are either a\r
+// partial record or stale data, so the scan is limited to m_BufferBytes\r
+// rather than m_BufferSize.\r
+       byte *ptr = 0;\r
+       for (unsigned i = m_BufferOffset; i < m_BufferBytes; ++i)\r
+               {\r
+               char c = m_Buffer[i];\r
+               if (c == '\n' || c == '\r')\r
+                       {\r
+                       ptr = m_Buffer + i;\r
+                       break;\r
+                       }\r
+               }\r
+       asserta(ptr != 0);\r
+\r
+// Terminate the label in place. ptr is the first end-of-line character,\r
+// so the label occupies [m_Label, ptr).\r
+       char *LabelEnd = (char *) ptr;\r
+       if (opt_trunclabels)\r
+               {\r
+       // Cut at the first white space. The cast keeps isspace() defined\r
+       // for bytes >= 0x80 when plain char is signed.\r
+               for (char *p = m_Label; p < LabelEnd; ++p)\r
+                       if (isspace((unsigned char) *p))\r
+                               {\r
+                               LabelEnd = p;\r
+                               break;\r
+                               }\r
+               }\r
+       else\r
+               {\r
+       // Keep the whole line, replacing tabs with spaces.\r
+               for (char *p = m_Label; p < LabelEnd; ++p)\r
+                       if (*p == '\t')\r
+                               *p = ' ';\r
+               }\r
+       *LabelEnd = 0;\r
+\r
+// ptr points to end-of-line.\r
+// Move to start of sequence data.\r
+       byte *Seq = ++ptr;\r
+\r
+// Delete white space in-place\r
+       byte *To = ptr;\r
+       m_BufferOffset = (unsigned) (ptr - m_Buffer);\r
+       while (m_BufferOffset < m_BufferBytes)\r
+               {\r
+               byte c = m_Buffer[m_BufferOffset];\r
+               if (c == '>')\r
+                       {\r
+               // A '>' starts the next record only at the beginning of a line.\r
+                       char prevc = '\n';\r
+                       if (m_BufferOffset > 0)\r
+                               prevc = m_Buffer[m_BufferOffset-1];\r
+                       if (prevc == '\n' || prevc == '\r')\r
+                               break;\r
+                       }\r
+               ++m_BufferOffset;\r
+               if (isalpha(c) || (isgap(c) && m_AllowGaps))\r
+                       *To++ = c;\r
+               else if (c == '\n' || c == '\r')\r
+                       continue;\r
+               else\r
+                       {\r
+               // Any other byte is dropped. Warn once per process only.\r
+                       const char *Label = (m_Label == 0 ? "" : m_Label);\r
+                       static bool WarningDone = false;\r
+                       if (!WarningDone)\r
+                               {\r
+                               if (isgap(c))\r
+                                       Warning("Ignoring gaps in FASTA file '%s'",\r
+                                               m_FileName.c_str());\r
+                               else if (isprint(c))\r
+                                       Warning("Invalid FASTA file '%s', non-letter '%c' in sequence >%s",\r
+                                         m_FileName.c_str(), c, Label);\r
+                               else\r
+                                       Warning("Invalid FASTA file '%s', non-printing byte (hex %02x) in sequence >%s",\r
+                                         m_FileName.c_str(), c, Label);\r
+                               WarningDone = true;\r
+                               }\r
+                       continue;\r
+                       }\r
+               }\r
+       m_SeqLength = unsigned(To - Seq);\r
+\r
+// UINT_MAX means no record has been read yet.\r
+       if (m_SeqIndex == UINT_MAX)\r
+               m_SeqIndex = 0;\r
+       else\r
+               ++m_SeqIndex;\r
+\r
+       EndTimer(SF_GetNextSeq);\r
+       return Seq;\r
+       }\r
+\r
+// Open FileName for sequential reading.\r
+// Any previously open file is released first via Clear(), a cache buffer of\r
+// BufferSize bytes is allocated and the file size is recorded. No data is\r
+// read until the first GetNextSeq() call.\r
+void SFasta::Open(const string &FileName)\r
+       {\r
+       Clear();\r
+       m_FileName = FileName;\r
+       m_File = OpenStdioFile(FileName);\r
+       m_BufferSize = BufferSize;\r
+       //m_Buffer = myalloc<byte>(m_BufferSize);\r
+       m_Buffer = MYALLOC(byte, m_BufferSize, SFasta);\r
+       m_FileSize = GetStdioFileSize(m_File);\r
+       }\r
+\r
+// Restart reading from the beginning of the file.\r
+// Marking the cache as empty with file position 0 makes the next\r
+// GetNextSeqLo() call refill the buffer from offset 0.\r
+// NOTE(review): m_SeqIndex and the too-short/too-long statistics are not\r
+// reset here, so they keep counting across a rewind -- confirm this is\r
+// what callers expect.\r
+void SFasta::Rewind()\r
+       {\r
+       m_BufferOffset = 0;\r
+       m_BufferBytes = 0;\r
+       m_FilePos = 0;\r
+       }\r
+\r
+bool SFasta::SetIsNucleo()\r
+       {\r
+       if (m_FilePos != 0)\r
+               Die("SFasta::IsNucleo, not at BOF");\r
+\r
+       unsigned LetterCount = 0;\r
+       unsigned NucleoLetterCount = 0;\r
+       for (;;)\r
+               {\r
+               const byte *Seq = GetNextSeq();\r
+               if (Seq == 0)\r
+                       break;\r
+               unsigned L = GetSeqLength();\r
+               for (unsigned i = 0; i < L; ++i)\r
+                       if (g_IsNucleoChar[Seq[i]])\r
+                               ++NucleoLetterCount;\r
+               LetterCount += L;\r
+               if (LetterCount > 256)\r
+                       break;\r
+               }\r
+       Rewind();\r
+       if (LetterCount == 0)\r
+               {\r
+               m_IsNucleoSet = true;\r
+               m_IsNucleo = true;\r
+               return true;\r
+               }\r
+\r
+// Nucleo if more than 90% nucleo letters AGCTUN\r
+       m_IsNucleo = double(NucleoLetterCount)/LetterCount > 0.9;\r
+       m_IsNucleoSet = true;\r
+       return m_IsNucleo;\r
+       }\r
+\r
+// Load the next chunk of the file into m_Buffer, starting at m_FilePos.\r
+// At most m_BufferSize bytes are read. Unless this chunk reaches the end of\r
+// the file, the cached data is cut back to the last '>' that starts a line,\r
+// so the buffer only ever exposes whole records; the cut-off partial record\r
+// is re-read by the next call because m_FilePos advances by m_BufferBytes\r
+// only.\r
+// Dies if the chunk does not begin with '>' or if a non-final chunk holds\r
+// no complete record (a single record larger than the buffer).\r
+void SFasta::FillCache()\r
+       {\r
+       StartTimer(SF_FillCache);\r
+       asserta(m_FilePos < m_FileSize);\r
+\r
+// off_t may be larger type than unsigned, e.g. 64- vs. 32-bit.\r
+       off_t otBytesToRead = m_FileSize - m_FilePos;\r
+\r
+// Clamp the read to the buffer size; if clamped, more data follows.\r
+       bool FinalBuffer = true;\r
+       if (otBytesToRead > (off_t) m_BufferSize)\r
+               {\r
+               FinalBuffer = false;\r
+               otBytesToRead = m_BufferSize;\r
+               }\r
+\r
+       unsigned BytesToRead = unsigned(otBytesToRead);\r
+       asserta(BytesToRead > 0);\r
+       asserta(BytesToRead <= m_BufferSize);\r
+\r
+       SetStdioFilePos(m_File, m_FilePos);\r
+       ReadStdioFile(m_File, m_Buffer, BytesToRead);\r
+// Every chunk must start on a record boundary.\r
+       if (m_Buffer[0] != '>')\r
+               {\r
+               if (m_FilePos == 0)\r
+                       Die("Input is not FASTA file");\r
+               else\r
+                       Die("SFasta::FillCache() failed, expected '>'");\r
+               }\r
+\r
+       m_BufferOffset = 0;\r
+\r
+// If last buffer in file, done\r
+       if (FinalBuffer)\r
+               {\r
+               m_BufferBytes = BytesToRead;\r
+               m_FilePos += BytesToRead;\r
+               EndTimer(SF_FillCache);\r
+               return;\r
+               }\r
+\r
+// If not last buffer, truncate any partial sequence\r
+// at end of buffer. Search backwards to find last '>'.\r
+       byte *ptr = m_Buffer + BytesToRead - 1;\r
+       while (ptr > m_Buffer)\r
+               {\r
+       // Only a '>' at the start of a line begins a record.\r
+               if (ptr[0] == '>' && (ptr[-1] == '\n' || ptr[-1] == '\r'))\r
+                       break;\r
+               --ptr;\r
+               }\r
+\r
+// Reaching the start of the buffer means no later record start was found.\r
+       if (ptr == m_Buffer)\r
+               {\r
+               LogMe();\r
+               if (*ptr != '>')\r
+                       {\r
+       // No '>' found.\r
+       // This might technically be legal FASTA if the entire\r
+       // buffer is white space, but strange if not the last buffer\r
+       // in the file, so quit anyway.\r
+                       Die("Failed to find '>' (pos=%u, bytes=%u)",\r
+                         (unsigned) m_FilePos, BytesToRead);\r
+                       }\r
+               else\r
+                       {\r
+       // Entire buffer is one sequence which may be truncated.\r
+                       Die("Sequence too long (pos=%u, bytes=%u)",\r
+                         (unsigned) m_FilePos, BytesToRead);\r
+                       }\r
+               }\r
+\r
+       asserta(*ptr == '>');\r
+\r
+// Expose only the complete records; the next fill starts at this '>'.\r
+       m_BufferBytes = unsigned(ptr - m_Buffer);\r
+       m_FilePos += m_BufferBytes;\r
+\r
+       EndTimer(SF_FillCache);\r
+       }\r
+\r
+unsigned SFasta::GetPctDoneX10() const\r
+       {\r
+       if (m_FilePos == 0 || m_FileSize == 0)\r
+               return 0;\r
+\r
+       assert(m_FilePos >= (off_t) m_BufferBytes);\r
+       off_t BufferStart = m_FilePos - m_BufferBytes;\r
+       off_t BufferPos = BufferStart + m_BufferOffset;\r
+\r
+       unsigned iPctX10 = unsigned(10.0*double(BufferPos)*100.0/double(m_FileSize));\r
+       if (iPctX10 == 0)\r
+               return 1;\r
+       if (iPctX10 >= 999)\r
+               return 998;\r
+       return iPctX10;\r
+       }\r
+\r
+double SFasta::GetPctDone() const\r
+       {\r
+       if (m_FilePos == 0 || m_FileSize == 0)\r
+               return 0;\r
+\r
+       assert(m_FilePos >= (off_t) m_BufferBytes);\r
+       off_t BufferStart = m_FilePos - m_BufferBytes;\r
+       off_t BufferPos = BufferStart + m_BufferOffset;\r
+\r
+       return double(BufferPos)*100.0/double(m_FileSize);\r
+       }\r
+\r
+// Fetch the next sequence via GetNextSeq() and describe it in SD.\r
+// Returns false at end of input (SD.Seq is then 0 and the other fields are\r
+// left untouched), true otherwise.\r
+// SD.Seq and SD.Label point into the reader's buffer rather than owning\r
+// copies. GetIsNucleo() asserts that SetIsNucleo() has been called.\r
+bool SFasta::GetNextSD(SeqData &SD)\r
+       {\r
+       SD.Seq = GetNextSeq();\r
+       if (SD.Seq == 0)\r
+               return false;\r
+\r
+       SD.Label = GetLabel();\r
+       SD.L = GetSeqLength();\r
+       SD.Index = GetSeqIndex();\r
+       SD.ORFParent = 0;\r
+       SD.Nucleo = GetIsNucleo();\r
+       SD.RevComp = false;\r
+\r
+       return true;\r
+       }\r
+\r
+#if    TEST\r
+// Test driver (compiled only when TEST is non-zero): read every sequence\r
+// from opt_input, optionally log each record, and report the total number\r
+// of sequences and letters.\r
+void TestSFasta()\r
+       {\r
+       SFasta SF;\r
+       SF.Open(opt_input);\r
+\r
+       if (opt_verbose)\r
+               {\r
+               Log("  Index   Length  Label\n");\r
+               Log("-------  -------  -----\n");\r
+               }\r
+\r
+       unsigned Index = 0;\r
+       unsigned SeqCount = 0;\r
+// Accumulated as double, presumably so very large totals do not overflow\r
+// an unsigned counter.\r
+       double LetterCount = 0.0;\r
+       ProgressStep(0, 1000, "Reading");\r
+       for (;;)\r
+               {\r
+               const byte *Seq = SF.GetNextSeq();\r
+               if (Seq == 0)\r
+                       break;\r
+               ProgressStep(SF.GetPctDoneX10(), 1000, "Reading");\r
+               const char *Label = SF.GetLabel();\r
+               unsigned L = SF.GetSeqLength();\r
+               ++SeqCount;\r
+               LetterCount += L;\r
+\r
+               if (opt_verbose)\r
+                       {\r
+                       Log(">%7u  %7u  '%s'\n", Index, L, Label);\r
+               // The sequence is not NUL-terminated, so its length is passed\r
+               // as the width and precision of %s.\r
+               // NOTE(review): '*' arguments are expected to be int; L is unsigned.\r
+                       Log("+%7.7s  %7.7s  \"%*.*s\"\n", "", "", L, L, Seq);\r
+                       }\r
+\r
+               ++Index;\r
+               }\r
+       ProgressStep(999, 1000, "Reading");\r
+\r
+       Progress("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount));\r
+       Log("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount));\r
+       }\r
+#endif // TEST\r
diff --git a/sfasta.h b/sfasta.h
new file mode 100644 (file)
index 0000000..ed2f2ff
--- /dev/null
+++ b/sfasta.h
@@ -0,0 +1,93 @@
+#ifndef sfasta_h\r
+#define sfasta_h\r
+\r
+#include "myutils.h"\r
+#include "seq.h"\r
+\r
+typedef void (*ON_START_XSEQ)(const SeqData &SD);\r
+typedef void (*ON_END_XSEQ)(const SeqData &SD);\r
+\r
+// Sequential reader for FASTA file format.\r
+// Serves sequences in file order to save memory.\r
+// Caches biggish chunks to compromise memory vs. speed.\r
+// Typical use: Open(), optionally SetIsNucleo(), then call GetNextSeq() or\r
+// GetNextSD() until it reports end of file.\r
+// Pointers returned for sequences and labels refer to the internal cache\r
+// buffer, not to separately allocated copies.\r
+class SFasta\r
+       {\r
+public:\r
+// Name and handle of the open file; m_File is 0 when nothing is open.\r
+       string m_FileName;\r
+       FILE *m_File;\r
+\r
+// If true, gap characters ('-' and '.') are kept in sequences;\r
+// otherwise they are dropped.\r
+       bool m_AllowGaps;\r
+\r
+// Size of the file in bytes, set by Open().\r
+       off_t m_FileSize;\r
+\r
+// Position to start next read\r
+       off_t m_FilePos;\r
+\r
+// Cached data.\r
+       byte *m_Buffer;\r
+\r
+// Bytes allocated to m_Buffer\r
+       unsigned m_BufferSize;\r
+\r
+// Current position in buffer, normally points to '>'\r
+       unsigned m_BufferOffset;\r
+\r
+// File data in buffer <= m_BufferSize\r
+       unsigned m_BufferBytes;\r
+\r
+// Current label\r
+// Points into m_Buffer, not a separate buffer.\r
+       char *m_Label;\r
+\r
+// Current sequence length\r
+       unsigned m_SeqLength;\r
+\r
+// Current seq index\r
+// UINT_MAX until the first sequence has been read.\r
+       unsigned m_SeqIndex;\r
+\r
+// Statistics on sequences discarded by GetNextSeq() because of their length.\r
+       unsigned m_ShortestLength;\r
+       unsigned m_LongestLength;\r
+       unsigned m_TooShortCount;\r
+       unsigned m_TooLongCount;\r
+// Reported by GetNextSeq() at end of input if non-zero.\r
+       unsigned m_TooPolyCount;\r
+\r
+private:\r
+// Result of SetIsNucleo(); m_IsNucleo is meaningful only if m_IsNucleoSet.\r
+       bool m_IsNucleoSet;\r
+       bool m_IsNucleo;\r
+\r
+public:\r
+       SFasta();\r
+       ~SFasta();\r
+\r
+// Close the file, free the buffer and reset all state.\r
+       void Clear();\r
+\r
+// Open a file for reading; implies Clear().\r
+       void Open(const string &FileName);\r
+\r
+// Restart reading from the beginning of the file.\r
+       void Rewind();\r
+\r
+// Sample the start of the file to decide nucleotide vs. other alphabet,\r
+// then rewind. Must be called before any sequence has been read.\r
+       bool SetIsNucleo();\r
+\r
+// Cached result of SetIsNucleo(); asserts that it has been called.\r
+       bool GetIsNucleo() const { asserta(m_IsNucleoSet); return m_IsNucleo; };\r
+\r
+// Get next sequence.\r
+// Returns zero on end-of-file\r
+       const byte *GetNextSeq();\r
+\r
+// Get next sequence as SeqData object, return false on end-of-file.\r
+       bool GetNextSD(SeqData &SD);\r
+\r
+// Length of most recent sequence returned by GetNextSeq().\r
+       unsigned GetSeqLength() const { return m_SeqLength; }\r
+\r
+// Label of most recent sequence returned by GetNextSeq().\r
+       const char *GetLabel() const { return m_Label; }\r
+\r
+// Index of most recent sequence returned by GetNextSeq().\r
+       unsigned GetSeqIndex() const { return m_SeqIndex; }\r
+\r
+// Progress through the file: tenths of a percent, and percent.\r
+       unsigned GetPctDoneX10() const;\r
+       double GetPctDone() const;\r
+\r
+// Write the reader state to the log.\r
+       void LogMe() const;\r
+\r
+private:\r
+// Read the next chunk of the file into m_Buffer.\r
+       void FillCache();\r
+\r
+// Parse the next record without applying length filters.\r
+       const byte *GetNextSeqLo();\r
+       };\r
+\r
+#endif // sfasta_h\r
index 8e162eed63a4f1a4b5ca026dcb09bb60f2b31743..30761e10d4a3048d1af36071471865bd793e28b7 100644 (file)
@@ -56,7 +56,7 @@ vector<string> SummarySharedCommand::setParameters(){
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
-               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-skulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
                CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
diff --git a/svnmods.h b/svnmods.h
new file mode 100644 (file)
index 0000000..c68513e
--- /dev/null
+++ b/svnmods.h
@@ -0,0 +1,15 @@
+"Path: .\n"
+"URL: file:///public/svn/usearch\n"
+"Repository Root: file:///public/svn/usearch\n"
+"Repository UUID: 58640331-1837-4c17-bc3e-636dc59aced1\n"
+"Revision: 34\n"
+"Node Kind: directory\n"
+"Schedule: normal\n"
+"Last Changed Author: bob\n"
+"Last Changed Rev: 34\n"
+"Last Changed Date: 2011-05-01 08:29:04 -0700 (Sun, 01 May 2011)\n"
+"\n"
+"?       mk\n"
+"!       svnmods.h\n"
+"M       ungappedblastid.cpp\n"
+"M       chaindisjointhits.cpp\n"
diff --git a/svnversion.h b/svnversion.h
new file mode 100644 (file)
index 0000000..2a64d50
--- /dev/null
@@ -0,0 +1 @@
+"40"
diff --git a/timers.h b/timers.h
new file mode 100644 (file)
index 0000000..81cf7d1
--- /dev/null
+++ b/timers.h
@@ -0,0 +1,173 @@
+// X-macro list of timer names, one T(name) entry per timer.\r
+// timing.h defines T(x) and includes this file twice: once to generate the\r
+// TIMER_##x enumerators and once to compute TimerCount.\r
+// This file must therefore contain nothing but T(...) entries and comments.\r
+T(MxBase_Alloc)\r
+T(MxBase_FreeData)\r
+T(MxBase_AllocData)\r
+T(SortSeqIndexes)\r
+T(Alloc_Vectors)\r
+T(MainLoop_NotNW)\r
+T(WriteOutput)\r
+T(NWB)\r
+T(ReadAllStdioFile)\r
+T(Windex_Init)\r
+T(Windex_SetSeqIndex)\r
+T(SeqToWords)\r
+T(SeqToWordsStep)\r
+T(SeqToShortWords)\r
+T(SeqToShortWordsA)\r
+T(SeqToShortWordsB)\r
+T(GetFractIdB)\r
+T(Windex_UniqueWordsAlloc)\r
+T(Windex_UniqueWords)\r
+T(GetPctId)\r
+T(Windex_Reset)\r
+T(GetSig)\r
+T(NWEditDist)\r
+T(EditDist_Myers)\r
+T(EditDist_BlockTarget)\r
+T(NWBand)\r
+T(WordCounting)\r
+T(NWAff)\r
+T(NWAffBand)\r
+T(NWSimple)\r
+T(NWSimpleB)\r
+T(BandWrap)\r
+T(IncIdCounts)\r
+T(GetBestDiagB)\r
+T(GetBestDiagB1)\r
+T(GetBestDiagB2)\r
+T(ClusterInit)\r
+T(ClusterPrep)\r
+T(HotSort1)\r
+T(HotSort2)\r
+T(SortA)\r
+T(SortB)\r
+T(CountSort)\r
+T(AddWords)\r
+T(ClusterWindex)\r
+T(MainInit)\r
+T(Output)\r
+T(WindexTail)\r
+T(WindexExit)\r
+T(Sort)\r
+T(U_AllocSeqLength)\r
+T(U_AllocSeedCount)\r
+T(U_AddSeed)\r
+T(AddSeq)\r
+T(U_SetWordCounts)\r
+T(U_SetWordCountsHash)\r
+T(U_SetWordScores)\r
+T(U_SetHotHits)\r
+T(U_SetHotHitsHash)\r
+T(U_SetHotHitsScores)\r
+T(U_Search)\r
+T(U_SearchExact)\r
+T(WF_SeqToWords)\r
+T(WF_SeqToWordsA)\r
+T(WF_SeqToWordsB)\r
+T(WF_AllocLA)\r
+T(WF_AllocLB)\r
+T(WF_AllocDiags)\r
+T(WF_SetA)\r
+T(WF_SetA_Nb)\r
+T(WF_SetAZero)\r
+T(WF_SetA2)\r
+T(WF_SetB)\r
+T(WF_GetCommonWordCount)\r
+T(WF_GetBestDiag)\r
+T(GetFractIdGivenPath)\r
+T(WX_GetUniqueWords)\r
+T(CompressPath)\r
+T(GetHSPs1)\r
+T(GetHSPs2)\r
+T(AlignHSPs)\r
+T(WF_ResolveHSPs)\r
+T(WX_SetExcludes)\r
+T(ViterbiFast)\r
+T(ViterbiFastBand)\r
+T(ViterbiFastBand0)\r
+T(ViterbiFastBand1)\r
+T(ViterbiFastBand2)\r
+T(ViterbiFastBand3)\r
+T(ViterbiFastBand4)\r
+T(TraceBackBit)\r
+T(TraceBackBitSW)\r
+T(SF_GetNextSeq)\r
+T(SF_FillCache)\r
+T(OnGlobalAccept)\r
+T(UngappedBlast)\r
+T(UngappedBlastId)\r
+T(UngappedBlast2Hit)\r
+T(LogHSPs)\r
+T(BlastOutput)\r
+T(BlastLeft)\r
+T(BlastRight)\r
+T(Blast1)\r
+T(Blast2)\r
+T(Blast3)\r
+T(Blast4)\r
+T(GetBestSeg)\r
+T(SWLinearDP)\r
+T(SWLinearTB)\r
+T(SWLinearDP2)\r
+T(SWLinearTB2)\r
+T(Chain)\r
+T(XlatSeq)\r
+T(XlatSeqToLetters)\r
+T(XDropFwdSimple)\r
+T(XDropFwdFast)\r
+T(XDropFwdFastTB)\r
+T(XDropBwd)\r
+T(SWSimple)\r
+T(PathAlloc)\r
+T(SubPath)\r
+T(SWUngapped)\r
+T(SWFast)\r
+T(SWFastNTB)\r
+T(SWAT_CacheQuery)\r
+T(SWAT_AlignTarget)\r
+T(SWAT_CacheQueryNW)\r
+T(SWAT_AlignTargetNW)\r
+T(SeqDB_FromFasta)\r
+T(LocalUngappedHitToAD)\r
+T(LocalGappedHitToAD)\r
+T(GlobalHitToAD)\r
+T(ResolveOverlaps)\r
+T(GetORFs)\r
+T(ChainCov_AddHit)\r
+T(ChainCov_EndQuery)\r
+T(ChainCov_DoTarget)\r
+T(BuildNb)\r
+T(MakeIntSubstMx)\r
+T(UngappedExtendLeft)\r
+T(UngappedExtendRight)\r
+T(AlignSP)\r
+T(AlignHSP)\r
+\r
+// Background\r
+T(Bg_SearchLoop)\r
+T(Bg_MainInit)\r
+T(Bg_MainTerm)\r
+T(Bg_Other)\r
+T(Bg_1)\r
+T(Bg_2)\r
+T(Bg_3)\r
+T(Bg_4)\r
+T(Bg_5)\r
+T(Bg_6)\r
+T(Bg_7)\r
+T(Bg_8)\r
+T(Bg_9)\r
+T(Bg_XFrame2)\r
+T(Bg_Usearch1)\r
+T(Bg_Usearch2)\r
+T(Bg_Usearch3)\r
+T(Bg_Usearch4)\r
+T(Bg_Hot)\r
+\r
+// For Timer2\r
+T(Search_2)\r
+T(Search_Loop_2)\r
+T(Search_InnerLoop_2)\r
+T(OnHit_2)\r
+T(UngappedBlast_2)\r
+T(MainInit_2)\r
+T(MainTerm_2)\r
diff --git a/timing.h b/timing.h
new file mode 100644 (file)
index 0000000..b566e1b
--- /dev/null
+++ b/timing.h
@@ -0,0 +1,238 @@
+#define TIMING 0
+#ifndef timing_h
+#define timing_h
+
+#define BG_TIMING      0
+
+#if !TIMING
+#undef BG_TIMING
+#define BG_TIMING      0
+#endif
+
+//#if  UCHIMES
+#undef TIMING
+#define TIMING 0
+//#endif
+
+#if TIMING
+
+enum TIMER
+       {
+       TIMER_None,
+#define T(x)   TIMER_##x,
+#include "timers.h"
+#undef T
+       };
+
+const unsigned TimerCount =
+       1       // TIMER_None
+#define T(x)   +1
+#include "timers.h"
+#undef T
+       ;
+
+enum COUNTER
+       {
+#define C(x)   COUNTER_##x,
+#include "counters.h"
+#undef C
+       };
+
+enum ALLOCER
+       {
+#define A(x)   ALLOCER_##x,
+#include "allocs.h"
+#undef A
+       };
+
+const unsigned CounterCount =
+#define C(x)   +1
+#include "counters.h"
+#undef C
+       ;
+
+const unsigned AllocerCount =
+#define A(x)   +1
+#include "allocs.h"
+#undef A
+       ;
+
+#ifdef _MSC_VER
+
+typedef unsigned __int64 TICKS;
+
+#pragma warning(disable:4035)
+// Read the CPU time-stamp counter (MSVC inline-assembly variant).\r
+// The two emitted bytes, 0x0f 0x31, are the RDTSC opcode. The function has\r
+// no return statement, which is why warning 4035 is disabled just above.\r
+inline TICKS GetClockTicks()
+       {
+       _asm
+               {
+               _emit   0x0f
+               _emit   0x31
+               }
+       }
+
+#else  // ifdef _MSC_VER
+
+typedef uint64_t TICKS;
+// Read the CPU time-stamp counter (GCC-style inline-assembly variant).
+// rdtsc delivers the counter as two 32-bit halves, which are combined into
+// one 64-bit tick count.
+__inline__ uint64_t GetClockTicks()
+       {
+       uint32_t lo, hi;
+       /* We cannot use "=A", since this would use %rax on x86_64 */
+       __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+       return (uint64_t)hi << 32 | lo;
+       }
+
+#endif // ifdef _MSC_VER
+
+//void AddTicks(const string &Name, TICKS Ticks1, TICKS Ticks2);
+//void AddBytes(const string &Name, double Bytes);
+//#define SubBytes(Name, Bytes)        AddBytes(Name, -double(Bytes))
+
+const char *TimerToStr(TIMER t);
+
+extern TICKS g_BeginTicks[TimerCount];
+extern double g_TotalTicks[TimerCount];
+extern double g_TotalCounts[TimerCount];
+extern double g_Counters[CounterCount];
+extern unsigned g_AllocNewCount[AllocerCount];
+extern unsigned g_AllocFreeCount[AllocerCount];
+extern double g_AllocNewBytes[AllocerCount];
+extern double g_AllocFreeBytes[AllocerCount];
+extern double g_AllocNetBytes[AllocerCount];
+extern double g_AllocPeakBytes[AllocerCount];
+extern bool g_Timer2[TimerCount];
+extern TIMER g_CurrTimer;
+#if    BG_TIMING
+extern TIMER g_BackgroundTimer;
+#endif
+
+// Allocation wrappers used when TIMING is enabled. They forward to
+// MyAlloc_ / MyFree_ together with the ALLOCER_##Name id and the call site,
+// so per-allocator statistics can be kept.
+// N and Array are parenthesized so that arguments which are expressions
+// (e.g. MYFREE(p, n+3, X)) yield the intended byte count.
+#define MYALLOC(Type, N, Name)         (Type *) MyAlloc_((N)*sizeof(Type), ALLOCER_##Name, __FILE__, __LINE__)
+#define MYFREE(Array, N, Name)         MyFree_(Array, (N)*sizeof((Array)[0]), ALLOCER_##Name, __FILE__, __LINE__)
+
+// Allocate Bytes bytes via mymalloc() and update the statistics of
+// allocator a: call count, total bytes allocated, net bytes outstanding
+// and the peak of the net figure.
+// FileName and Line are accepted but not used here.
+inline void *MyAlloc_(unsigned Bytes, unsigned a, const char *FileName, int Line)
+       {
+       ++g_AllocNewCount[a];
+       g_AllocNewBytes[a] += Bytes;
+       g_AllocNetBytes[a] += Bytes;
+       if (g_AllocNetBytes[a] > g_AllocPeakBytes[a])
+               g_AllocPeakBytes[a] = g_AllocNetBytes[a];
+       return mymalloc(Bytes);
+       }
+
+// Free p via myfree2() and update the statistics of allocator a: call
+// count, total bytes freed and net bytes outstanding.
+// Bytes is supplied by the caller; the net figure is only accurate if it
+// matches the size passed when p was allocated.
+// FileName and Line are accepted but not used here.
+inline void MyFree_(void *p, unsigned Bytes, unsigned a, const char *FileName, int Line)
+       {
+       ++g_AllocFreeCount[a];
+       g_AllocFreeBytes[a] += Bytes;
+       g_AllocNetBytes[a] -= Bytes;
+       myfree2(p, Bytes);
+       }
+
+#if    BG_TIMING
+// Switch the background timer to Timer.
+// If the previous background timer has a non-zero start tick, the time
+// since that tick is added to its total and its count is incremented.
+// The new timer starts at the current tick.
+inline void SetBackgroundTimer_(TIMER Timer)
+       {
+       TICKS Now = GetClockTicks();
+       if (g_BeginTicks[g_BackgroundTimer] != 0)
+               {
+               ++g_TotalCounts[g_BackgroundTimer];
+               g_TotalTicks[g_BackgroundTimer] += double(Now - g_BeginTicks[g_BackgroundTimer]);
+               }
+       g_BackgroundTimer = Timer;
+       g_BeginTicks[Timer] = Now;
+       }
+#else
+#define SetBackgroundTimer_(Timer)     /* empty */
+#endif
+
+// Start Timer and make it the current timer.
+// Timers started this way do not nest: dies if another one is already
+// running. With BG_TIMING, the time the background timer has run so far is
+// added to its total before the foreground timer takes over.
+inline void StartTimer_(TIMER Timer)
+       {
+       if (g_CurrTimer != TIMER_None)
+               Die("StartTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
+
+       TICKS Now = GetClockTicks();
+#if    BG_TIMING
+       if (g_BeginTicks[g_BackgroundTimer] != 0)
+               {
+               ++g_TotalCounts[g_BackgroundTimer];
+               g_TotalTicks[g_BackgroundTimer] += double(Now - g_BeginTicks[g_BackgroundTimer]);
+               }
+#endif
+       g_BeginTicks[Timer] = Now;
+       g_CurrTimer = Timer;
+       }
+
+// Pause Timer, which must be the current timer (dies otherwise).
+// The elapsed ticks are added to the timer's total but, unlike EndTimer_,
+// its count is not incremented. No timer is current afterwards.
+inline void PauseTimer_(TIMER Timer)
+       {
+       if (Timer != g_CurrTimer)
+               Die("PauseTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
+
+       TICKS Now = GetClockTicks();
+       g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]);
+       g_BeginTicks[Timer] = Now;
+       g_CurrTimer = TIMER_None;
+       }
+
+// Stop Timer, which must be the current timer (dies otherwise).
+// The elapsed ticks are added to the timer's total and its count is
+// incremented. No timer is current afterwards. With BG_TIMING, the
+// background timer's start tick is reset to now.
+inline void EndTimer_(TIMER Timer)
+       {
+       if (Timer != g_CurrTimer)
+               Die("EndTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
+
+       TICKS Now = GetClockTicks();
+#if    BG_TIMING
+       g_BeginTicks[g_BackgroundTimer] = Now;
+#endif
+       g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]);
+       ++g_TotalCounts[Timer];
+       g_CurrTimer = TIMER_None;
+       }
+
+// Start Timer without touching g_CurrTimer, so it can run at the same time
+// as a StartTimer_ timer. The timer is flagged in g_Timer2.
+inline void StartTimer2_(TIMER Timer)
+       {
+       g_Timer2[Timer] = true;
+       g_BeginTicks[Timer] = GetClockTicks();
+       }
+
+// Stop a timer started with StartTimer2_: add the elapsed ticks to its
+// total and increment its count. g_CurrTimer is not consulted or changed.
+inline void EndTimer2_(TIMER Timer)
+       {
+       g_TotalTicks[Timer] += double(GetClockTicks() - g_BeginTicks[Timer]);
+       ++g_TotalCounts[Timer];
+       }
+
+#define AddCounter(x, N)       g_Counters[COUNTER_##x] += N
+#define IncCounter(x)          ++(g_Counters[COUNTER_##x])
+#define StartTimer(x)          StartTimer_(TIMER_##x)
+#define PauseTimer(x)          PauseTimer_(TIMER_##x)
+#define EndTimer(x)                    EndTimer_(TIMER_##x)
+#define StartTimer2(x)         StartTimer2_(TIMER_##x)
+#define EndTimer2(x)           EndTimer2_(TIMER_##x)
+
+#if    BG_TIMING
+#define SetBackgroundTimer(x)  SetBackgroundTimer_(TIMER_##x)
+#else
+#define SetBackgroundTimer(x)  /* empty */
+#endif
+
+#else  // if TIMING
+
+#define AddCounter(x, N)       /* empty */
+#define IncCounter(x)          /* empty */
+#define StartTimer(x)          /* empty */
+#define PauseTimer(x)          /* empty */
+#define EndTimer(x)                    /* empty */
+#define StartTimer2(x)         /* empty */
+#define PauseTimer2(x)         /* empty */
+#define EndTimer2(x)           /* empty */
+#define SetBackgroundTimer(x)  /* empty */
+#define MYALLOC(Type, N, Name)         myalloc(Type, N)
+#define MYFREE(Array, N, Name)         myfree(Array)
+
+#endif // if TIMING
+
+void LogMemStats();
+void LogTickStats();
+void LogStats();
+void LogAllocs();
+
+#define AddBytes(x, n) /* empty */
+#define SubBytes(x, n) /* empty */
+
+#endif // if timing_h
diff --git a/tracebackbit.cpp b/tracebackbit.cpp
new file mode 100644 (file)
index 0000000..94159cd
--- /dev/null
@@ -0,0 +1,180 @@
+#include "dp.h"
+
+#define TRACE  0
+
+// Trace-back bit matrix and its row-pointer view; both are set by AllocBit().
+Mx<byte> g_Mx_TBBit;
+byte **g_TBBit;
+// DP row pointers exposed to the aligners. AllocBit() points each one float
+// past the start of its backing buffer so that index [-1] is addressable.
+float *g_DPRow1;
+float *g_DPRow2;
+static float *g_DPBuffer1;
+static float *g_DPBuffer2;
+
+// LB capacity (including slack) the DP buffers were last allocated for.
+static unsigned g_CacheLB;
+
+void AllocBit(unsigned LA, unsigned LB)
+       {
+       g_Mx_TBBit.Alloc("TBBit", LA+1, LB+1);
+       g_TBBit = g_Mx_TBBit.GetData();
+       if (LB > g_CacheLB)
+               {
+               MYFREE(g_DPBuffer1, g_CacheLB, AllocBit);
+               MYFREE(g_DPBuffer2, g_CacheLB, AllocBit);
+
+               g_CacheLB = LB + 128;
+
+       // Allow use of [-1]
+               //g_DPBuffer1 = myalloc<float>(g_CacheLB+3);
+               //g_DPBuffer2 = myalloc<float>(g_CacheLB+3);
+               g_DPBuffer1 = MYALLOC(float, g_CacheLB+3, AllocBit);
+               g_DPBuffer2 = MYALLOC(float, g_CacheLB+3, AllocBit);
+               g_DPRow1 = g_DPBuffer1 + 1;
+               g_DPRow2 = g_DPBuffer2 + 1;
+               }
+       }
+
+void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD)
+       {
+       PD.Alloc(LA+LB);
+
+       StartTimer(TraceBackBit);
+       char *PathPtr = PD.Back;
+       *PathPtr = 0;
+
+       byte **TB = g_TBBit;
+
+#if    TRACE
+       Log("\n");
+       Log("TraceBackBit\n");
+#endif
+
+       size_t i = LA;
+       size_t j = LB;
+       for (;;)
+               {
+#if    TRACE
+               Log("i=%3d  j=%3d  state=%c\n", (int) i, (int) j, State);
+#endif
+               if (i == 0 && j == 0)
+                       break;
+
+               --PathPtr;
+               *PathPtr = State;
+
+               byte t;
+               switch (State)
+                       {
+               case 'M':
+                       asserta(i > 0 && j > 0);
+                       t = TB[i-1][j-1];
+                       if (t & TRACEBITS_DM)
+                               State = 'D';
+                       else if (t & TRACEBITS_IM)
+                               State = 'I';
+                       else
+                               State = 'M';
+                       --i;
+                       --j;
+                       break;
+               case 'D':
+                       asserta(i > 0);
+                       t = TB[i-1][j];
+                       if (t & TRACEBITS_MD)
+                               State = 'M';
+                       else
+                               State = 'D';
+                       --i;
+                       break;
+
+               case 'I':
+                       asserta(j > 0);
+                       t = TB[i][j-1];
+                       if (t & TRACEBITS_MI)
+                               State = 'M';
+                       else
+                               State = 'I';
+                       --j;
+                       break;
+
+               default:
+                       Die("TraceBackBit, invalid state %c", State);
+                       }
+               }
+       PD.Start = PathPtr;
+       EndTimer(TraceBackBit);
+       }
+
+void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj,
+  unsigned &Leni, unsigned &Lenj, PathData &PD)
+       {
+       PD.Alloc(LA+LB);
+
+       StartTimer(TraceBackBitSW);
+       char *PathPtr = PD.Back;
+       *PathPtr = 0;
+
+       byte **TB = g_TBBit;
+
+#if    TRACE
+       Log("\n");
+       Log("TraceBackBitSW\n");
+#endif
+
+       unsigned i = Besti;
+       unsigned j = Bestj;
+       char State = 'M';
+       for (;;)
+               {
+#if    TRACE
+               Log("i=%3d  j=%3d  state=%c\n", (int) i, (int) j, State);
+#endif
+               --PathPtr;
+               *PathPtr = State;
+
+               byte t;
+               switch (State)
+                       {
+               case 'M':
+                       asserta(i > 0 && j > 0);
+                       t = TB[i-1][j-1];
+                       if (t & TRACEBITS_DM)
+                               State = 'D';
+                       else if (t & TRACEBITS_IM)
+                               State = 'I';
+                       else if (t & TRACEBITS_SM)
+                               {
+                               Leni = Besti - i + 1;
+                               Lenj = Bestj - j + 1;
+                               PD.Start = PathPtr;
+                               EndTimer(TraceBackBitSW);
+                               return;
+                               }
+                       else
+                               State = 'M';
+                       --i;
+                       --j;
+                       break;
+               case 'D':
+                       asserta(i > 0);
+                       t = TB[i-1][j];
+                       if (t & TRACEBITS_MD)
+                               State = 'M';
+                       else
+                               State = 'D';
+                       --i;
+                       break;
+
+               case 'I':
+                       asserta(j > 0);
+                       t = TB[i][j-1];
+                       if (t & TRACEBITS_MI)
+                               State = 'M';
+                       else
+                               State = 'I';
+                       --j;
+                       break;
+
+               default:
+                       Die("TraceBackBitSW, invalid state %c", State);
+                       }
+               }
+       }
diff --git a/uc.h b/uc.h
new file mode 100644 (file)
index 0000000..631ea36
--- /dev/null
+++ b/uc.h
@@ -0,0 +1,65 @@
+#ifndef uc_h\r
+#define uc_h\r
+\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+#include "path.h"\r
+\r
+struct AlnData;\r
+\r
+int uchime_main(int, char**);  \r
+\r
+// UCFile: declaration of a record file wrapper. It holds a FILE handle, a raw\r
+// byte buffer, and a set of vectors that look like parallel per-record columns.\r
+// NOTE(review): the member functions are only declared here; the meaning of\r
+// each field below is inferred from its name and type -- confirm against the\r
+// implementation.\r
+class UCFile\r
+       {\r
+public:\r
+       FILE *m_File;\r
+       byte *m_Data;\r
+       // Presumably one entry per record in each vector below -- TODO confirm.\r
+       vector<char> m_RecTypes;\r
+       vector<float> m_PctIds;\r
+       vector<const char *> m_Labels;\r
+       vector<const char *> m_SeedLabels;\r
+       vector<unsigned> m_SeedIndexes;\r
+       vector<const char *> m_CompressedPaths;\r
+       vector<unsigned> m_SeqLengths;\r
+       vector<unsigned> m_SortOrder;\r
+       vector<char> m_Strands;\r
+       vector<unsigned> m_Los;\r
+       vector<unsigned> m_SeedLos;\r
+\r
+public:\r
+       /* some function prototypes */\r
+       \r
+               \r
+       UCFile();\r
+       void Clear(bool ctor = false);\r
+       void Close();\r
+       void FromFile(const string &FileName);\r
+       void FromClstr(const string &FileName);\r
+       void ToFile(const string &FileName);\r
+       unsigned GetRecordCount() const;\r
+       void LogMe() const;\r
+       void ToClstr(const string &FileName);\r
+       void ToFasta(const string &FileName, const SeqDB &Input, bool Reformat);\r
+       void Create(const string &FileName);\r
+       void Sort();\r
+       void Flush() const;\r
+\r
+       // Record writers. WriteHit has three overloads taking, respectively,\r
+       // two SeqData plus a PathData, explicit fields, or an AlnData.\r
+       void WriteNotMatched(unsigned L, const char *Label) const;\r
+       void WriteLibSeed(unsigned SeedIndex, unsigned L, const char *Label) const;\r
+       void WriteNewSeed(unsigned SeedIndex, unsigned L, const char *Label) const;\r
+       void WriteHit(const SeqData &SA, const SeqData &SB, double FractId,\r
+         const PathData &PD) const;\r
+       void WriteReject(const SeqData &SA, const SeqData &SB, double FractId,\r
+         const char *Path) const;\r
+       void WriteHit(unsigned SeedIndex, unsigned L, double PctId,\r
+         const char *CompressedPath, char Strand, unsigned Lo, unsigned SeedLo,\r
+         const char *Label, const char *SeedLabel) const;\r
+       void WriteHit(const AlnData &AD);\r
+       void WriteLibCluster(unsigned SeedIndex, unsigned Size, double AvgId,\r
+         const char *Label) const;\r
+       void WriteNewCluster(unsigned SeedIndex, unsigned Size, double AvgId,\r
+         const char *Label) const;\r
+       void WriteSeqX(FILE *f, const byte *Seq, unsigned L, const char *CompressedPath) const;\r
+       };\r
+\r
+#endif // uc_h\r
diff --git a/uchime_main.cpp b/uchime_main.cpp
new file mode 100644 (file)
index 0000000..40e7f44
--- /dev/null
@@ -0,0 +1,219 @@
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "seqdb.h"\r
+#include "dp.h"\r
+#include "ultra.h"\r
+#include "hspfinder.h"\r
+#include <algorithm>\r
+#include <set>\r
+#include "mothurout.h"\r
+\r
+bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
+  const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
+  float MinFractId, ChimeHit2 &Hit);\r
+\r
+// Output streams; uchime_main() opens them only when opt_uchimeout /\r
+// opt_uchimealns are non-empty.\r
+FILE *g_fUChime;\r
+FILE *g_fUChimeAlns;\r
+// Vector consulted by CmpDescVecFloat(); set by SortDescending() before sorting.\r
+const vector<float> *g_SortVecFloat;\r
+// True when no database was given (opt_db empty); set in uchime_main().\r
+bool g_UchimeDeNovo = false;\r
+\r
+// Prints nothing: the stand-alone banner and help text are commented out,\r
+// so this function is a no-op.\r
+void Usage()\r
+       {\r
+       //printf("\n");\r
+       //printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);\r
+       //printf("http://www.drive5.com/uchime\n");\r
+       //printf("\n");\r
+       //printf("This software is donated to the public domain\n");\r
+       //printf("\n");\r
+\r
+       //printf(\r
+//#include "help.h"\r
+               //);\r
+       }\r
+\r
+// Stub: always calls Die() because BLOSUM62 is not implemented in this build.\r
+void SetBLOSUM62()\r
+       {\r
+       Die("SetBLOSUM62 not implemented");\r
+       }\r
+\r
+// Stub: both parameters are unnamed and ignored; always calls Die().\r
+void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)\r
+       {\r
+       Die("ReadSubstMx not implemented");\r
+       }\r
+\r
+// Intentionally empty definition of the LogAllocs() hook.\r
+void LogAllocs()\r
+       {\r
+       /*empty*/\r
+       }\r
+\r
+// sort() comparator: index i precedes index j when its value in\r
+// *g_SortVecFloat is strictly greater (descending order).\r
+static bool CmpDescVecFloat(unsigned i, unsigned j)\r
+       {\r
+       const vector<float> &Values = *g_SortVecFloat;\r
+       return Values[j] < Values[i];\r
+       }\r
+\r
+// Replaces the contents of v with 0, 1, ..., N-1.\r
+void Range(vector<unsigned> &v, unsigned N)\r
+       {\r
+       v.clear();\r
+       v.resize(N);\r
+       for (unsigned k = 0; k != N; ++k)\r
+               v[k] = k;\r
+       }\r
+\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order)\r
+       {\r
+       StartTimer(Sort);\r
+       const unsigned N = SIZE(Values);\r
+       Range(Order, N);\r
+       g_SortVecFloat = &Values;\r
+       sort(Order.begin(), Order.end(), CmpDescVecFloat);\r
+       EndTimer(Sort);\r
+       }\r
+\r
+float GetAbFromLabel(const string &Label)\r
+       {\r
+       vector<string> Fields;\r
+       Split(Label, Fields, '/');\r
+       const unsigned N = SIZE(Fields);\r
+       for (unsigned i = 0; i < N; ++i)\r
+               {\r
+               const string &Field = Fields[i];\r
+               if (Field.substr(0, 3) == "ab=")\r
+                       {\r
+                       string a = Field.substr(3, string::npos);\r
+                       return (float) atof(a.c_str());\r
+                       }\r
+               }\r
+       if (g_UchimeDeNovo)\r
+               Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());\r
+       return 0.0;\r
+       }\r
+\r
+// Entry point for the embedded UCHIME chimera search.\r
+// Parses options with MyCmdLine(), loads the queries from opt_input and runs\r
+// SearchChime() on each one. With opt_db set the queries are searched against\r
+// that database; otherwise (de novo mode) queries are processed in order of\r
+// decreasing abundance and every query not reported as a hit is added to the\r
+// growing database. Each result is passed to WriteChimeHit().\r
+// Every path that returns from this function returns 0.\r
+int uchime_main(int argc, char *argv[])\r
+       {\r
+       MothurOut* m;\r
+       m = MothurOut::getInstance();\r
+               \r
+       MyCmdLine(argc, argv);\r
+\r
+       if (argc < 2)\r
+               {\r
+               Usage();\r
+               return 0;\r
+               }\r
+\r
+       if (opt_version)\r
+               {\r
+               printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+               return 0;\r
+               }\r
+\r
+       //printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+       //printf("by Robert C. Edgar\n");\r
+       //printf("http://drive5.com/uchime\n");\r
+       //printf("This code is donated to the public domain.\n");\r
+       //printf("\n");\r
+       // Default word length when the caller did not set one.\r
+       if (!optset_w)\r
+               opt_w = 8;\r
+       \r
+       float MinFractId = 0.95f;\r
+       if (optset_id)\r
+               MinFractId = (float) opt_id;\r
+\r
+       Log("%8.2f  minh\n", opt_minh);\r
+       Log("%8.2f  xn\n", opt_xn);\r
+       Log("%8.2f  dn\n", opt_dn);\r
+       Log("%8.2f  xa\n", opt_xa);\r
+       Log("%8.2f  mindiv\n", opt_mindiv);\r
+       Log("%8u  maxp\n", opt_maxp);\r
+\r
+       // --uchime is accepted as a fallback for --input.\r
+       if (opt_input == "" && opt_uchime != "")\r
+               opt_input = opt_uchime;\r
+\r
+       if (opt_input == "")\r
+               Die("Missing --input");\r
+\r
+       g_UchimeDeNovo = (opt_db == "");\r
+\r
+       if (opt_uchimeout != "")\r
+               g_fUChime = CreateStdioFile(opt_uchimeout);\r
+\r
+       if (opt_uchimealns != "")\r
+               g_fUChimeAlns = CreateStdioFile(opt_uchimealns);\r
+\r
+       SeqDB Input;\r
+       SeqDB DB;\r
+\r
+       Input.FromFasta(opt_input);\r
+       if (!Input.IsNucleo())\r
+               Die("Input contains amino acid sequences");\r
+\r
+       // Default processing order is file order; de novo mode re-sorts below.\r
+       const unsigned QuerySeqCount = Input.GetSeqCount();\r
+       vector<unsigned> Order;\r
+       for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+               Order.push_back(i);\r
+\r
+       if (g_UchimeDeNovo)\r
+               {\r
+               vector<float> Abs;\r
+               for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+                       {\r
+                       const char *Label = Input.GetLabel(i);\r
+                       float Ab = GetAbFromLabel(Label);\r
+                       Abs.push_back(Ab);\r
+                       }\r
+               SortDescending(Abs, Order);\r
+               // The database starts empty, so mark it as nucleotide by hand.\r
+               DB.m_IsNucleoSet = true;\r
+               DB.m_IsNucleo = true;\r
+               }\r
+       else\r
+               {\r
+               DB.FromFasta(opt_db);\r
+               if (!DB.IsNucleo())\r
+                       Die("Database contains amino acid sequences");\r
+               }\r
+\r
+       vector<ChimeHit2> Hits;\r
+       unsigned HitCount = 0;\r
+       for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+               {\r
+                       \r
+               // Stop early when mothur signals that the user interrupted the run.\r
+               if (m->control_pressed) { break; }\r
+                       \r
+               unsigned QuerySeqIndex = Order[i];\r
+\r
+               SeqData QSD;\r
+               Input.GetSeqData(QuerySeqIndex, QSD);\r
+\r
+               float QAb = -1.0;\r
+               if (g_UchimeDeNovo)\r
+                       QAb = GetAbFromLabel(QSD.Label);\r
+\r
+               ChimeHit2 Hit;\r
+               // NOTE(review): these references are bound to a null pointer, which is\r
+               // undefined behaviour; presumably SearchChime never touches them --\r
+               // confirm, and pass real objects if it does.\r
+               AlnParams &AP = *(AlnParams *) 0;\r
+               AlnHeuristics &AH = *(AlnHeuristics *) 0;\r
+               HSPFinder &HF = *(HSPFinder *) 0;\r
+               bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);\r
+               if (Found)\r
+                       ++HitCount;\r
+               else\r
+                       {\r
+                       if (g_UchimeDeNovo)\r
+                               DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);\r
+                       }\r
+\r
+               WriteChimeHit(g_fUChime, Hit);\r
+\r
+               // i+1 queries have been processed at this point; report the same\r
+               // denominator that the percentage is computed from.\r
+               ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i+1, Pct(HitCount, i+1));\r
+               \r
+               }\r
+\r
+       Log("\n");\r
+       Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
+         opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r
+\r
+       CloseStdioFile(g_fUChime);\r
+       CloseStdioFile(g_fUChimeAlns);\r
+\r
+       ProgressExit();\r
+       return 0;\r
+       }\r
diff --git a/ultra.h b/ultra.h
new file mode 100644 (file)
index 0000000..e0a432f
--- /dev/null
+++ b/ultra.h
@@ -0,0 +1,8 @@
+#ifndef ultra_h
+#define ultra_h
+
+#include "seqdb.h"
+// Textual aliases: code written against Ultra / GetSeedLabel compiles against
+// SeqDB / GetLabel instead.
+#define Ultra SeqDB
+#define GetSeedLabel GetLabel
+
+#endif // ultra_h
diff --git a/usort.cpp b/usort.cpp
new file mode 100644 (file)
index 0000000..7afbf42
--- /dev/null
+++ b/usort.cpp
@@ -0,0 +1,86 @@
+//#if  UCHIMES\r
+\r
+#include "myutils.h"\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+#include "alpha.h"\r
+\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order);\r
+\r
+// Presence table indexed by word value (1 = word occurs in the current query)\r
+// and its size in entries; both are set up by SetQuery().\r
+static byte *g_QueryHasWord;\r
+static unsigned g_WordCount;\r
+\r
+unsigned GetWord(const byte *Seq)\r
+       {\r
+       unsigned Word = 0;\r
+       const byte *Front = Seq;\r
+       for (unsigned i = 0; i < opt_w; ++i)\r
+               {\r
+               unsigned Letter = g_CharToLetterNucleo[*Front++];\r
+               Word = (Word*4) + Letter;\r
+               }\r
+       return Word;\r
+       }\r
+\r
+static void SetQuery(const SeqData &Query)\r
+       {\r
+       if (g_QueryHasWord == 0)\r
+               {\r
+               g_WordCount = 4;\r
+               for (unsigned i = 1; i < opt_w; ++i)\r
+                       g_WordCount *= 4;\r
+\r
+               g_QueryHasWord = myalloc(byte, g_WordCount);\r
+               }\r
+\r
+       memset(g_QueryHasWord, 0, g_WordCount);\r
+\r
+       if (Query.L <= opt_w)\r
+               return;\r
+\r
+       const unsigned L = Query.L - opt_w + 1;\r
+       const byte *Seq = Query.Seq;\r
+       for (unsigned i = 0; i < L; ++i)\r
+               {\r
+               unsigned Word = GetWord(Seq++);\r
+               g_QueryHasWord[Word] = 1;\r
+               }\r
+       }\r
+\r
+static unsigned GetUniqueWordsInCommon(const SeqData &Target)\r
+       {\r
+       if (Target.L <= opt_w)\r
+               return 0;\r
+\r
+       unsigned Count = 0;\r
+       const unsigned L = Target.L - opt_w + 1;\r
+       const byte *Seq = Target.Seq;\r
+       for (unsigned i = 0; i < L; ++i)\r
+               {\r
+               unsigned Word = GetWord(Seq++);\r
+               if (g_QueryHasWord[Word])\r
+                       ++Count;\r
+               }\r
+       return Count;\r
+       }\r
+\r
+void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts, \r
+  vector<unsigned> &Order)\r
+       {\r
+       WordCounts.clear();\r
+       Order.clear();\r
+\r
+       SetQuery(Query);\r
+\r
+       const unsigned SeqCount = DB.GetSeqCount();\r
+       for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)\r
+               {\r
+               SeqData Target;\r
+               DB.GetSeqData(SeqIndex, Target);\r
+               float WordCount = (float) GetUniqueWordsInCommon(Target);\r
+               WordCounts.push_back(WordCount);\r
+               }\r
+       SortDescending(WordCounts, Order);\r
+       }\r
+\r
+//#endif // UCHIMES\r
index a22ece24c3c83943d90971f51026ddeb6ddd3c73..7df7c4e28d65cb2f7853500728075e8edc161723 100644 (file)
@@ -177,7 +177,7 @@ void ValidCalculators::printCitations(vector<string> Estimators) {
                                }else if (Estimators[i] == "ochiai") { Calculator* temp = new Ochiai(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; 
                                
                                }else if (Estimators[i] == "anderberg") { Calculator* temp = new Anderberg(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; 
-                               }else if (Estimators[i] == "skulczynski") { Calculator* temp = new Kulczynski(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; 
+                               }else if (Estimators[i] == "kulczynski") { Calculator* temp = new Kulczynski(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; 
                                }else if (Estimators[i] == "kulczynskicody") { Calculator* temp = new KulczynskiCody(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; 
                                }else if (Estimators[i] == "lennon") { Calculator* temp = new Lennon(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "morisitahorn") { Calculator* temp = new MorHorn(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
diff --git a/viterbifast.cpp b/viterbifast.cpp
new file mode 100644 (file)
index 0000000..2b20174
--- /dev/null
@@ -0,0 +1,378 @@
+#include "dp.h"
+#include "out.h"
+#include "evalue.h"
+
+#define CMP_SIMPLE     0
+\r
+#if    SAVE_FAST
+// SAVE_FAST debugging state: full copies of the M/D/I score matrices and the
+// matching trace-back character matrices, plus row-pointer views that
+// AllocSave() refreshes.
+static Mx<float> g_MxDPM;
+static Mx<float> g_MxDPD;
+static Mx<float> g_MxDPI;
+
+static Mx<char> g_MxTBM;
+static Mx<char> g_MxTBD;
+static Mx<char> g_MxTBI;
+
+static float **g_DPM;
+static float **g_DPD;
+static float **g_DPI;
+
+static char **g_TBM;
+static char **g_TBD;
+static char **g_TBI;
+
+#if    CMP_SIMPLE
+static Mx<float> *g_DPMSimpleMx;
+static Mx<float> *g_DPDSimpleMx;
+static Mx<float> *g_DPISimpleMx;
+static float **g_DPMSimple;
+static float **g_DPDSimple;
+static float **g_DPISimple;
+
+#define cmpm(i, j, x)  { if (!feq(x, g_DPMSimple[i][j])) \
+                                                       { \
+                                                       Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
+                                                         __FILE__, __LINE__, x, i, j, g_DPMSimple[i][j]); \
+                                                       } \
+                                               }
+
+#define cmpd(i, j, x)  { if (!feq(x, g_DPDSimple[i][j])) \
+                                                       { \
+                                                       Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
+                                                         __FILE__, __LINE__, x, i, j, g_DPDSimple[i][j]); \
+                                                       } \
+                                               }
+
+#define cmpi(i, j, x)  { if (!feq(x, g_DPISimple[i][j])) \
+                                                       { \
+                                                       Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
+                                                         __FILE__, __LINE__, x, i, j, g_DPISimple[i][j]); \
+                                                       } \
+                                               }
+
+#else
+
+// CMP_SIMPLE disabled: the comparison macros expand to nothing.
+#define cmpm(i, j, x)  /* empty */
+#define cmpd(i, j, x)  /* empty */
+#define cmpi(i, j, x)  /* empty */
+
+#endif
+
+static void AllocSave(unsigned LA, unsigned LB)
+       {
+#if    CMP_SIMPLE
+       GetSimpleDPMxs(&g_DPMSimpleMx, &g_DPDSimpleMx, &g_DPISimpleMx);
+       g_DPMSimple = g_DPMSimpleMx->GetData();
+       g_DPDSimple = g_DPDSimpleMx->GetData();
+       g_DPISimple = g_DPISimpleMx->GetData();
+#endif
+       g_MxDPM.Alloc("FastM", LA+1, LB+1);\r
+       g_MxDPD.Alloc("FastD", LA+1, LB+1);\r
+       g_MxDPI.Alloc("FastI", LA+1, LB+1);\r
+\r
+       g_MxTBM.Alloc("FastTBM", LA+1, LB+1);\r
+       g_MxTBD.Alloc("FastTBD", LA+1, LB+1);\r
+       g_MxTBI.Alloc("FastTBI", LA+1, LB+1);\r
+\r
+       g_DPM = g_MxDPM.GetData();\r
+       g_DPD = g_MxDPD.GetData();\r
+       g_DPI = g_MxDPI.GetData();\r
+\r
+       g_TBM = g_MxTBM.GetData();\r
+       g_TBD = g_MxTBD.GetData();\r
+       g_TBI = g_MxTBI.GetData();\r
+       }
+
+// Stores x as DPM[i][j]. Under CMP_SIMPLE, interior cells (i > 0 and j > 0)
+// are also asserted equal to the simple implementation's value.
+static void SAVE_DPM(unsigned i, unsigned j, float x)
+       {
+       g_DPM[i][j] = x;
+#if    CMP_SIMPLE
+       if (i == 0 || j == 0)
+               return;
+       asserta(feq(x, g_DPMSimple[i][j]));
+#endif
+       }
+
+// Stores x as DPD[i][j]. Under CMP_SIMPLE, interior cells (i > 0 and j > 0)
+// are also asserted equal to the simple implementation's value.
+static void SAVE_DPD(unsigned i, unsigned j, float x)
+       {
+       g_DPD[i][j] = x;
+#if    CMP_SIMPLE
+       if (i == 0 || j == 0)
+               return;
+       asserta(feq(x, g_DPDSimple[i][j]));
+#endif
+       }
+
+// Stores x as DPI[i][j]. Under CMP_SIMPLE, interior cells (i > 0 and j > 0)
+// are also asserted equal to the simple implementation's value.
+static void SAVE_DPI(unsigned i, unsigned j, float x)
+       {
+       g_DPI[i][j] = x;
+#if    CMP_SIMPLE
+       if (i == 0 || j == 0)
+               return;
+       asserta(feq(x, g_DPISimple[i][j]));
+#endif
+       }
+
+// Records trace-back character x for the M matrix at [i][j].
+static void SAVE_TBM(unsigned i, unsigned j, char x)
+       {
+       char *Row = g_TBM[i];
+       Row[j] = x;
+       }
+
+// Records trace-back character x for the D matrix at [i][j].
+static void SAVE_TBD(unsigned i, unsigned j, char x)
+       {
+       char *Row = g_TBD[i];
+       Row[j] = x;
+       }
+
+// Records trace-back character x for the I matrix at [i][j].
+static void SAVE_TBI(unsigned i, unsigned j, char x)
+       {
+       char *Row = g_TBI[i];
+       Row[j] = x;
+       }
+
+// Returns, through the three out-parameters, pointers to the saved M, D and I
+// score matrices. Only compiled when SAVE_FAST is enabled.
+void GetFastMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I)
+       {
+       *M = &g_MxDPM;
+       *D = &g_MxDPD;
+       *I = &g_MxDPI;
+       }
+
+#else  // SAVE_FAST
+
+// SAVE_FAST disabled: all save/compare hooks used by ViterbiFast() expand to
+// nothing, so the DP loop carries no debugging overhead.
+#define        SAVE_DPM(i, j, x)       /* empty */
+#define        SAVE_DPD(i, j, x)       /* empty */
+#define        SAVE_DPI(i, j, x)       /* empty */
+
+#define        SAVE_TBM(i, j, x)       /* empty */
+#define        SAVE_TBD(i, j, x)       /* empty */
+#define        SAVE_TBI(i, j, x)       /* empty */
+
+#define AllocSave(LA, LB)      /* empty */
+
+#define cmpm(i, j, x)  /* empty */
+#define cmpd(i, j, x)  /* empty */
+#define cmpi(i, j, x)  /* empty */
+
+#endif // SAVE_FAST
+
+float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB,
+  const AlnParams &AP, PathData &PD)
+       {
+       if (LA*LB > 100*1000*1000)
+               Die("ViterbiFast, too long LA=%u, LB=%u", LA, LB);
+
+       AllocBit(LA, LB);
+       AllocSave(LA, LB);
+       
+       StartTimer(ViterbiFast);
+
+       const float * const *Mx = AP.SubstMx;
+       float OpenA = AP.LOpenA;
+       float ExtA = AP.LExtA;
+
+       byte **TB = g_TBBit;
+       float *Mrow = g_DPRow1;
+       float *Drow = g_DPRow2;
+
+// Use Mrow[-1], so...
+       Mrow[-1] = MINUS_INFINITY;
+       for (unsigned j = 0; j <= LB; ++j)
+               {
+               Mrow[j] = MINUS_INFINITY;
+               SAVE_DPM(0, j, MINUS_INFINITY);
+               SAVE_TBM(0, j, '?');
+
+               Drow[j] = MINUS_INFINITY;
+               SAVE_DPD(0, j, MINUS_INFINITY);
+               SAVE_TBD(0, j, '?');
+               }
+       
+// Main loop
+       float M0 = float (0);
+       SAVE_DPM(0, 0, 0);
+       for (unsigned i = 0; i < LA; ++i)
+               {
+               byte a = A[i];
+               const float *MxRow = Mx[a];
+               float OpenB = AP.LOpenB;
+               float ExtB = AP.LExtB;
+               float I0 = MINUS_INFINITY;
+
+               SAVE_TBM(i, 0, '?');
+
+               SAVE_DPI(i, 0, MINUS_INFINITY);
+               SAVE_DPI(i, 1, MINUS_INFINITY);
+
+               SAVE_TBI(i, 0, '?');
+               SAVE_TBI(i, 1, '?');
+               
+               byte *TBrow = TB[i];
+               for (unsigned j = 0; j < LB; ++j)
+                       {
+                       byte b = B[j];
+                       byte TraceBits = 0;
+                       float SavedM0 = M0;
+
+               // MATCH
+                       {
+               // M0 = DPM[i][j]
+               // I0 = DPI[i][j]
+               // Drow[j] = DPD[i][j]
+                       cmpm(i, j, M0);
+                       cmpd(i, j, Drow[j]);
+                       cmpi(i, j, I0);
+
+                       float xM = M0;
+                       SAVE_TBM(i+1, j+1, 'M');
+                       if (Drow[j] > xM)
+                               {
+                               xM = Drow[j];
+                               TraceBits = TRACEBITS_DM;
+                               SAVE_TBM(i+1, j+1, 'D');
+                               }
+                       if (I0 > xM)
+                               {
+                               xM = I0;
+                               TraceBits = TRACEBITS_IM;
+                               SAVE_TBM(i+1, j+1, 'I');
+                               }
+                       M0 = Mrow[j];
+                       cmpm(i, j+1, M0);
+
+                       Mrow[j] = xM + MxRow[b];
+               // Mrow[j] = DPM[i+1][j+1])
+                       SAVE_DPM(i+1, j+1, Mrow[j]);
+                       }
+                       
+               // DELETE
+                       {
+               // SavedM0 = DPM[i][j]
+               // Drow[j] = DPD[i][j]
+                       cmpm(i, j, SavedM0);
+                       cmpd(i, j, Drow[j]);
+
+                       float md = SavedM0 + OpenB;
+                       Drow[j] += ExtB;
+                       SAVE_TBD(i+1, j, 'D');
+                       if (md >= Drow[j])
+                               {
+                               Drow[j] = md;
+                               TraceBits |= TRACEBITS_MD;
+                               SAVE_TBD(i+1, j, 'M');
+                               }
+               // Drow[j] = DPD[i+1][j]
+                       SAVE_DPD(i+1, j, Drow[j]);
+                       }
+                       
+               // INSERT
+                       {
+               // SavedM0 = DPM[i][j]
+               // I0 = DPI[i][j]
+                       cmpm(i, j, SavedM0);
+                       cmpi(i, j, I0);
+                       
+                       float mi = SavedM0 + OpenA;
+                       I0 += ExtA;
+                       SAVE_TBI(i, j+1, 'I');
+                       if (mi >= I0)
+                               {
+                               I0 = mi;
+                               TraceBits |= TRACEBITS_MI;
+                               SAVE_TBI(i, j+1, 'M');
+                               }
+               // I0 = DPI[i][j+1]
+                       SAVE_DPI(i, j+1, I0);
+                       }
+                       
+                       OpenB = AP.OpenB;
+                       ExtB = AP.ExtB;
+                       
+                       TBrow[j] = TraceBits;
+                       }
+               
+       // Special case for end of Drow[]
+               {
+       // M0 = DPM[i][LB]
+       // Drow[LB] = DPD[i][LB]
+               
+               TBrow[LB] = 0;
+               float md = M0 + AP.ROpenB;
+               Drow[LB] += AP.RExtB;
+               SAVE_TBD(i+1, LB, 'D');
+               if (md >= Drow[LB])
+                       {
+                       Drow[LB] = md;
+                       TBrow[LB] = TRACEBITS_MD;
+                       SAVE_TBD(i+1, LB, 'M');
+                       }
+       // Drow[LB] = DPD[i+1][LB]
+               SAVE_DPD(i+1, LB, Drow[LB]);
+               }
+               
+               SAVE_DPM(i+1, 0, MINUS_INFINITY);
+               M0 = MINUS_INFINITY;
+
+               OpenA = AP.OpenA;
+               ExtA = AP.ExtA;
+               }
+       
+       SAVE_TBM(LA, 0, '?');
+
+// Special case for last row of DPI
+       byte *TBrow = TB[LA];
+       float I1 = MINUS_INFINITY;
+
+       SAVE_DPI(LA, 0, MINUS_INFINITY);
+       SAVE_TBI(LA, 0, '?');
+
+       SAVE_DPI(LA, 1, MINUS_INFINITY);
+       SAVE_TBI(LA, 1, '?');
+
+       for (unsigned j = 1; j < LB; ++j)
+               {
+       // Mrow[j-1] = DPM[LA][j]
+       // I1 = DPI[LA][j]
+               
+               TBrow[j] = 0;
+               float mi = Mrow[int(j)-1] + AP.ROpenA;
+               I1 += AP.RExtA;
+               SAVE_TBI(LA, j+1, 'I');
+               if (mi > I1)
+                       {
+                       I1 = mi;
+                       TBrow[j] = TRACEBITS_MI;
+                       SAVE_TBI(LA, j+1, 'M');
+                       }
+               SAVE_DPI(LA, j+1, I1);
+               }
+       
+       float FinalM = Mrow[LB-1];
+       float FinalD = Drow[LB];
+       float FinalI = I1;
+// FinalM = DPM[LA][LB]
+// FinalD = DPD[LA][LB]
+// FinalI = DPI[LA][LB]
+       
+       float Score = FinalM;
+       byte State = 'M';
+       if (FinalD > Score)
+               {
+               Score = FinalD;
+               State = 'D';
+               }
+       if (FinalI > Score)
+               {
+               Score = FinalI;
+               State = 'I';
+               }
+
+       EndTimer(ViterbiFast);
+       TraceBackBit(LA, LB, State, PD);
+
+#if    SAVE_FAST
+       g_MxDPM.LogMe();
+       g_MxDPD.LogMe();
+       g_MxDPI.LogMe();
+
+       g_MxTBM.LogMe();
+       g_MxTBD.LogMe();
+       g_MxTBI.LogMe();
+#endif
+
+       return Score;
+       }
diff --git a/windex.h b/windex.h
new file mode 100644 (file)
index 0000000..0b324ca
--- /dev/null
+++ b/windex.h
@@ -0,0 +1,71 @@
+#ifndef windex_h\r
+#define windex_h\r
+\r
+// Forward declarations: Windex only takes these types by reference.\r
+class SFasta;\r
+struct SeqDB;\r
+\r
+// Fixed-width integer aliases used by the index.\r
+// word_t is the type of a single word value (see Windex::SeqToWord),\r
+// arrsize_t the element type of Windex::m_Capacities / m_Sizes,\r
+// seqindex_t the element type of Windex::m_SeedIndexes and\r
+// seqcountperword_t the element type of the Counts array filled by\r
+// Windex::WordsToCounts. wordcount_t and commonwordcount_t are not used in\r
+// this header.\r
+typedef uint32 word_t;\r
+typedef uint16 wordcount_t;\r
+typedef uint32 arrsize_t;\r
+typedef uint16 seqcountperword_t;\r
+typedef uint32 seqindex_t;\r
+typedef uint16 commonwordcount_t;\r
+\r
+// Magic numbers; presumably markers written to and checked in index files\r
+// (WindexFileHdr has Magic1 and Magic2 fields). Where Magic3 and Magic4 are\r
+// used is not visible in this header -- TODO confirm against windex.cpp.\r
+const uint32 WindexFileHdr_Magic1 = 0x312DE41;\r
+const uint32 WindexFileHdr_Magic2 = 0x312DE42;\r
+const uint32 WindexFileHdr_Magic3 = 0x312DE43;\r
+const uint32 WindexFileHdr_Magic4 = 0x312DE44;\r
+\r
+// Four uint32 fields describing a word index file header.\r
+// NOTE(review): the code that fills and validates this struct is not in this\r
+// header (presumably Windex::ToFile / Windex::FromFile); the field notes\r
+// below are inferred from names and should be confirmed there.\r
+struct WindexFileHdr\r
+       {\r
+       // Presumably WindexFileHdr_Magic1.\r
+       uint32 Magic1;\r
+       // Stored as uint32 rather than bool; presumably mirrors Windex::m_Nucleo.\r
+       uint32 IsNucleo;\r
+       // Presumably mirrors Windex::m_WordLength.\r
+       uint32 WordLength;\r
+       // Presumably WindexFileHdr_Magic2.\r
+       uint32 Magic2;\r
+       };\r
+\r
+// Word index. Only declarations are visible here; the member functions are\r
+// defined elsewhere, so the notes below are limited to what the signatures\r
+// show and are otherwise marked as assumptions.\r
+// All data members are public.\r
+class Windex\r
+       {\r
+public:\r
+       // Presumably set from the Nucleo argument of Init()/Init2() -- TODO confirm.\r
+       bool m_Nucleo;\r
+       // Presumably true when a reduced alphabet is in use (see InitRed).\r
+       bool m_RedAlpha;\r
+       unsigned m_WordLength;\r
+       unsigned m_AlphaSize;\r
+       unsigned m_WordCount;\r
+       unsigned m_Hi;\r
+       unsigned m_CapacityInc;\r
+       // Dynamically sized arrays (raw pointers). Presumably indexed by word,\r
+       // with m_SeedIndexes[Word] holding m_Sizes[Word] used entries out of\r
+       // m_Capacities[Word] allocated -- TODO confirm in the implementation.\r
+       arrsize_t *m_Capacities;\r
+       arrsize_t *m_Sizes;\r
+       float *m_WordScores;\r
+       seqindex_t **m_SeedIndexes;\r
+       byte *m_UniqueCounts;\r
+       // 256 entries, i.e. one per possible 8-bit character value.\r
+       unsigned m_CharToLetter[256];\r
+\r
+public:\r
+       Windex();\r
+\r
+       // File I/O by file name. ToFile is const; FromFile modifies the object.\r
+       void ToFile(const string &FileName) const;\r
+       void FromFile(const string &FileName);\r
+\r
+       // Population from a sequence source. Note SF is taken by non-const\r
+       // reference while DB is const.\r
+       void FromSFasta(SFasta &SF);\r
+       void FromSeqDB(const SeqDB &DB);\r
+\r
+       // ctor defaults to false; presumably true only when called from the\r
+       // constructor -- TODO confirm.\r
+       void Clear(bool ctor = false);\r
+       // Words points to N word values associated with sequence SeqIndex.\r
+       void AddWords(unsigned SeqIndex, const word_t *Words, unsigned N);\r
+       void Init(bool Nucleo, unsigned WordLength);\r
+       void Init2(bool Nucleo, unsigned TableSize);\r
+       void InitRed(unsigned WordLength);\r
+       // SubstMx is a read-only array of read-only float rows.\r
+       void InitWordScores(const float *const *SubstMx);\r
+       void Reset();\r
+\r
+       // Logging helpers; none of them modify the object.\r
+       void LogMe() const;\r
+       unsigned LogMemSize() const;\r
+       // TopWords defaults to 10.\r
+       void LogWordStats(unsigned TopWords = 10) const;\r
+\r
+       // Conversions between sequences and words.\r
+       // NOTE(review): WordToStr returns a raw const char *; the lifetime of\r
+       // the returned buffer cannot be determined from this header.\r
+       const char *WordToStr(word_t Word) const;\r
+       word_t SeqToWord(const byte *Seq) const;\r
+       // Seq has length L; results are written to the caller-supplied Words\r
+       // buffer. The unsigned return value is presumably the number of words\r
+       // written -- TODO confirm.\r
+       unsigned SeqToWords(const byte *Seq, unsigned L, word_t *Words) const;\r
+       unsigned SeqToWordsStep(unsigned Step, const byte *Seq, unsigned L, word_t *Words) const;\r
+       // Words holds N input words; UniqueWords (and Counts) are output\r
+       // buffers supplied by the caller. The return value is presumably the\r
+       // number of unique words -- TODO confirm.\r
+       unsigned WordsToCounts(const word_t *Words, unsigned N,\r
+         word_t *UniqueWords, seqcountperword_t *Counts) const;\r
+       unsigned GetUniqueWords(const word_t *Words, unsigned N,\r
+         word_t *UniqueWords) const;\r
+       void LogSizeHisto() const;\r
+       };\r
+\r
+#endif // windex_h\r
diff --git a/writechhit.cpp b/writechhit.cpp
new file mode 100644 (file)
index 0000000..ea67061
--- /dev/null
+++ b/writechhit.cpp
@@ -0,0 +1,329 @@
+#include "myutils.h"\r
+#include "chime.h"\r
+\r
+void WriteChimeFileHdr(FILE *f)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+\r
+       fprintf(f,\r
+               "\tQuery"               // 1\r
+               "\tA"                   // 2\r
+               "\tB"                   // 3\r
+               "\tIdQM"                // 4\r
+               "\tIdQA"                // 5\r
+               "\tIdQB"                // 6\r
+               "\tIdAB"                // 7\r
+               "\tIdQT"                // 8\r
+               "\tLY"                  // 9\r
+               "\tLN"                  // 10\r
+               "\tLA"                  // 11\r
+               "\tRY"                  // 12\r
+               "\tRN"                  // 13\r
+               "\tRA"                  // 14\r
+               "\tDiv"                 // 15\r
+               "\tY"                   // 16\r
+               "\n"\r
+               );\r
+       }\r
+\r
+void WriteChimeHit(FILE *f, const ChimeHit2 &Hit)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+\r
+       if (Hit.Div <= 0.0)\r
+               {\r
+               fprintf(f, "0.0000");           // 0\r
+\r
+               fprintf(f,\r
+                 "\t%s", Hit.QLabel.c_str());  // 1\r
+\r
+               fprintf(f,\r
+                 "\t*"                                         // 2\r
+                 "\t*"                                         // 3\r
+                 "\t*"                                         // 4\r
+                 "\t*"                                         // 5\r
+                 "\t*"                                         // 6\r
+                 "\t*"                                         // 7\r
+                 "\t*"                                         // 8\r
+                 "\t*"                                         // 9\r
+                 "\t*"                                         // 10\r
+                 "\t*"                                         // 11\r
+                 "\t*"                                         // 12\r
+                 "\t*"                                         // 13\r
+                 "\t*"                                         // 14\r
+                 "\t*"                                         // 15\r
+                 "\tN"                                         // 16\r
+                 "\n"\r
+                 );\r
+               return;\r
+               }\r
+\r
+       fprintf(f, "%.4f", Hit.Score);          // 0\r
+\r
+       fputc('\t', f);\r
+       fputs(Hit.QLabel.c_str(), f);           // 1\r
+\r
+       fputc('\t', f);\r
+       fputs(Hit.ALabel.c_str(), f);           // 2\r
+\r
+       fputc('\t', f);\r
+       fputs(Hit.BLabel.c_str(), f);           // 3\r
+\r
+       fprintf(f, "\t%.1f", Hit.PctIdQM);      // 4\r
+       fprintf(f, "\t%.1f", Hit.PctIdQA);      // 5\r
+       fprintf(f, "\t%.1f", Hit.PctIdQB);      // 6\r
+       fprintf(f, "\t%.1f", Hit.PctIdAB);      // 7\r
+       fprintf(f, "\t%.1f", Hit.PctIdQT);      // 8\r
+\r
+       fprintf(f, "\t%u", Hit.CS_LY);          // 9\r
+       fprintf(f, "\t%u", Hit.CS_LN);          // 10\r
+       fprintf(f, "\t%u", Hit.CS_LA);          // 11\r
+\r
+       fprintf(f, "\t%u", Hit.CS_RY);          // 12\r
+       fprintf(f, "\t%u", Hit.CS_RN);          // 13\r
+       fprintf(f, "\t%u", Hit.CS_RA);          // 14\r
+\r
+       fprintf(f, "\t%.2f", Hit.Div);          // 15\r
+\r
+       fprintf(f, "\t%c", yon(Hit.Accept())); // 16\r
+       fputc('\n', f);\r
+       }\r
+\r
+unsigned GetUngappedLength(const byte *Seq, unsigned L)\r
+       {\r
+       unsigned UL = 0;\r
+       for (unsigned i = 0; i < L; ++i)\r
+               if (!isgap(Seq[i]))\r
+                       ++UL;\r
+       return UL;\r
+       }\r
+\r
+// Symbol for the "Diffs" row at a column where none of the three sequences\r
+// has a gap. q, a and b are the query, parent A and parent B letters.\r
+// Left is true for columns before the crossover region (model is parent A)\r
+// and false for columns after it (model is parent B).\r
+// ' ' all three agree; upper-case 'A'/'B' query agrees only with the model\r
+// parent; lower-case 'b'/'a' query agrees only with the other parent;\r
+// 'N' parents agree but the query differs; '?' all three differ.\r
+static char ChimeDiffChar(char q, char a, char b, bool Left)\r
+       {\r
+       if (q == a && q == b)\r
+               return ' ';\r
+       if (q == a)\r
+               return Left ? 'A' : 'a';\r
+       if (q == b)\r
+               return Left ? 'b' : 'B';\r
+       if (a == b)\r
+               return 'N';\r
+       return '?';\r
+       }\r
+\r
+// Symbol for the "Votes" row at a gap-free column; arguments as for\r
+// ChimeDiffChar. ' ' all three agree; '+' query agrees only with the model\r
+// parent; '!' query agrees only with the other parent; '0' otherwise.\r
+static char ChimeVoteChar(char q, char a, char b, bool Left)\r
+       {\r
+       if (q == a && q == b)\r
+               return ' ';\r
+       if (q == a)\r
+               return Left ? '+' : '!';\r
+       if (q == b)\r
+               return Left ? '!' : '+';\r
+       return '0';\r
+       }\r
+\r
+// Writes a human-readable three-way alignment report for Hit to f.\r
+//\r
+// Output: a separator, the labels and ungapped lengths of the query and both\r
+// parents, then the alignment in rows of up to 80 columns (terminal gaps of\r
+// the query are trimmed). Each row group shows parent A, the query, parent B\r
+// (parent letters that differ from the query are lower-cased), followed by\r
+// "Diffs", "Votes" and "Model" annotation lines, and finally two summary\r
+// lines with identities, divergence, vote counts and score.\r
+//\r
+// Nothing is written when f is null, when Hit.Div <= 0, or when the aligned\r
+// query contains no letters at all.\r
+// Hit.Q3, Hit.A3 and Hit.B3 must have equal length (checked with asserta).\r
+void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+\r
+       if (Hit.Div <= 0.0)\r
+               return;\r
+\r
+       const string &Q3 = Hit.Q3;\r
+       const string &A3 = Hit.A3;\r
+       const string &B3 = Hit.B3;\r
+\r
+       const byte *Q3Seq = (const byte *) Q3.c_str();\r
+       const byte *A3Seq = (const byte *) A3.c_str();\r
+       const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+// Aligned\r
+       unsigned ColCount = SIZE(Q3);\r
+       asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+       unsigned LQ = GetUngappedLength(Q3Seq, ColCount);\r
+       unsigned LA = GetUngappedLength(A3Seq, ColCount);\r
+       unsigned LB = GetUngappedLength(B3Seq, ColCount);\r
+\r
+// With no letter in the query row FromCol/ToCol below would stay UINT_MAX\r
+// and the column loops would index far outside the sequences.\r
+       if (LQ == 0)\r
+               return;\r
+\r
+       fprintf(f, "\n");\r
+       fprintf(f, "------------------------------------------------------------------------\n");\r
+       fprintf(f, "Query   (%5u nt) %s\n", LQ, Hit.QLabel.c_str());\r
+       fprintf(f, "ParentA (%5u nt) %s\n", LA, Hit.ALabel.c_str());\r
+       fprintf(f, "ParentB (%5u nt) %s\n", LB, Hit.BLabel.c_str());\r
+\r
+// Strip terminal gaps in query: FromCol/ToCol are the first/last columns\r
+// where the query has a letter.\r
+       unsigned FromCol = UINT_MAX;\r
+       unsigned ToCol = UINT_MAX;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               if (!isgap(Q3Seq[Col]))\r
+                       {\r
+                       if (FromCol == UINT_MAX)\r
+                               FromCol = Col;\r
+                       ToCol = Col;\r
+                       }\r
+               }\r
+\r
+// Positions (count of letters already passed) in each sequence. The parents\r
+// may have letters in the trimmed leading columns, the query cannot.\r
+       unsigned QPos = 0;\r
+       unsigned APos = 0;\r
+       unsigned BPos = 0;\r
+       for (unsigned Col = 0; Col < FromCol; ++Col)\r
+               {\r
+               if (!isgap(A3Seq[Col]))\r
+                       ++APos;\r
+               if (!isgap(B3Seq[Col]))\r
+                       ++BPos;\r
+               }\r
+\r
+       const unsigned ColsPerRow = 80;\r
+       unsigned Range = ToCol - FromCol + 1;\r
+       unsigned RowCount = (Range + ColsPerRow - 1)/ColsPerRow;\r
+       unsigned RowFromCol = FromCol;\r
+       for (unsigned RowIndex = 0; RowIndex < RowCount; ++RowIndex)\r
+               {\r
+               fprintf(f, "\n");\r
+               unsigned RowToCol = RowFromCol + ColsPerRow - 1;\r
+               if (RowToCol > ToCol)\r
+                       RowToCol = ToCol;\r
+\r
+       // A row: start position, letters, end position\r
+               fprintf(f, "A %5u ", APos + 1);\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char a = A3Seq[Col];\r
+                       if (a != q)\r
+                               // tolower requires a value representable as unsigned char\r
+                               a = (char) tolower((unsigned char) a);\r
+                       fputc(a, f);\r
+                       if (!isgap(a))\r
+                               ++APos;\r
+                       }\r
+               fprintf(f, " %u\n", APos);\r
+\r
+       // Q row\r
+               fprintf(f, "Q %5u ", QPos + 1);\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       fputc(q, f);\r
+                       if (!isgap(q))\r
+                               ++QPos;\r
+                       }\r
+               fprintf(f, " %u\n", QPos);\r
+\r
+       // B row\r
+               fprintf(f, "B %5u ", BPos + 1);\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char b = B3Seq[Col];\r
+                       if (b != q)\r
+                               b = (char) tolower((unsigned char) b);\r
+                       fputc(b, f);\r
+                       if (!isgap(b))\r
+                               ++BPos;\r
+                       }\r
+               fprintf(f, " %u\n", BPos);\r
+\r
+       // Diffs row: blank at gapped columns and inside the crossover region\r
+               fprintf(f, "Diffs   ");\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char a = A3Seq[Col];\r
+                       char b = B3Seq[Col];\r
+\r
+                       char c = ' ';\r
+                       if (!(isgap(q) || isgap(a) || isgap(b)))\r
+                               {\r
+                               if (Col < Hit.ColXLo)\r
+                                       c = ChimeDiffChar(q, a, b, true);\r
+                               else if (Col > Hit.ColXHi)\r
+                                       c = ChimeDiffChar(q, a, b, false);\r
+                               }\r
+\r
+                       fputc(c, f);\r
+                       }\r
+               fprintf(f, "\n");\r
+\r
+       // Votes row: as Diffs, but also blank next to a column containing a gap\r
+               fprintf(f, "Votes   ");\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char a = A3Seq[Col];\r
+                       char b = B3Seq[Col];\r
+\r
+                       bool PrevGap = Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1]));\r
+                       bool NextGap = Col+1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1]));\r
+\r
+                       char c = ' ';\r
+                       if (!(isgap(q) || isgap(a) || isgap(b) || PrevGap || NextGap))\r
+                               {\r
+                               if (Col < Hit.ColXLo)\r
+                                       c = ChimeVoteChar(q, a, b, true);\r
+                               else if (Col > Hit.ColXHi)\r
+                                       c = ChimeVoteChar(q, a, b, false);\r
+                               }\r
+\r
+                       fputc(c, f);\r
+                       }\r
+               fprintf(f, "\n");\r
+\r
+       // Model row: A before the crossover region, x inside it, B after it\r
+               fprintf(f, "Model   ");\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       if (Col < Hit.ColXLo)\r
+                               fputc('A', f);\r
+                       else if (Col <= Hit.ColXHi)\r
+                               fputc('x', f);\r
+                       else\r
+                               fputc('B', f);\r
+                       }\r
+\r
+               fprintf(f, "\n");\r
+\r
+               RowFromCol += ColsPerRow;\r
+               }\r
+       fprintf(f, "\n");\r
+\r
+// Divergence is recomputed here from the identities: the relative gain (in\r
+// percent) of the model over the closer parent.\r
+       double PctIdBestP = max(Hit.PctIdQA, Hit.PctIdQB);\r
+       double Div = (Hit.PctIdQM - PctIdBestP)*100.0/PctIdBestP;\r
+\r
+       unsigned LTot = Hit.CS_LY + Hit.CS_LN + Hit.CS_LA;\r
+       unsigned RTot = Hit.CS_RY + Hit.CS_RN + Hit.CS_RA;\r
+\r
+       double PctL = Pct(Hit.CS_LY, LTot);\r
+       double PctR = Pct(Hit.CS_RY, RTot);\r
+\r
+       fprintf(f,\r
+         "Ids.  QA %.1f%%, QB %.1f%%, AB %.1f%%, QModel %.1f%%, Div. %+.1f%%\n",\r
+         Hit.PctIdQA,\r
+         Hit.PctIdQB,\r
+         Hit.PctIdAB,\r
+         Hit.PctIdQM,\r
+         Div);\r
+\r
+       fprintf(f,\r
+         "Diffs Left %u: N %u, A %u, Y %u (%.1f%%); Right %u: N %u, A %u, Y %u (%.1f%%), Score %.4f\n",\r
+         LTot, Hit.CS_LN, Hit.CS_LA, Hit.CS_LY, PctL,\r
+         RTot, Hit.CS_RN, Hit.CS_RA, Hit.CS_RY, PctR,\r
+         Hit.Score);\r
+       }\r